// blob: dbeac6aafbc875ee430d1cf68cab7351b2bfccf1 [file] [log] [blame]
use core::arch::wasm32::*;
/// Packs four `f32` values into a `v128`, placing `a[i]` in lane `i`.
///
/// This is a `const fn`, so it can be used to build constant vectors.
pub const fn v128_from_f32x4(a: [f32; 4]) -> v128 {
    let [x, y, z, w] = a;
    f32x4(x, y, z, w)
}
/// Computes the 3-component dot product of `lhs` and `rhs` and leaves the
/// result in lane 0 (x) of the returned `v128`.
///
/// Only the x, y and z lanes of the inputs contribute to lane 0. The other
/// lanes of the result hold intermediate values and carry no meaning.
#[inline(always)]
pub(crate) fn dot3_in_x(lhs: v128, rhs: v128) -> v128 {
    let products = f32x4_mul(lhs, rhs);
    // Move the y and z products into lane 0 so they can be summed there.
    let y_in_x = i32x4_shuffle::<1, 0, 0, 0>(products, products);
    let z_in_x = i32x4_shuffle::<2, 0, 0, 0>(products, products);
    // Lane 0 is accumulated as (x + y) + z.
    f32x4_add(f32x4_add(products, y_in_x), z_in_x)
}
/// Computes the 4-component dot product of `lhs` and `rhs` and leaves the
/// result in lane 0 (x) of the returned `v128`.
///
/// The other lanes of the result hold intermediate values and carry no
/// meaning.
#[inline(always)]
pub(crate) fn dot4_in_x(lhs: v128, rhs: v128) -> v128 {
    let products = f32x4_mul(lhs, rhs);
    // Fold the upper half onto the lower half: lane 0 = x + z, lane 1 = y + w.
    let upper_half = i32x4_shuffle::<2, 3, 0, 0>(products, products);
    let pair_sums = f32x4_add(products, upper_half);
    // Fold lane 1 onto lane 0: lane 0 = (x + z) + (y + w).
    let lane1_in_x = i32x4_shuffle::<1, 0, 0, 0>(pair_sums, pair_sums);
    f32x4_add(pair_sums, lane1_in_x)
}
/// Returns the 3-component dot product of `lhs` and `rhs` as a scalar `f32`.
#[inline]
pub(crate) fn dot3(lhs: v128, rhs: v128) -> f32 {
    // The helper leaves the sum in lane 0; read just that lane.
    let sum_in_x = dot3_in_x(lhs, rhs);
    f32x4_extract_lane::<0>(sum_in_x)
}
/// Returns the 3-component dot product of `lhs` and `rhs` copied into every
/// lane of a `v128`.
#[inline]
pub(crate) fn dot3_into_v128(lhs: v128, rhs: v128) -> v128 {
    let sum_in_x = dot3_in_x(lhs, rhs);
    // Selecting lane 0 for all four outputs replicates the result across the vector.
    i32x4_shuffle::<0, 0, 0, 0>(sum_in_x, sum_in_x)
}
/// Returns the 4-component dot product of `lhs` and `rhs` as a scalar `f32`.
#[inline]
pub(crate) fn dot4(lhs: v128, rhs: v128) -> f32 {
    // The helper leaves the sum in lane 0; read just that lane.
    let sum_in_x = dot4_in_x(lhs, rhs);
    f32x4_extract_lane::<0>(sum_in_x)
}
/// Returns the 4-component dot product of `lhs` and `rhs` copied into every
/// lane of a `v128`.
#[inline]
pub(crate) fn dot4_into_v128(lhs: v128, rhs: v128) -> v128 {
    let sum_in_x = dot4_in_x(lhs, rhs);
    // Selecting lane 0 for all four outputs replicates the result across the vector.
    i32x4_shuffle::<0, 0, 0, 0>(sum_in_x, sum_in_x)
}