| use core::arch::wasm32::*; |
| |
| pub const fn v128_from_f32x4(a: [f32; 4]) -> v128 { |
| f32x4(a[0], a[1], a[2], a[3]) |
| } |
| |
| /// Calculates the vector 3 dot product and returns answer in x lane of v128. |
| #[inline(always)] |
| pub(crate) fn dot3_in_x(lhs: v128, rhs: v128) -> v128 { |
| let x2_y2_z2_w2 = f32x4_mul(lhs, rhs); |
| let y2_0_0_0 = i32x4_shuffle::<1, 0, 0, 0>(x2_y2_z2_w2, x2_y2_z2_w2); |
| let z2_0_0_0 = i32x4_shuffle::<2, 0, 0, 0>(x2_y2_z2_w2, x2_y2_z2_w2); |
| let x2y2_0_0_0 = f32x4_add(x2_y2_z2_w2, y2_0_0_0); |
| f32x4_add(x2y2_0_0_0, z2_0_0_0) |
| } |
| |
| /// Calculates the vector 4 dot product and returns answer in x lane of v128. |
| #[inline(always)] |
| pub(crate) fn dot4_in_x(lhs: v128, rhs: v128) -> v128 { |
| let x2_y2_z2_w2 = f32x4_mul(lhs, rhs); |
| let z2_w2_0_0 = i32x4_shuffle::<2, 3, 0, 0>(x2_y2_z2_w2, x2_y2_z2_w2); |
| let x2z2_y2w2_0_0 = f32x4_add(x2_y2_z2_w2, z2_w2_0_0); |
| let y2w2_0_0_0 = i32x4_shuffle::<1, 0, 0, 0>(x2z2_y2w2_0_0, x2z2_y2w2_0_0); |
| f32x4_add(x2z2_y2w2_0_0, y2w2_0_0_0) |
| } |
| |
| #[inline] |
| pub(crate) fn dot3(lhs: v128, rhs: v128) -> f32 { |
| f32x4_extract_lane::<0>(dot3_in_x(lhs, rhs)) |
| } |
| |
| #[inline] |
| pub(crate) fn dot3_into_v128(lhs: v128, rhs: v128) -> v128 { |
| let dot_in_x = dot3_in_x(lhs, rhs); |
| i32x4_shuffle::<0, 0, 0, 0>(dot_in_x, dot_in_x) |
| } |
| |
| #[inline] |
| pub(crate) fn dot4(lhs: v128, rhs: v128) -> f32 { |
| f32x4_extract_lane::<0>(dot4_in_x(lhs, rhs)) |
| } |
| |
| #[inline] |
| pub(crate) fn dot4_into_v128(lhs: v128, rhs: v128) -> v128 { |
| let dot_in_x = dot4_in_x(lhs, rhs); |
| i32x4_shuffle::<0, 0, 0, 0>(dot_in_x, dot_in_x) |
| } |