| // Copyright 2022 Google LLC |
| // |
| // This source code is licensed under the BSD-style license found in the |
| // LICENSE file in the root directory of this source tree. |
| |
| #include <assert.h> |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <wasm_simd128.h> |
| |
| #include <xnnpack/math-stubs.h> |
| |
| |
| void xnn_math_f32_qs8_cvt__wasmsimd( |
| size_t n, |
| const float* input, |
| int8_t* output, |
| int8_t output_zero_point) |
| { |
| assert(n % (16 * sizeof(int8_t)) == 0); |
| |
| const v128_t vmin = wasm_f32x4_splat(12582912.0f - 128.0f - (float) output_zero_point); |
| const v128_t vfmagic = wasm_f32x4_const_splat(12582912.0f); |
| const v128_t vimagic = wasm_i32x4_splat(INT32_C(0x4B400000) - (int32_t) output_zero_point); |
| for (; n != 0; n -= 16 * sizeof(int8_t)) { |
| const v128_t vx_ll = wasm_v128_load(input); |
| const v128_t vx_lh = wasm_v128_load(input + 4); |
| const v128_t vx_hl = wasm_v128_load(input + 8); |
| const v128_t vx_hh = wasm_v128_load(input + 12); |
| input += 16; |
| |
| v128_t vy_ll = wasm_f32x4_add(vx_ll, vfmagic); |
| v128_t vy_lh = wasm_f32x4_add(vx_lh, vfmagic); |
| v128_t vy_hl = wasm_f32x4_add(vx_hl, vfmagic); |
| v128_t vy_hh = wasm_f32x4_add(vx_hh, vfmagic); |
| |
| vy_ll = wasm_i32x4_max(vy_ll, vmin); |
| vy_lh = wasm_i32x4_max(vy_lh, vmin); |
| vy_hl = wasm_i32x4_max(vy_hl, vmin); |
| vy_hh = wasm_i32x4_max(vy_hh, vmin); |
| |
| vy_ll = wasm_i32x4_sub(vy_ll, vimagic); |
| vy_lh = wasm_i32x4_sub(vy_lh, vimagic); |
| vy_hl = wasm_i32x4_sub(vy_hl, vimagic); |
| vy_hh = wasm_i32x4_sub(vy_hh, vimagic); |
| |
| const v128_t vy_lo = wasm_i16x8_narrow_i32x4(vy_ll, vy_lh); |
| const v128_t vy_hi = wasm_i16x8_narrow_i32x4(vy_hl, vy_hh); |
| |
| const v128_t vout = wasm_i8x16_narrow_i16x8(vy_lo, vy_hi); |
| wasm_v128_store(output, vout); |
| output += 16; |
| } |
| } |