blob: e6134d0149aea318ad317f95fbc3165f409bb20f [file] [log] [blame] [edit]
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
$assert BATCH_TILE >= 1
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <xnnpack/math.h>
#include <xnnpack/window.h>
// Scalar implementation of the s16 window microkernel.
//
// For each of `rows` rows, every one of the `batch_size` input elements is
// multiplied by the weight at the same position within the row. The 32-bit
// product is passed through math_asr_s32 with `shift`, clamped to
// [INT16_MIN, INT16_MAX] and written to `output` as int16_t.
//
// `input` and `output` keep advancing from one row to the next, while the
// weight pointer is rewound to `weights` at the start of every row.
void xnn_s16_window_ukernel__scalar_x${BATCH_TILE}(
    size_t rows,
    size_t batch_size,
    const int16_t* input,
    const int16_t* weights,
    int16_t* output,
    uint32_t shift)
{
  assert(rows > 0);
  assert(batch_size != 0);
  assert(input != NULL);
  assert(weights != NULL);
  assert(output != NULL);
  assert(shift < 32);

  do {
    // Each row starts again from the first weight.
    const int16_t* window = weights;
    size_t remaining = batch_size;
    $if BATCH_TILE > 1:
      // Main loop: a full tile of elements per iteration. All loads of a tile
      // are issued before any of its stores.
      while (remaining >= ${BATCH_TILE}) {
        $for N in range(BATCH_TILE):
          const int32_t vprod${N} = (int32_t) input[${N}] * (int32_t) window[${N}];
        input += ${BATCH_TILE};
        window += ${BATCH_TILE};
        $for N in range(BATCH_TILE):
          const int32_t vshifted${N} = math_asr_s32(vprod${N}, shift);
        $for N in range(BATCH_TILE):
          const int32_t vclamped${N} = math_min_s32(math_max_s32(vshifted${N}, INT16_MIN), INT16_MAX);
        $for N in range(BATCH_TILE):
          output[${N}] = (int16_t) vclamped${N};
        output += ${BATCH_TILE};
        remaining -= ${BATCH_TILE};
      }
    // Remainder: whatever is left of the row, one element at a time.
    if XNN_UNLIKELY(remaining != 0) {
      for (; remaining != 0; remaining--) {
        const int32_t vprod = (int32_t) *input++ * (int32_t) *window++;
        const int32_t vshifted = math_asr_s32(vprod, shift);
        // Saturate to the int16_t range before narrowing.
        const int32_t vclamped = math_min_s32(math_max_s32(vshifted, INT16_MIN), INT16_MAX);
        *output++ = (int16_t) vclamped;
      }
    }
  } while (--rows != 0);
}