// Source blob: 3c075d259c33398ce5835ff5f8c3d8d4e7621c02
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
$assert SAMPLE_TILE >= 1
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <xnnpack/math.h>
#include <xnnpack/fft.h>
// Scalar in-place post-processing pass over an int16 buffer, generated from a
// template: lines starting with `$` and `${...}` expressions are directives
// expanded by the code generator (SAMPLE_TILE selects the unroll factor).
//
// Parameters:
//   samples - must be even and >= 2 (asserted below).
//   data    - read and overwritten in place. Elements are accessed as pairs
//             (the r/i variable names suggest interleaved real/imaginary
//             parts). The highest index written is data[samples * 2 + 1], so
//             the buffer must hold at least samples * 2 + 2 int16_t values.
//   twiddle - read-only table consumed two int16_t values per processed pair;
//             samples / 2 pairs are processed, i.e. `samples` values are read.
//
// NOTE(review): math_asr_s32 comes from xnnpack/math.h and is presumably an
// arithmetic shift right. Under that assumption `math_asr_s32(x * 16383 +
// 16384, 15)` is a rounded multiply by 16383/32768 (roughly one half).
void xnn_cs16_fftr_ukernel__scalar_x${SAMPLE_TILE}(
size_t samples,
int16_t* data,
const int16_t* twiddle)
{
assert(samples >= 2);
assert(samples % 2 == 0);
assert(data != NULL);
// NOTE(review): this repeats the check on the previous line; redundant but harmless.
assert(data != NULL);
assert(twiddle != NULL);
// dl walks forward from the first pair; dr starts at the pair located at
// index samples * 2 and walks backward.
int16_t* dl = data;
int16_t* dr = data + samples * 2;
// The first pair is handled separately: both of its components are scaled,
// then their sum is stored in the first pair and their difference in the
// pair at dr; the second component of both pairs is set to zero.
int32_t vdcr = (int32_t) dl[0];
int32_t vdci = (int32_t) dl[1];
vdcr = math_asr_s32(vdcr * 16383 + 16384, 15);
vdci = math_asr_s32(vdci * 16383 + 16384, 15);
dl[0] = vdcr + vdci;
dl[1] = 0;
dl += 2;
dr[0] = vdcr - vdci;
dr[1] = 0;
// Each iteration below consumes one pair from the left and one from the
// right, so the iteration count is half the sample count.
samples >>= 1;
// Unrolled main loop, emitted only when SAMPLE_TILE > 1: processes
// SAMPLE_TILE left/right pairs per iteration.
$if SAMPLE_TILE > 1:
for (; samples >= ${SAMPLE_TILE}; samples -= ${SAMPLE_TILE}) {
dr -= ${SAMPLE_TILE} * 2;
// Load left pairs in ascending order.
$for C in range(SAMPLE_TILE):
int32_t vilr${C} = dl[${C * 2 + 0}];
int32_t vili${C} = dl[${C * 2 + 1}];
// Load right pairs in descending order so that lane C of the left side is
// matched with the mirrored lane on the right; the second component of the
// right pair is negated on load.
$for C in range(SAMPLE_TILE):
int32_t virr${C} = (int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}];
int32_t viri${C} = -(int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}];
// Load one twiddle pair per lane and advance the table pointer.
$for C in range(SAMPLE_TILE):
const int32_t vtwr${C} = twiddle[${C * 2 + 0}];
const int32_t vtwi${C} = twiddle[${C * 2 + 1}];
twiddle += ${SAMPLE_TILE} * 2;
// Scale all four inputs of each lane with the same rounded multiply used
// for the first pair.
$for C in range(SAMPLE_TILE):
vilr${C} = math_asr_s32(vilr${C} * 16383 + 16384, 15);
virr${C} = math_asr_s32(virr${C} * 16383 + 16384, 15);
$for C in range(SAMPLE_TILE):
vili${C} = math_asr_s32(vili${C} * 16383 + 16384, 15);
viri${C} = math_asr_s32(viri${C} * 16383 + 16384, 15);
// Sum (vacc1) and difference (vacc2) of the left and right values.
$for C in range(SAMPLE_TILE):
const int32_t vacc1r${C} = vilr${C} + virr${C};
const int32_t vacc2r${C} = vilr${C} - virr${C};
$for C in range(SAMPLE_TILE):
const int32_t vacc1i${C} = vili${C} + viri${C};
const int32_t vacc2i${C} = vili${C} - viri${C};
// Complex-style product of the difference (vacc2r, vacc2i) with the twiddle
// (vtwr, vtwi), with 16384 added before the shift by 15 for rounding.
$for C in range(SAMPLE_TILE):
const int32_t twr${C} = math_asr_s32(vacc2r${C} * vtwr${C} - vacc2i${C} * vtwi${C} + 16384, 15);
$for C in range(SAMPLE_TILE):
const int32_t twi${C} = math_asr_s32(vacc2r${C} * vtwi${C} + vacc2i${C} * vtwr${C} + 16384, 15);
// Left outputs: (sum + product) shifted right by 1.
$for C in range(SAMPLE_TILE):
dl[${C * 2 + 0}] = math_asr_s32(vacc1r${C} + twr${C}, 1);
dl[${C * 2 + 1}] = math_asr_s32(vacc1i${C} + twi${C}, 1);
// Right outputs, stored back in mirrored order: first component is
// (sum - product), second component is (product - sum), each shifted by 1.
$for C in range(SAMPLE_TILE):
dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}] = math_asr_s32(vacc1r${C} - twr${C}, 1);
dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}] = math_asr_s32(twi${C} - vacc1i${C}, 1);
dl += ${SAMPLE_TILE} * 2;
}
// Remainder loop: handles whatever the unrolled loop left over, one
// left/right pair per iteration, using the same arithmetic as above. When
// the unrolled loop is not emitted, every pair is processed here.
// NOTE(review): XNN_UNLIKELY is defined outside this file; presumably a
// branch-prediction hint -- confirm.
if XNN_UNLIKELY(samples != 0) {
do {
dr -= 2;
int32_t vilr = dl[0];
int32_t vili = dl[1];
int32_t virr = (int32_t) dr[0];
// Second component of the right pair is negated on load.
int32_t viri = -(int32_t) dr[1];
const int32_t vtwr = twiddle[0];
const int32_t vtwi = twiddle[1];
twiddle += 2;
vilr = math_asr_s32(vilr * 16383 + 16384, 15);
vili = math_asr_s32(vili * 16383 + 16384, 15);
virr = math_asr_s32(virr * 16383 + 16384, 15);
viri = math_asr_s32(viri * 16383 + 16384, 15);
const int32_t vacc1r = vilr + virr;
const int32_t vacc1i = vili + viri;
const int32_t vacc2r = vilr - virr;
const int32_t vacc2i = vili - viri;
const int32_t twr = math_asr_s32(vacc2r * vtwr - vacc2i * vtwi + 16384, 15);
const int32_t twi = math_asr_s32(vacc2r * vtwi + vacc2i * vtwr + 16384, 15);
dl[0] = math_asr_s32(vacc1r + twr, 1);
dl[1] = math_asr_s32(vacc1i + twi, 1);
dr[0] = math_asr_s32(vacc1r - twr, 1);
dr[1] = math_asr_s32(twi - vacc1i, 1);
dl += 2;
} while (--samples != 0);
}
}