| // Copyright 2022 Google LLC |
| // |
| // This source code is licensed under the BSD-style license found in the |
| // LICENSE file in the root directory of this source tree. |
| |
| $assert SAMPLE_TILE >= 1 |
| #include <assert.h> |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <xnnpack/math.h> |
| #include <xnnpack/fft.h> |
| |
| |
| void xnn_cs16_fftr_ukernel__scalar_x${SAMPLE_TILE}( |
| size_t samples, |
| int16_t* data, |
| const int16_t* twiddle) |
| { |
| assert(samples >= 2); |
| assert(samples % 2 == 0); |
| assert(data != NULL); |
| assert(data != NULL); |
| assert(twiddle != NULL); |
| |
| int16_t* dl = data; |
| int16_t* dr = data + samples * 2; |
| int32_t vdcr = (int32_t) dl[0]; |
| int32_t vdci = (int32_t) dl[1]; |
| |
| vdcr = math_asr_s32(vdcr * 16383 + 16384, 15); |
| vdci = math_asr_s32(vdci * 16383 + 16384, 15); |
| |
| dl[0] = vdcr + vdci; |
| dl[1] = 0; |
| dl += 2; |
| dr[0] = vdcr - vdci; |
| dr[1] = 0; |
| |
| samples >>= 1; |
| |
| $if SAMPLE_TILE > 1: |
| for (; samples >= ${SAMPLE_TILE}; samples -= ${SAMPLE_TILE}) { |
| dr -= ${SAMPLE_TILE} * 2; |
| $for C in range(SAMPLE_TILE): |
| int32_t vilr${C} = dl[${C * 2 + 0}]; |
| int32_t vili${C} = dl[${C * 2 + 1}]; |
| $for C in range(SAMPLE_TILE): |
| int32_t virr${C} = (int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}]; |
| int32_t viri${C} = -(int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}]; |
| $for C in range(SAMPLE_TILE): |
| const int32_t vtwr${C} = twiddle[${C * 2 + 0}]; |
| const int32_t vtwi${C} = twiddle[${C * 2 + 1}]; |
| twiddle += ${SAMPLE_TILE} * 2; |
| |
| $for C in range(SAMPLE_TILE): |
| vilr${C} = math_asr_s32(vilr${C} * 16383 + 16384, 15); |
| virr${C} = math_asr_s32(virr${C} * 16383 + 16384, 15); |
| $for C in range(SAMPLE_TILE): |
| vili${C} = math_asr_s32(vili${C} * 16383 + 16384, 15); |
| viri${C} = math_asr_s32(viri${C} * 16383 + 16384, 15); |
| $for C in range(SAMPLE_TILE): |
| const int32_t vacc1r${C} = vilr${C} + virr${C}; |
| const int32_t vacc2r${C} = vilr${C} - virr${C}; |
| $for C in range(SAMPLE_TILE): |
| const int32_t vacc1i${C} = vili${C} + viri${C}; |
| const int32_t vacc2i${C} = vili${C} - viri${C}; |
| |
| $for C in range(SAMPLE_TILE): |
| const int32_t twr${C} = math_asr_s32(vacc2r${C} * vtwr${C} - vacc2i${C} * vtwi${C} + 16384, 15); |
| $for C in range(SAMPLE_TILE): |
| const int32_t twi${C} = math_asr_s32(vacc2r${C} * vtwi${C} + vacc2i${C} * vtwr${C} + 16384, 15); |
| |
| $for C in range(SAMPLE_TILE): |
| dl[${C * 2 + 0}] = math_asr_s32(vacc1r${C} + twr${C}, 1); |
| dl[${C * 2 + 1}] = math_asr_s32(vacc1i${C} + twi${C}, 1); |
| $for C in range(SAMPLE_TILE): |
| dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}] = math_asr_s32(vacc1r${C} - twr${C}, 1); |
| dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}] = math_asr_s32(twi${C} - vacc1i${C}, 1); |
| dl += ${SAMPLE_TILE} * 2; |
| } |
| |
| if XNN_UNLIKELY(samples != 0) { |
| do { |
| dr -= 2; |
| int32_t vilr = dl[0]; |
| int32_t vili = dl[1]; |
| int32_t virr = (int32_t) dr[0]; |
| int32_t viri = -(int32_t) dr[1]; |
| const int32_t vtwr = twiddle[0]; |
| const int32_t vtwi = twiddle[1]; |
| twiddle += 2; |
| |
| vilr = math_asr_s32(vilr * 16383 + 16384, 15); |
| vili = math_asr_s32(vili * 16383 + 16384, 15); |
| virr = math_asr_s32(virr * 16383 + 16384, 15); |
| viri = math_asr_s32(viri * 16383 + 16384, 15); |
| const int32_t vacc1r = vilr + virr; |
| const int32_t vacc1i = vili + viri; |
| const int32_t vacc2r = vilr - virr; |
| const int32_t vacc2i = vili - viri; |
| |
| const int32_t twr = math_asr_s32(vacc2r * vtwr - vacc2i * vtwi + 16384, 15); |
| const int32_t twi = math_asr_s32(vacc2r * vtwi + vacc2i * vtwr + 16384, 15); |
| |
| dl[0] = math_asr_s32(vacc1r + twr, 1); |
| dl[1] = math_asr_s32(vacc1i + twi, 1); |
| dr[0] = math_asr_s32(vacc1r - twr, 1); |
| dr[1] = math_asr_s32(twi - vacc1i, 1); |
| dl += 2; |
| } while (--samples != 0); |
| } |
| } |