blob: 88270011a3d1bdf5d9994fca545056a571776408 [file] [log] [blame]
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <stdlib.h>
#include <stdio.h>
#include "ixheaac_type_def.h"
#include "ixheaacd_interface.h"
#include "ixheaac_constants.h"
#include "ixheaac_basic_ops32.h"
#include "ixheaac_basic_ops40.h"
#include "ixheaacd_function_selector.h"
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
extern const WORD8 ixheaacd_mps_dig_rev[8];
#define PLATFORM_INLINE __inline
#define DIG_REV(i, m, j) \
do { \
unsigned _ = (i); \
_ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
_ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
_ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
(j) = _ >> (m); \
} while (0)
static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) {
WORD32 result;
WORD64 temp_result;
temp_result = (WORD64)a * (WORD64)b;
result = ixheaac_sat64_32(temp_result >> 31);
return (result);
}
static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
WORD32 result;
result = ixheaac_add32_sat(a, ixheaacd_mult32_sat(b, c));
return (result);
}
static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) {
FLOAT32 result;
result = a * b;
return result;
}
static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) {
FLOAT32 result;
result = a + b * c;
return result;
}
VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
WORD32 npoints) {
WORD32 i, j, k;
FLOAT32 y[64], z[64];
FLOAT32 *ptr_y = y, *ptr_z = z;
const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt;
for (i = 0; i < npoints; i += 4) {
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
FLOAT32 *inp = ptr_xr;
FLOAT32 tmk;
WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2];
inp += (h2);
x0r = *inp;
x0i = *(inp + 1);
inp += 16;
x1r = *inp;
x1i = *(inp + 1);
inp += 16;
x2r = *inp;
x2i = *(inp + 1);
inp += 16;
x3r = *inp;
x3i = *(inp + 1);
x0r = x0r + x2r;
x0i = x0i + x2i;
tmk = x0r - x2r;
x2r = tmk - x2r;
tmk = x0i - x2i;
x2i = tmk - x2i;
x1r = x1r + x3r;
x1i = x1i + x3i;
tmk = x1r - x3r;
x3r = tmk - x3r;
tmk = x1i - x3i;
x3i = tmk - x3i;
x0r = x0r + x1r;
x0i = x0i + x1i;
tmk = x0r - x1r;
x1r = tmk - x1r;
tmk = x0i - x1i;
x1i = tmk - x1i;
x2r = x2r + x3i;
x2i = x2i - x3r;
tmk = x2r - x3i;
x3i = tmk - x3i;
tmk = x2i + x3r;
x3r = tmk + x3r;
*ptr_y++ = x0r;
*ptr_y++ = x0i;
*ptr_y++ = x2r;
*ptr_y++ = x2i;
*ptr_y++ = x1r;
*ptr_y++ = x1i;
*ptr_y++ = x3i;
*ptr_y++ = x3r;
inp = ptr_xi;
inp += (h2);
x0r = *inp;
x0i = *(inp + 1);
inp += 16;
x1r = *inp;
x1i = *(inp + 1);
inp += 16;
x2r = *inp;
x2i = *(inp + 1);
inp += 16;
x3r = *inp;
x3i = *(inp + 1);
x0r = x0r + x2r;
x0i = x0i + x2i;
tmk = x0r - x2r;
x2r = tmk - x2r;
tmk = x0i - x2i;
x2i = tmk - x2i;
x1r = x1r + x3r;
x1i = x1i + x3i;
tmk = x1r - x3r;
x3r = tmk - x3r;
tmk = x1i - x3i;
x3i = tmk - x3i;
x0r = x0r + x1r;
x0i = x0i + x1i;
tmk = x0r - x1r;
x1r = tmk - x1r;
tmk = x0i - x1i;
x1i = tmk - x1i;
x2r = x2r + x3i;
x2i = x2i - x3r;
tmk = x2r - x3i;
x3i = tmk - x3i;
tmk = x2i + x3r;
x3r = tmk + x3r;
*ptr_z++ = x0r;
*ptr_z++ = x0i;
*ptr_z++ = x2r;
*ptr_z++ = x2i;
*ptr_z++ = x1r;
*ptr_z++ = x1i;
*ptr_z++ = x3i;
*ptr_z++ = x3r;
}
ptr_y -= 64;
ptr_z -= 64;
{
FLOAT32 *data_r = ptr_y;
FLOAT32 *data_i = ptr_z;
for (k = 2; k != 0; k--) {
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
x0r = (*data_r);
x0i = (*(data_r + 1));
data_r += 8;
x1r = (*data_r);
x1i = (*(data_r + 1));
data_r += 8;
x2r = (*data_r);
x2i = (*(data_r + 1));
data_r += 8;
x3r = (*data_r);
x3i = (*(data_r + 1));
data_r -= 24;
x0r = x0r + x2r;
x0i = x0i + x2i;
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + x1r;
x0i = x0i + x1i;
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + x3i;
x2i = x2i - x3r;
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_r = x0r;
*(data_r + 1) = x0i;
data_r += 8;
*data_r = x2r;
*(data_r + 1) = x2i;
data_r += 8;
*data_r = x1r;
*(data_r + 1) = x1i;
data_r += 8;
*data_r = x3i;
*(data_r + 1) = x3r;
data_r += 8;
x0r = (*data_i);
x0i = (*(data_i + 1));
data_i += 8;
x1r = (*data_i);
x1i = (*(data_i + 1));
data_i += 8;
x2r = (*data_i);
x2i = (*(data_i + 1));
data_i += 8;
x3r = (*data_i);
x3i = (*(data_i + 1));
data_i -= 24;
x0r = x0r + x2r;
x0i = x0i + x2i;
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + x1r;
x0i = x0i + x1i;
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + x3i;
x2i = x2i - x3r;
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_i = x0r;
*(data_i + 1) = x0i;
data_i += 8;
*data_i = x2r;
*(data_i + 1) = x2i;
data_i += 8;
*data_i = x1r;
*(data_i + 1) = x1i;
data_i += 8;
*data_i = x3i;
*(data_i + 1) = x3r;
data_i += 8;
}
data_r = ptr_y + 2;
data_i = ptr_z + 2;
for (k = 2; k != 0; k--) {
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data_r += 8;
x1r = *data_r;
x1i = *(data_r + 1);
data_r += 8;
x2r = *data_r;
x2i = *(data_r + 1);
data_r += 8;
x3r = *data_r;
x3i = *(data_r + 1);
data_r -= 24;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
(FLOAT32)x1i, 0.923880f);
x1r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
(FLOAT32)x2i, 0.707107f);
x2r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
(FLOAT32)x3i, 0.382683f);
x3r = tmp;
x0r = (*data_r);
x0i = (*(data_r + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_r = x0r;
*(data_r + 1) = x0i;
data_r += 8;
*data_r = x2r;
*(data_r + 1) = x2i;
data_r += 8;
*data_r = x1r;
*(data_r + 1) = x1i;
data_r += 8;
*data_r = x3i;
*(data_r + 1) = x3r;
data_r += 8;
data_i += 8;
x1r = *data_i;
x1i = *(data_i + 1);
data_i += 8;
x2r = *data_i;
x2i = *(data_i + 1);
data_i += 8;
x3r = *data_i;
x3i = *(data_i + 1);
data_i -= 24;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
(FLOAT32)x1i, 0.923880f);
x1r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
(FLOAT32)x2i, 0.707107f);
x2r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
(FLOAT32)x3i, 0.382683f);
x3r = tmp;
x0r = (*data_i);
x0i = (*(data_i + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_i = x0r;
*(data_i + 1) = x0i;
data_i += 8;
*data_i = x2r;
*(data_i + 1) = x2i;
data_i += 8;
*data_i = x1r;
*(data_i + 1) = x1i;
data_i += 8;
*data_i = x3i;
*(data_i + 1) = x3r;
data_i += 8;
}
data_r -= 62;
data_i -= 62;
for (k = 2; k != 0; k--) {
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data_r += 8;
x1r = *data_r;
x1i = *(data_r + 1);
data_r += 8;
x2r = *data_r;
x2i = *(data_r + 1);
data_r += 8;
x3r = *data_r;
x3i = *(data_r + 1);
data_r -= 24;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
(FLOAT32)x1i, 0.707107f);
x1r = tmp;
tmp = x2i;
x2i = -x2r;
x2r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
x3r = tmp;
x0r = (*data_r);
x0i = (*(data_r + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_r = x0r;
*(data_r + 1) = x0i;
data_r += 8;
*data_r = x2r;
*(data_r + 1) = x2i;
data_r += 8;
*data_r = x1r;
*(data_r + 1) = x1i;
data_r += 8;
*data_r = x3i;
*(data_r + 1) = x3r;
data_r += 8;
data_i += 8;
x1r = *data_i;
x1i = *(data_i + 1);
data_i += 8;
x2r = *data_i;
x2i = *(data_i + 1);
data_i += 8;
x3r = *data_i;
x3i = *(data_i + 1);
data_i -= 24;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
(FLOAT32)x1i, 0.707107f);
x1r = tmp;
tmp = x2i;
x2i = -x2r;
x2r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
x3r = tmp;
x0r = (*data_i);
x0i = (*(data_i + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_i = x0r;
*(data_i + 1) = x0i;
data_i += 8;
*data_i = x2r;
*(data_i + 1) = x2i;
data_i += 8;
*data_i = x1r;
*(data_i + 1) = x1i;
data_i += 8;
*data_i = x3i;
*(data_i + 1) = x3r;
data_i += 8;
}
data_r -= 62;
data_i -= 62;
for (k = 2; k != 0; k--) {
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data_r += 8;
x1r = *data_r;
x1i = *(data_r + 1);
data_r += 8;
x2r = *data_r;
x2i = *(data_r + 1);
data_r += 8;
x3r = *data_r;
x3i = *(data_r + 1);
data_r -= 24;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
(FLOAT32)x1i, 0.382683f);
x1r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
x2r = tmp;
tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
(FLOAT32)x3i, 0.923880f);
x3r = tmp;
x0r = (*data_r);
x0i = (*(data_r + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i - x3i;
x3r = x1r - (x3r * 2);
x3i = x1i + (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_r = x0r;
*(data_r + 1) = x0i;
data_r += 8;
*data_r = x2r;
*(data_r + 1) = x2i;
data_r += 8;
*data_r = x1r;
*(data_r + 1) = x1i;
data_r += 8;
*data_r = x3i;
*(data_r + 1) = x3r;
data_r += 8;
data_i += 8;
x1r = *data_i;
x1i = *(data_i + 1);
data_i += 8;
x2r = *data_i;
x2i = *(data_i + 1);
data_i += 8;
x3r = *data_i;
x3i = *(data_i + 1);
data_i -= 24;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
(FLOAT32)x1i, 0.382683f);
x1r = tmp;
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
x2r = tmp;
tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
(FLOAT32)x3i, 0.923880f);
x3r = tmp;
x0r = (*data_i);
x0i = (*(data_i + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i - x3i;
x3r = x1r - (x3r * 2);
x3i = x1i + (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data_i = x0r;
*(data_i + 1) = x0i;
data_i += 8;
*data_i = x2r;
*(data_i + 1) = x2i;
data_i += 8;
*data_i = x1r;
*(data_i + 1) = x1i;
data_i += 8;
*data_i = x3i;
*(data_i + 1) = x3r;
data_i += 8;
}
data_r -= 62;
data_i -= 62;
}
{
const FLOAT32 *twiddles = ptr_w;
FLOAT32 x0r, x0i, x1r, x1i;
for (j = 8; j != 0; j--) {
FLOAT32 W1 = *twiddles;
twiddles++;
FLOAT32 W4 = *twiddles;
twiddles++;
FLOAT32 tmp;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += 32;
ptr_xr += 32;
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
ixheaacd_mult32X32float((FLOAT32)x1i, W4));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
(FLOAT32)x1i, W1);
x1r = tmp;
*ptr_xr = (x0r) - (x1r);
*(ptr_xr + 1) = (x0i) - (x1i);
ptr_y -= 32;
ptr_xr -= 32;
*ptr_xr = (x0r) + (x1r);
*(ptr_xr + 1) = (x0i) + (x1i);
ptr_y += 2;
ptr_xr += 2;
x0r = *ptr_z;
x0i = *(ptr_z + 1);
ptr_z += 32;
ptr_xi += 32;
x1r = *ptr_z;
x1i = *(ptr_z + 1);
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
ixheaacd_mult32X32float((FLOAT32)x1i, W4));
x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
(FLOAT32)x1i, W1);
x1r = tmp;
*ptr_xi = (x0r) - (x1r);
*(ptr_xi + 1) = (x0i) - (x1i);
ptr_z -= 32;
ptr_xi -= 32;
*ptr_xi = (x0r) + (x1r);
*(ptr_xi + 1) = (x0i) + (x1i);
ptr_z += 2;
ptr_xi += 2;
}
twiddles = ptr_w;
for (j = 8; j != 0; j--) {
FLOAT32 W1 = *twiddles;
twiddles++;
FLOAT32 W4 = *twiddles;
twiddles++;
FLOAT32 tmp;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += 32;
ptr_xr += 32;
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
ixheaacd_mult32X32float((FLOAT32)x1i, W1));
x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
ixheaacd_mult32X32float((FLOAT32)x1i, W4));
x1r = tmp;
*ptr_xr = (x0r) - (x1r);
*(ptr_xr + 1) = (x0i) - (x1i);
ptr_y -= 32;
ptr_xr -= 32;
*ptr_xr = (x0r) + (x1r);
*(ptr_xr + 1) = (x0i) + (x1i);
ptr_y += 2;
ptr_xr += 2;
x0r = *ptr_z;
x0i = *(ptr_z + 1);
ptr_z += 32;
ptr_xi += 32;
x1r = *ptr_z;
x1i = *(ptr_z + 1);
tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
ixheaacd_mult32X32float((FLOAT32)x1i, W1));
x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
ixheaacd_mult32X32float((FLOAT32)x1i, W4));
x1r = tmp;
*ptr_xi = (x0r) - (x1r);
*(ptr_xi + 1) = (x0i) - (x1i);
ptr_z -= 32;
ptr_xi -= 32;
*ptr_xi = (x0r) + (x1r);
*(ptr_xi + 1) = (x0i) + (x1i);
ptr_z += 2;
ptr_xi += 2;
}
}
}
VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
WORD32 i, j, k, n_stages, h2;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
WORD32 del, nodespacing, in_loop_cnt;
WORD32 dig_rev_shift;
WORD32 not_power_4;
FLOAT32 ptr_x[256];
FLOAT32 y[256];
WORD32 npoints = nlength;
FLOAT32 *ptr_y = y;
const FLOAT32 *ptr_w;
dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
n_stages = 30 - ixheaac_norm32(npoints);
not_power_4 = n_stages & 1;
n_stages = n_stages >> 1;
for (i = 0; i<nlength; i++)
{
ptr_x[2 * i] = xr[i];
ptr_x[2 * i + 1] = xi[i];
}
ptr_w = ixheaacd_twiddle_table_fft;
for (i = 0; i<npoints; i += 4)
{
FLOAT32 *inp = ptr_x;
DIG_REV(i, dig_rev_shift, h2);
if (not_power_4)
{
h2 += 1;
h2 &= ~1;
}
inp += (h2);
x0r = *inp;
x0i = *(inp + 1);
inp += (npoints >> 1);
x1r = *inp;
x1i = *(inp + 1);
inp += (npoints >> 1);
x2r = *inp;
x2i = *(inp + 1);
inp += (npoints >> 1);
x3r = *inp;
x3i = *(inp + 1);
x0r = x0r + x2r;
x0i = x0i + x2i;
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + x1r;
x0i = x0i + x1i;
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + x3i;
x2i = x2i - x3r;
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*ptr_y++ = x0r;
*ptr_y++ = x0i;
*ptr_y++ = x2r;
*ptr_y++ = x2i;
*ptr_y++ = x1r;
*ptr_y++ = x1i;
*ptr_y++ = x3i;
*ptr_y++ = x3r;
}
ptr_y -= 2 * npoints;
del = 4;
nodespacing = 64;
in_loop_cnt = npoints >> 4;
for (i = n_stages - 1; i>0; i--)
{
const FLOAT32 *twiddles = ptr_w;
FLOAT32 *data = ptr_y;
FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
WORD32 sec_loop_cnt;
for (k = in_loop_cnt; k != 0; k--)
{
x0r = (*data);
x0i = (*(data + 1));
data += (del << 1);
x1r = (*data);
x1i = (*(data + 1));
data += (del << 1);
x2r = (*data);
x2i = (*(data + 1));
data += (del << 1);
x3r = (*data);
x3i = (*(data + 1));
data -= 3 * (del << 1);
x0r = x0r + x2r;
x0i = x0i + x2i;
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + x1r;
x0i = x0i + x1i;
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + x3i;
x2i = x2i - x3r;
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data = ptr_y + 2;
sec_loop_cnt = (nodespacing * del);
sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \
+ (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \
- (sec_loop_cnt / 256);
j = nodespacing;
for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
{
w1h = *(twiddles + 2 * j);
w1l = *(twiddles + 2 * j + 1);
w2h = *(twiddles + 2 * (j << 1));
w2l = *(twiddles + 2 * (j << 1) + 1);
w3h = *(twiddles + 2 * j + 2 * (j << 1));
w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
for (k = in_loop_cnt; k != 0; k--)
{
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
x2r = tmp;
tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j <= (nodespacing * del) >> 1; j += nodespacing)
{
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1));
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) + 1);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
for (k = in_loop_cnt; k != 0; k--)
{
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
x2r = tmp;
tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j <= sec_loop_cnt * 2; j += nodespacing)
{
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
for (k = in_loop_cnt; k != 0; k--)
{
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
x2r = tmp;
tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i + x3i;
x3r = x1r - (x3r * 2);
x3i = x1i - (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j<nodespacing * del; j += nodespacing)
{
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
for (k = in_loop_cnt; k != 0; k--)
{
FLOAT32 tmp;
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
x2r = tmp;
tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = x0r + (x2r);
x0i = x0i + (x2i);
x2r = x0r - (x2r * 2);
x2i = x0i - (x2i * 2);
x1r = x1r + x3r;
x1i = x1i - x3i;
x3r = x1r - (x3r * 2);
x3i = x1i + (x3i * 2);
x0r = x0r + (x1r);
x0i = x0i + (x1i);
x1r = x0r - (x1r * 2);
x1i = x0i - (x1i * 2);
x2r = x2r + (x3i);
x2i = x2i - (x3r);
x3i = x2r - (x3i * 2);
x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
nodespacing >>= 2;
del <<= 2;
in_loop_cnt >>= 2;
}
if (not_power_4)
{
const FLOAT32 *twiddles = ptr_w;
nodespacing <<= 1;
for (j = del / 2; j != 0; j--)
{
FLOAT32 w1h = *twiddles;
FLOAT32 w1l = *(twiddles + 1);
FLOAT32 tmp;
twiddles += nodespacing * 2;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += (del << 1);
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
*ptr_y = (x0r) - (x1r);
*(ptr_y + 1) = (x0i) - (x1i);
ptr_y -= (del << 1);
*ptr_y = (x0r) + (x1r);
*(ptr_y + 1) = (x0i) + (x1i);
ptr_y += 2;
}
twiddles = ptr_w;
for (j = del / 2; j != 0; j--)
{
FLOAT32 w1h = *twiddles;
FLOAT32 w1l = *(twiddles + 1);
FLOAT32 tmp;
twiddles += nodespacing * 2;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += (del << 1);
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
x1r = tmp;
*ptr_y = (x0r) - (x1r);
*(ptr_y + 1) = (x0i) - (x1i);
ptr_y -= (del << 1);
*ptr_y = (x0r) + (x1r);
*(ptr_y + 1) = (x0i) + (x1i);
ptr_y += 2;
}
}
for (i = 0; i<nlength; i++)
{
xr[i] = y[2 * i];
xi[i] = y[2 * i + 1];
}
return;
}
VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
WORD32 fft_mode, WORD32 *preshift) {
WORD32 i, j, k, n_stages;
WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
WORD32 del, nodespacing, in_loop_cnt;
WORD32 not_power_4;
WORD32 npts, shift;
WORD32 dig_rev_shift;
WORD32 ptr_x[1024];
WORD32 y[1024];
WORD32 npoints = nlength;
WORD32 n = 0;
WORD32 *ptr_y = y;
const WORD32 *ptr_w;
dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
n_stages = 30 - ixheaac_norm32(npoints);
not_power_4 = n_stages & 1;
n_stages = n_stages >> 1;
npts = npoints;
while (npts >> 1) {
n++;
npts = npts >> 1;
}
if (n % 2 == 0)
shift = ((n + 4)) / 2;
else
shift = ((n + 3) / 2);
for (i = 0; i < nlength; i++) {
ptr_x[2 * i] = (xr[i] / (1 << (shift)));
ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
}
if (fft_mode == -1) {
ptr_w = ixheaacd_twiddle_table_fft_32x32;
for (i = 0; i < npoints; i += 4) {
WORD32 *inp = ptr_x;
DIG_REV(i, dig_rev_shift, h2);
if (not_power_4) {
h2 += 1;
h2 &= ~1;
}
inp += (h2);
x0r = *inp;
x0i = *(inp + 1);
inp += (npoints >> 1);
x1r = *inp;
x1i = *(inp + 1);
inp += (npoints >> 1);
x2r = *inp;
x2i = *(inp + 1);
inp += (npoints >> 1);
x3r = *inp;
x3i = *(inp + 1);
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_add32_sat(x2r, x3i);
x2i = ixheaac_sub32_sat(x2i, x3r);
x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*ptr_y++ = x0r;
*ptr_y++ = x0i;
*ptr_y++ = x2r;
*ptr_y++ = x2i;
*ptr_y++ = x1r;
*ptr_y++ = x1i;
*ptr_y++ = x3i;
*ptr_y++ = x3r;
}
ptr_y -= 2 * npoints;
del = 4;
nodespacing = 64;
in_loop_cnt = npoints >> 4;
for (i = n_stages - 1; i > 0; i--) {
const WORD32 *twiddles = ptr_w;
WORD32 *data = ptr_y;
WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
WORD32 sec_loop_cnt;
for (k = in_loop_cnt; k != 0; k--) {
x0r = (*data);
x0i = (*(data + 1));
data += (del << 1);
x1r = (*data);
x1i = (*(data + 1));
data += (del << 1);
x2r = (*data);
x2i = (*(data + 1));
data += (del << 1);
x3r = (*data);
x3i = (*(data + 1));
data -= 3 * (del << 1);
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_add32_sat(x2r, x3i);
x2i = ixheaac_sub32_sat(x2i, x3r);
x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data = ptr_y + 2;
sec_loop_cnt = (nodespacing * del);
sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
(sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
(sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
(sec_loop_cnt / 256);
j = nodespacing;
for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w1l = *(twiddles + 2 * j + 1);
w2h = *(twiddles + 2 * (j << 1));
w2l = *(twiddles + 2 * (j << 1) + 1);
w3h = *(twiddles + 2 * j + 2 * (j << 1));
w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
ixheaacd_mult32_sat(x2i, w2h));
x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
x2r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
ixheaacd_mult32_sat(x3i, w3h));
x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_add32_sat(x2r, x3i);
x2i = ixheaac_sub32_sat(x2i, x3r);
x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1));
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) + 1);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
ixheaacd_mult32_sat(x2i, w2h));
x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
x2r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
ixheaacd_mult32_sat(x3i, w3l));
x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
ixheaacd_mult32_sat(x3r, w3l));
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_add32_sat(x2r, x3i);
x2i = ixheaac_sub32_sat(x2i, x3r);
x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j <= sec_loop_cnt * 2; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
ixheaacd_mult32_sat(x2i, w2l));
x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
ixheaacd_mult32_sat(x2r, w2l));
x2r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
ixheaacd_mult32_sat(x3i, w3l));
x3i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
ixheaacd_mult32_sat(x3r, w3l));
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_add32_sat(x2r, x3i);
x2i = ixheaac_sub32_sat(x2i, x3r);
x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j < nodespacing * del; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
ixheaacd_mult32_sat(x2i, w2l));
x2i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
ixheaacd_mult32_sat(x2r, w2l));
x2r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
ixheaacd_mult32_sat(x3r, w3l));
x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_sub32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_add32_sat(x2r, x3i);
x2i = ixheaac_sub32_sat(x2i, x3r);
x3i = ixheaac_sub32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_add32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
nodespacing >>= 2;
del <<= 2;
in_loop_cnt >>= 2;
}
if (not_power_4) {
const WORD32 *twiddles = ptr_w;
nodespacing <<= 1;
shift += 1;
for (j = del / 2; j != 0; j--) {
WORD32 w1h = *twiddles;
WORD32 w1l = *(twiddles + 1);
WORD32 tmp;
twiddles += nodespacing * 2;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += (del << 1);
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
*ptr_y = (x0r) / 2 - (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
ptr_y -= (del << 1);
*ptr_y = (x0r) / 2 + (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
ptr_y += 2;
}
twiddles = ptr_w;
for (j = del / 2; j != 0; j--) {
WORD32 w1h = *twiddles;
WORD32 w1l = *(twiddles + 1);
WORD32 tmp;
twiddles += nodespacing * 2;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += (del << 1);
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1h),
ixheaacd_mult32_sat(x1i, w1l));
x1i = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1i, w1h),
ixheaacd_mult32_sat(x1r, w1l));
x1r = tmp;
*ptr_y = (x0r) / 2 - (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
ptr_y -= (del << 1);
*ptr_y = (x0r) / 2 + (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
ptr_y += 2;
}
}
}
else {
ptr_w = ixheaacd_twiddle_table_fft_32x32;
for (i = 0; i < npoints; i += 4) {
WORD32 *inp = ptr_x;
DIG_REV(i, dig_rev_shift, h2);
if (not_power_4) {
h2 += 1;
h2 &= ~1;
}
inp += (h2);
x0r = *inp;
x0i = *(inp + 1);
inp += (npoints >> 1);
x1r = *inp;
x1i = *(inp + 1);
inp += (npoints >> 1);
x2r = *inp;
x2i = *(inp + 1);
inp += (npoints >> 1);
x3r = *inp;
x3i = *(inp + 1);
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_sub32_sat(x2r, x3i);
x2i = ixheaac_add32_sat(x2i, x3r);
x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*ptr_y++ = x0r;
*ptr_y++ = x0i;
*ptr_y++ = x2r;
*ptr_y++ = x2i;
*ptr_y++ = x1r;
*ptr_y++ = x1i;
*ptr_y++ = x3i;
*ptr_y++ = x3r;
}
ptr_y -= 2 * npoints;
del = 4;
nodespacing = 64;
in_loop_cnt = npoints >> 4;
for (i = n_stages - 1; i > 0; i--) {
const WORD32 *twiddles = ptr_w;
WORD32 *data = ptr_y;
WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
WORD32 sec_loop_cnt;
for (k = in_loop_cnt; k != 0; k--) {
x0r = (*data);
x0i = (*(data + 1));
data += (del << 1);
x1r = (*data);
x1i = (*(data + 1));
data += (del << 1);
x2r = (*data);
x2i = (*(data + 1));
data += (del << 1);
x3r = (*data);
x3i = (*(data + 1));
data -= 3 * (del << 1);
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_sub32_sat(x2r, x3i);
x2i = ixheaac_add32_sat(x2i, x3r);
x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data = ptr_y + 2;
sec_loop_cnt = (nodespacing * del);
sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
(sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
(sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
(sec_loop_cnt / 256);
j = nodespacing;
for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1));
w3h = *(twiddles + 2 * j + 2 * (j << 1));
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) + 1);
w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
ixheaacd_mult32_sat(x2i, w2h));
x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
x2r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
ixheaacd_mult32_sat(x3i, w3h));
x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_sub32_sat(x2r, x3i);
x2i = ixheaac_add32_sat(x2i, x3r);
x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1));
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) + 1);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
ixheaacd_mult32_sat(x2i, w2h));
x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
x2r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
ixheaacd_mult32_sat(x3i, w3l));
x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
ixheaacd_mult32_sat(x3i, w3h));
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_sub32_sat(x2r, x3i);
x2i = ixheaac_add32_sat(x2i, x3r);
x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j <= sec_loop_cnt * 2; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
ixheaacd_mult32_sat(x2i, w2l));
x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
ixheaacd_mult32_sat(x2i, w2h));
x2r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x3r, w3h),
ixheaacd_mult32_sat(x3i, w3l));
x3i = ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
ixheaacd_mult32_sat(x3i, w3h));
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_add32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_sub32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_sub32_sat(x2r, x3i);
x2i = ixheaac_add32_sat(x2i, x3r);
x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
for (; j < nodespacing * del; j += nodespacing) {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
w1l = *(twiddles + 2 * j + 1);
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
for (k = in_loop_cnt; k != 0; k--) {
WORD32 tmp;
WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
x1r = *data;
x1i = *(data + 1);
data += (del << 1);
x2r = *data;
x2i = *(data + 1);
data += (del << 1);
x3r = *data;
x3i = *(data + 1);
data -= 3 * (del << 1);
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x2r, w2h),
ixheaacd_mult32_sat(x2i, w2l));
x2i = ixheaac_add32_sat(ixheaacd_mult32_sat(x2r, w2l),
ixheaacd_mult32_sat(x2i, w2h));
x2r = tmp;
tmp = -ixheaac_add32_sat(ixheaacd_mult32_sat(x3r, w3l),
ixheaacd_mult32_sat(x3i, w3h));
x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
x0r = ixheaac_add32_sat(x0r, x2r);
x0i = ixheaac_add32_sat(x0i, x2i);
x2r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x2r, 1));
x2i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x2i, 1));
x1r = ixheaac_add32_sat(x1r, x3r);
x1i = ixheaac_sub32_sat(x1i, x3i);
x3r = ixheaac_sub32_sat(x1r, ixheaac_shl32_sat(x3r, 1));
x3i = ixheaac_add32_sat(x1i, ixheaac_shl32_sat(x3i, 1));
x0r = ixheaac_add32_sat(x0r, x1r);
x0i = ixheaac_add32_sat(x0i, x1i);
x1r = ixheaac_sub32_sat(x0r, ixheaac_shl32_sat(x1r, 1));
x1i = ixheaac_sub32_sat(x0i, ixheaac_shl32_sat(x1i, 1));
x2r = ixheaac_sub32_sat(x2r, x3i);
x2i = ixheaac_add32_sat(x2i, x3r);
x3i = ixheaac_add32_sat(x2r, ixheaac_shl32_sat(x3i, 1));
x3r = ixheaac_sub32_sat(x2i, ixheaac_shl32_sat(x3r, 1));
*data = x0r;
*(data + 1) = x0i;
data += (del << 1);
*data = x2r;
*(data + 1) = x2i;
data += (del << 1);
*data = x1r;
*(data + 1) = x1i;
data += (del << 1);
*data = x3i;
*(data + 1) = x3r;
data += (del << 1);
}
data -= 2 * npoints;
data += 2;
}
nodespacing >>= 2;
del <<= 2;
in_loop_cnt >>= 2;
}
if (not_power_4) {
const WORD32 *twiddles = ptr_w;
nodespacing <<= 1;
shift += 1;
for (j = del / 2; j != 0; j--) {
WORD32 w1h = *twiddles;
WORD32 w1l = *(twiddles + 1);
WORD32 tmp;
twiddles += nodespacing * 2;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += (del << 1);
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
x1r = tmp;
*ptr_y = (x0r) / 2 - (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
ptr_y -= (del << 1);
*ptr_y = (x0r) / 2 + (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
ptr_y += 2;
}
twiddles = ptr_w;
for (j = del / 2; j != 0; j--) {
WORD32 w1h = *twiddles;
WORD32 w1l = *(twiddles + 1);
WORD32 tmp;
twiddles += nodespacing * 2;
x0r = *ptr_y;
x0i = *(ptr_y + 1);
ptr_y += (del << 1);
x1r = *ptr_y;
x1i = *(ptr_y + 1);
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(x1r, w1h),
ixheaacd_mult32_sat(x1i, w1l));
x1i = ixheaac_add32_sat(ixheaacd_mult32_sat(x1r, w1l),
ixheaacd_mult32_sat(x1i, w1h));
x1r = tmp;
*ptr_y = (x0r) / 2 - (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 - (x1i) / 2;
ptr_y -= (del << 1);
*ptr_y = (x0r) / 2 + (x1r) / 2;
*(ptr_y + 1) = (x0i) / 2 + (x1i) / 2;
ptr_y += 2;
}
}
}
for (i = 0; i < nlength; i++) {
xr[i] = y[2 * i];
xi[i] = y[2 * i + 1];
}
*preshift = shift - *preshift;
return;
}
static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op,
WORD32 sign_dir) {
WORD32 add_r, sub_r;
WORD32 add_i, sub_i;
WORD32 temp_real, temp_imag, temp;
WORD32 p1, p2, p3, p4;
WORD32 sinmu;
sinmu = -1859775393 * sign_dir;
temp_real = ixheaac_add32_sat(inp[0], inp[2]);
temp_imag = ixheaac_add32_sat(inp[1], inp[3]);
add_r = ixheaac_add32_sat(inp[2], inp[4]);
add_i = ixheaac_add32_sat(inp[3], inp[5]);
sub_r = ixheaac_sub32_sat(inp[2], inp[4]);
sub_i = ixheaac_sub32_sat(inp[3], inp[5]);
p1 = add_r >> 1;
p4 = add_i >> 1;
p2 = ixheaac_mult32_shl(sub_i, sinmu);
p3 = ixheaac_mult32_shl(sub_r, sinmu);
temp = ixheaac_sub32(inp[0], p1);
op[0] = ixheaac_add32_sat(temp_real, inp[4]);
op[1] = ixheaac_add32_sat(temp_imag, inp[5]);
op[2] = ixheaac_add32_sat(temp, p2);
op[3] = ixheaac_sub32_sat(ixheaac_sub32_sat(inp[1], p3), p4);
op[4] = ixheaac_sub32_sat(temp, p2);
op[5] = ixheaac_sub32_sat(ixheaac_add32_sat(inp[1], p3), p4);
return;
}
VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
WORD32 fft_mode, WORD32 *preshift) {
WORD32 i, j;
WORD32 shift = 0;
WORD32 xr_3[384];
WORD32 xi_3[384];
WORD32 x[1024];
WORD32 y[1024];
WORD32 cnfac, npts;
WORD32 mpass = nlength;
WORD32 n = 0;
WORD32 *ptr_x = x;
WORD32 *ptr_y = y;
cnfac = 0;
while (mpass % 3 == 0) {
mpass /= 3;
cnfac++;
}
npts = mpass;
for (i = 0; i < 3 * cnfac; i++) {
for (j = 0; j < mpass; j++) {
xr_3[j] = xr[3 * j + i];
xi_3[j] = xi[3 * j + i];
}
(*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift);
for (j = 0; j < mpass; j++) {
xr[3 * j + i] = xr_3[j];
xi[3 * j + i] = xi_3[j];
}
}
while (npts >> 1) {
n++;
npts = npts >> 1;
}
if (n % 2 == 0)
shift = ((n + 4)) / 2;
else
shift = ((n + 5) / 2);
*preshift = shift - *preshift + 1;
for (i = 0; i < nlength; i++) {
ptr_x[2 * i] = (xr[i] >> 1);
ptr_x[2 * i + 1] = (xi[i] >> 1);
}
{
const WORD32 *w1r, *w1i;
WORD32 tmp;
w1r = ixheaacd_twiddle_table_3pr;
w1i = ixheaacd_twiddle_table_3pi;
if (fft_mode < 0) {
for (i = 0; i < nlength; i += 3) {
w1r++;
w1i++;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
ptr_x[2 * i + 3] =
ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)),
ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)));
ptr_x[2 * i + 2] = tmp;
w1r++;
w1i++;
tmp = ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
ptr_x[2 * i + 5] =
ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)),
ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)));
ptr_x[2 * i + 4] = tmp;
w1r += 3 * (128 / mpass - 1) + 1;
w1i += 3 * (128 / mpass - 1) + 1;
}
}
else {
for (i = 0; i < nlength; i += 3) {
w1r++;
w1i++;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)),
ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i)));
ptr_x[2 * i + 3] =
ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)),
ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)));
ptr_x[2 * i + 2] = tmp;
w1r++;
w1i++;
tmp = ixheaac_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)),
ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i)));
ptr_x[2 * i + 5] =
ixheaac_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)),
ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)));
ptr_x[2 * i + 4] = tmp;
w1r += 3 * (128 / mpass - 1) + 1;
w1i += 3 * (128 / mpass - 1) + 1;
}
}
}
for (i = 0; i < mpass; i++) {
ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode);
ptr_x = ptr_x + 6;
ptr_y = ptr_y + 6;
}
ptr_y = y;
for (i = 0; i < mpass; i++) {
xr[i] = *ptr_y++;
xi[i] = *ptr_y++;
xr[mpass + i] = *ptr_y++;
xi[mpass + i] = *ptr_y++;
xr[2 * mpass + i] = *ptr_y++;
xi[2 * mpass + i] = *ptr_y++;
}
return;
}
VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
WORD32 *preshift) {
if (nlength & (nlength - 1)) {
ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
} else
(*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
return;
}