| /* |
| Copyright (c) 2019 Hayati Ayguen ( [email protected] ) |
| */ |
| |
| #include "pffastconv.h" |
| #include "pffft.h" |
| |
| #include <stdlib.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <math.h> |
| #include <assert.h> |
| #include <string.h> |
| |
| #define FASTCONV_DBG_OUT 0 |
| |
| |
| /* detect compiler flavour */ |
| #if defined(_MSC_VER) |
| # define RESTRICT __restrict |
| #pragma warning( disable : 4244 4305 4204 4456 ) |
| #elif defined(__GNUC__) |
| # define RESTRICT __restrict |
| #endif |
| |
| |
| void *pffastconv_malloc(size_t nb_bytes) |
| { |
| return pffft_aligned_malloc(nb_bytes); |
| } |
| |
| void pffastconv_free(void *p) |
| { |
| pffft_aligned_free(p); |
| } |
| |
| int pffastconv_simd_size() |
| { |
| return pffft_simd_size(); |
| } |
| |
| |
| |
| struct PFFASTCONV_Setup |
| { |
| float * Xt; /* input == x in time domain - copy for alignment */ |
| float * Xf; /* input == X in freq domain */ |
| float * Hf; /* filterCoeffs == H in freq domain */ |
| float * Mf; /* input * filterCoeffs in freq domain */ |
| PFFFT_Setup *st; |
| int filterLen; /* convolution length */ |
| int Nfft; /* FFT/block length */ |
| int flags; |
| float scale; |
| }; |
| |
| |
| PFFASTCONV_Setup * pffastconv_new_setup( const float * filterCoeffs, int filterLen, int * blockLen, int flags ) |
| { |
| PFFASTCONV_Setup * s = NULL; |
| const int cplxFactor = ( (flags & PFFASTCONV_CPLX_INP_OUT) && (flags & PFFASTCONV_CPLX_SINGLE_FFT) ) ? 2 : 1; |
| const int minFftLen = 2*pffft_simd_size()*pffft_simd_size(); |
| int i, Nfft = 2 * pffft_next_power_of_two(filterLen -1); |
| #if FASTCONV_DBG_OUT |
| const int iOldBlkLen = *blockLen; |
| #endif |
| |
| if ( Nfft < minFftLen ) |
| Nfft = minFftLen; |
| |
| if ( flags & PFFASTCONV_CPLX_FILTER ) |
| return NULL; |
| |
| s = pffastconv_malloc( sizeof(struct PFFASTCONV_Setup) ); |
| |
| if ( *blockLen > Nfft ) { |
| Nfft = *blockLen; |
| Nfft = pffft_next_power_of_two(Nfft); |
| } |
| *blockLen = Nfft; /* this is in (complex) samples */ |
| |
| Nfft *= cplxFactor; |
| |
| if ( (flags & PFFASTCONV_DIRECT_INP) && !(flags & PFFASTCONV_CPLX_INP_OUT) ) |
| s->Xt = NULL; |
| else |
| s->Xt = pffastconv_malloc((unsigned)Nfft * sizeof(float)); |
| s->Xf = pffastconv_malloc((unsigned)Nfft * sizeof(float)); |
| s->Hf = pffastconv_malloc((unsigned)Nfft * sizeof(float)); |
| s->Mf = pffastconv_malloc((unsigned)Nfft * sizeof(float)); |
| s->st = pffft_new_setup(Nfft, PFFFT_REAL); /* with complex: we do 2 x fft() */ |
| s->filterLen = filterLen; /* filterLen == convolution length == length of impulse response */ |
| if ( cplxFactor == 2 ) |
| s->filterLen = 2 * filterLen - 1; |
| s->Nfft = Nfft; /* FFT/block length */ |
| s->flags = flags; |
| s->scale = (float)( 1.0 / Nfft ); |
| |
| memset( s->Xt, 0, (unsigned)Nfft * sizeof(float) ); |
| if ( flags & PFFASTCONV_CORRELATION ) { |
| for ( i = 0; i < filterLen; ++i ) |
| s->Xt[ ( Nfft - cplxFactor * i ) & (Nfft -1) ] = filterCoeffs[ i ]; |
| } else { |
| for ( i = 0; i < filterLen; ++i ) |
| s->Xt[ ( Nfft - cplxFactor * i ) & (Nfft -1) ] = filterCoeffs[ filterLen - 1 - i ]; |
| } |
| |
| pffft_transform(s->st, s->Xt, s->Hf, /* tmp = */ s->Mf, PFFFT_FORWARD); |
| |
| #if FASTCONV_DBG_OUT |
| printf("\n fastConvSetup(filterLen = %d, blockLen %d) --> blockLen %d, OutLen = %d\n" |
| , filterLen, iOldBlkLen, *blockLen, Nfft - filterLen +1 ); |
| #endif |
| |
| return s; |
| } |
| |
| |
| void pffastconv_destroy_setup( PFFASTCONV_Setup * s ) |
| { |
| if (!s) |
| return; |
| pffft_destroy_setup(s->st); |
| pffastconv_free(s->Mf); |
| pffastconv_free(s->Hf); |
| pffastconv_free(s->Xf); |
| if ( s->Xt ) |
| pffastconv_free(s->Xt); |
| pffastconv_free(s); |
| } |
| |
| |
| int pffastconv_apply(PFFASTCONV_Setup * s, const float *input_, int cplxInputLen, float *output_, int applyFlush) |
| { |
| const float * RESTRICT X = input_; |
| float * RESTRICT Y = output_; |
| const int Nfft = s->Nfft; |
| const int filterLen = s->filterLen; |
| const int flags = s->flags; |
| const int cplxFactor = ( (flags & PFFASTCONV_CPLX_INP_OUT) && (flags & PFFASTCONV_CPLX_SINGLE_FFT) ) ? 2 : 1; |
| const int inputLen = cplxFactor * cplxInputLen; |
| int inpOff, procLen, numOut = 0, j, part, cplxOff; |
| |
| /* applyFlush != 0: |
| * inputLen - inpOff -filterLen + 1 > 0 |
| * <=> inputLen -filterLen + 1 > inpOff |
| * <=> inpOff < inputLen -filterLen + 1 |
| * |
| * applyFlush == 0: |
| * inputLen - inpOff >= Nfft |
| * <=> inputLen - Nfft >= inpOff |
| * <=> inpOff <= inputLen - Nfft |
| * <=> inpOff < inputLen - Nfft + 1 |
| */ |
| |
| if ( cplxFactor == 2 ) |
| { |
| const int maxOff = applyFlush ? (inputLen -filterLen + 1) : (inputLen - Nfft + 1); |
| #if 0 |
| printf( "*** inputLen %d, filterLen %d, Nfft %d => maxOff %d\n", inputLen, filterLen, Nfft, maxOff); |
| #endif |
| for ( inpOff = 0; inpOff < maxOff; inpOff += numOut ) |
| { |
| procLen = ( (inputLen - inpOff) >= Nfft ) ? Nfft : (inputLen - inpOff); |
| numOut = ( procLen - filterLen + 1 ) & ( ~1 ); |
| if (!numOut) |
| break; |
| #if 0 |
| if (!inpOff) |
| printf("*** inpOff = %d, numOut = %d\n", inpOff, numOut); |
| if (inpOff + filterLen + 2 >= maxOff ) |
| printf("*** inpOff = %d, inpOff + numOut = %d\n", inpOff, inpOff + numOut); |
| #endif |
| |
| if ( flags & PFFASTCONV_DIRECT_INP ) |
| { |
| pffft_transform(s->st, X + inpOff, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD); |
| } |
| else |
| { |
| memcpy( s->Xt, X + inpOff, (unsigned)procLen * sizeof(float) ); |
| if ( procLen < Nfft ) |
| memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) ); |
| |
| pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD); |
| } |
| |
| pffft_zconvolve_no_accu(s->st, s->Xf, s->Hf, /* tmp = */ s->Mf, s->scale); |
| |
| if ( flags & PFFASTCONV_DIRECT_OUT ) |
| { |
| pffft_transform(s->st, s->Mf, Y + inpOff, s->Xf, PFFFT_BACKWARD); |
| } |
| else |
| { |
| pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD); |
| memcpy( Y + inpOff, s->Xf, (unsigned)numOut * sizeof(float) ); |
| } |
| } |
| return inpOff / cplxFactor; |
| } |
| else |
| { |
| const int maxOff = applyFlush ? (inputLen -filterLen + 1) : (inputLen - Nfft + 1); |
| const int numParts = (flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1; |
| |
| for ( inpOff = 0; inpOff < maxOff; inpOff += numOut ) |
| { |
| procLen = ( (inputLen - inpOff) >= Nfft ) ? Nfft : (inputLen - inpOff); |
| numOut = procLen - filterLen + 1; |
| |
| for ( part = 0; part < numParts; ++part ) /* iterate per real/imag component */ |
| { |
| |
| if ( flags & PFFASTCONV_CPLX_INP_OUT ) |
| { |
| cplxOff = 2 * inpOff + part; |
| for ( j = 0; j < procLen; ++j ) |
| s->Xt[j] = X[cplxOff + 2 * j]; |
| if ( procLen < Nfft ) |
| memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) ); |
| |
| pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD); |
| } |
| else if ( flags & PFFASTCONV_DIRECT_INP ) |
| { |
| pffft_transform(s->st, X + inpOff, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD); |
| } |
| else |
| { |
| memcpy( s->Xt, X + inpOff, (unsigned)procLen * sizeof(float) ); |
| if ( procLen < Nfft ) |
| memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) ); |
| |
| pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD); |
| } |
| |
| pffft_zconvolve_no_accu(s->st, s->Xf, s->Hf, /* tmp = */ s->Mf, s->scale); |
| |
| if ( flags & PFFASTCONV_CPLX_INP_OUT ) |
| { |
| pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD); |
| |
| cplxOff = 2 * inpOff + part; |
| for ( j = 0; j < numOut; ++j ) |
| Y[ cplxOff + 2 * j ] = s->Xf[j]; |
| } |
| else if ( flags & PFFASTCONV_DIRECT_OUT ) |
| { |
| pffft_transform(s->st, s->Mf, Y + inpOff, s->Xf, PFFFT_BACKWARD); |
| } |
| else |
| { |
| pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD); |
| memcpy( Y + inpOff, s->Xf, (unsigned)numOut * sizeof(float) ); |
| } |
| |
| } |
| } |
| |
| return inpOff; |
| } |
| } |
| |