/* ======================================================================== */ /* TEXAS INSTRUMENTS, INC. */ /* */ /* NAME */ /* DSPF_sp_ifftSPxSP -- Single Precision floating point mixed radix */ /* inverse FFT with complex input */ /* */ /* USAGE */ /* This routine is C-callable and can be called as: */ /* */ /* void DSPF_sp_ifftSPxSP( */ /* int n, float * ptr_x, float * ptr_w, float * ptr_y, */ /* unsigned char * brev, int n_min, int offset, int n_max); */ /* */ /* n : Length of ifft in complex samples, power of 2 such that */ /* n >=8 and n <= 16384. */ /* ptr_x : Pointer to complex data input (normal order) */ /* ptr_w : Pointer to complex twiddle factor (see below) */ /* ptr_y : Pointer to complex output data (normal order) */ /* brev : Pointer to bit reverse table containing 64 entries */ /* n_min : Smallest ifft butterfly used in computation */ /* used for decomposing ifft into subiffts, see notes */ /* offset: Index in complex samples of sub-ifft from start of */ /* main ifft */ /* n_max : Size of main ifft in complex samples */ /* */ /* DESCRIPTION */ /* */ /* The benchmark performs a mixed radix forwards ifft using */ /* a special sequence of coefficients generated in the following */ /* way: */ /* */ /* //generate vector of twiddle factors for optimized algorithm// */ /* void tw_gen(float * w, int N) */ /* { */ /* int j, k; */ /* double x_t, y_t, theta1, theta2, theta3; */ /* const double PI = 3.141592654; */ /* */ /* for (j=1, k=0; j <= N>>2; j = j<<2) */ /* { */ /* for (i=0; i < N>>2; i+=j) */ /* { */ /* theta1 = 2*PI*i/N; */ /* x_t = cos(theta1); */ /* y_t = sin(theta1); */ /* w[k] = (float)x_t; */ /* w[k+1] = (float)y_t; */ /* */ /* theta2 = 4*PI*i/N; */ /* x_t = cos(theta2); */ /* y_t = sin(theta2); */ /* w[k+2] = (float)x_t; */ /* w[k+3] = (float)y_t; */ /* */ /* theta3 = 6*PI*i/N; */ /* x_t = cos(theta3); */ /* y_t = sin(theta3); */ /* w[k+4] = (float)x_t; */ /* w[k+5] = (float)y_t; */ /* k+=6; */ /* } */ /* } */ /* } */ /* This redundant set of twiddle factors is size 2*N float samples. */ /* The function is accurate to about 130dB of signal to noise ratio */ /* to the IDFT function below: */ /* */ /* void idft(int n, float x[], float y[]) */ /* { */ /* int k,i, index; */ /* const float PI = 3.14159654; */ /* float * p_x; */ /* float arg, fx_0, fx_1, fy_0, fy_1, co, si; */ /* */ /* for(k = 0; k<n; k++) */ /* { */ /* p_x = x; */ /* fy_0 = 0; */ /* fy_1 = 0; */ /* for(i=0; i<n; i++) */ /* { */ /* fx_0 = p_x[0]; */ /* fx_1 = p_x[1]; */ /* p_x += 2; */ /* index = (i*k) % n; */ /* arg = 2*PI*index/n; */ /* co = cos(arg); */ /* si = sin(arg); */ /* fy_0 += ((fx_0 * co) - (fx_1 * si)); */ /* fy_1 += ((fx_1 * co) + (fx_0 * si)); */ /* } */ /* y[2*k] = fy_0/n; */ /* y[2*k+1] = fy_1/n; */ /* } */ /* } */ /* */ /* The function takes the table and input data and calculates the */ /* ifft producing the frequency domain data in the Y array. the */ /* output is scaled by a scaling factor of 1/N. */ /* */ /* As the ifft allows every input point to effect every output */ /* point in a cache based system such as the c6711, this causes */ /* cache thrashing. This is mitigated by allowing the main ifft */ /* of size N to be divided into several steps, allowing as much */ /* data reuse as possible. */ /* */ /* For example the following function: */ /* */ /* DSPF_sp_ifftSPxSP(1024, &x[0],&w[0],y,brev,4, 0,1024) */ /* */ /* is equvalent to: */ /* */ /* DSPF_sp_ifftSPxSP(1024,&x[2*0],&w[0],y,brev,256,0,1024) */ /* DSPF_sp_ifftSPxSP(256,&x[2*0],&w[2*768],y,brev,4,0,1024) */ /* DSPF_sp_ifftSPxSP(256,&x[2*256],&w[2*768],y,brev,4,256,1024) */ /* DSPF_sp_ifftSPxSP(256,&x[2*512],&w[2*768],y,brev,4,512,1024) */ /* DSPF_sp_ifftSPxSP(256,&x[2*768],&w[2*768],y,brev,4,768,1024) */ /* */ /* Notice how the 1st ifft function is called on the entire 1K */ /* data set it covers the 1st pass of the ifft until the butterfly */ /* size is 256. The following 4 iffts do 256 pt iffts 25% of the */ /* size. These continue down to the end when the buttefly is of */ /* size 4. They use an index to the main twiddle factor array of */ /* 0.75*2*N. This is because the twiddle factor array is composed */ /* of successively decimated versions of the main array. */ /* */ /* N not equal to a power of 4 can be used, i.e. 512. In this case */ /* to decompose the ifft the following would be needed : */ /* */ /* DSPF_sp_ifftSPxSP(512, &x[0],&w[0],y,brev,2, 0,512) */ /* */ /* is equvalent to: */ /* */ /* DSPF_sp_ifftSPxSP(512, &x[2*0], &w[0] , y,brev,128, 0,512) */ /* DSPF_sp_ifftSPxSP(128, &x[2*0], &w[2*384],y,brev,4, 0,512) */ /* DSPF_sp_ifftSPxSP(128, &x[2*128],&w[2*384],y,brev,4, 128,512) */ /* DSPF_sp_ifftSPxSP(128, &x[2*256],&w[2*384],y,brev,4, 256,512) */ /* DSPF_sp_ifftSPxSP(128, &x[2*384],&w[2*384],y,brev,4, 384,512) */ /* */ /* The twiddle factor array is composed of log4(N) sets of twiddle */ /* factors, (3/4)*N, (3/16)*N, (3/64)*N, etc. The index into this */ /* array for each stage of the ifft is calculated by summing these */ /* indices up appropriately. */ /* For multiple iffts they can share the same table by calling the */ /* small iffts from further down in the twiddle factor array. In */ /* the same way as the decomposition works for more data reuse. */ /* */ /* Thus, the above decomposition can be summarized for a general N */ /* radix "rad" as follows: */ /* DSPF_sp_ifftSPxSP(N,&x[0],&w[0],y,brev,N/4,0,N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[0],&w[2*3*N/4],y,brev,rad,0,N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[2*N/4], &w[2*3*N/4],y,brev,rad,N/4, N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[2*N/2], &w[2*3*N/4],y,brev,rad,N/2, N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[2*3*N/4],&w[2*3*N/4],y,brev,rad,3*N/4,N)*/ /* */ /* As discussed previously, N can be either a power of 4 or 2. */ /* If N is a power of 4, then rad = 4, and if N is a power of 2 */ /* and not a power of 4, then rad = 2. "rad" is used to control */ /* how many stages of decomposition are performed. It is also */ /* used to determine whether a radix-4 or radix-2 decomposition */ /* should be performed at the last stage. Hence when "rad" is set */ /* to "N/4" the first stage of the transform alone is performed */ /* and the code exits. To complete the FFT, four other calls are */ /* required to perform N/4 size FFTs.In fact, the ordering of */ /* these 4 FFTs amongst themselves does not matter and hence from */ /* a cache perspective, it helps to go through the remaining 4 */ /* FFTs in exactly the opposite order to the first. This is */ /* illustrated as follows: */ /* */ /* DSPF_sp_ifftSPxSP(N,&x[0],&w[0],y,brev,N/4,0,N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[3*N/2],&w[3*N/2],y,brev,rad,3*N/4,N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[N],&w[3*N/2],y,brev,rad,N/2,N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[N/2],&w[3*N/2],y,brev,rad,N/4,N) */ /* DSPF_sp_ifftSPxSP(N/4,&x[0],&w[3*N/2],y,brev,rad,0,N) */ /* In addition this function can be used to minimize call */ /* overhead, by completing the FFT with one function call */ /* invocation as shown below: */ /* DSPF_sp_ifftSPxSP(N,&x[0],&w[0],y,brev,rad,0,N) */ /* */ /* ------------------------------------------------------------------------ */ /* Copyright (c) 2003 Texas Instruments, Incorporated. */ /* All Rights Reserved. */ /* ======================================================================== */ #ifndef DSPF_SP_IFFTSPXSP_ #define DSPF_SP_IFFTSPXSP_ 1 void DSPF_sp_ifftSPxSP( int N, float * x, float * w, float * y, unsigned char * brev, int n_min, int offset, int n_max ); #endif /* ======================================================================== */ /* End of file: dspf_sp_ifftSPxSP.h */ /* ------------------------------------------------------------------------ */ /* Copyright (C) 2003 Texas Instruments, Incorporated. */ /* All Rights Reserved. */ /* ======================================================================== */