/* ======================================================================== */ /* */ /* TEXAS INSTRUMENTS, INC. */ /* */ /* NAME */ /* DSPF_sp_fftSPxSP - Floating point FFT with complex input */ /* */ /* USAGE */ /* This routine is C-callable and can be called as: */ /* */ /* void DSPF_sp_fftSPxSP( */ /* int N, float * ptr_x, float * ptr_w, float * ptr_y, */ /* unsigned char * brev, int n_min, int offset, int n_max); */ /* */ /* N = length of fft in complex samples, power of 2 such that */ /* N>=8 and N <= 16384. */ /* ptr_x = pointer to complex data input */ /* ptr_w = pointer to complex twiddle factor (see below) */ /* ptr_y = pointer to complex output data */ /* brev = pointer to bit reverse table containing 64 entries */ /* n_min = smallest fft butterfly used in computation */ /* used for decomposing fft into subffts, see notes */ /* offset = index in complex samples of sub-fft from start of */ /* main fft */ /* n_max = size of main fft in complex samples */ /* */ /* DESCRIPTION */ /* The benchmark performs a mixed radix forwards fft using */ /* a special sequence of coefficients generated in the following */ /* way: */ /* */ /* // generate vector of twiddle factors for optimized */ /* algorithm // */ /* void tw_gen(float * w, int N) */ /* { */ /* int j, k; */ /* double x_t, y_t, theta1, theta2, theta3; */ /* const double PI = 3.141592654; */ /* */ /* for (j=1, k=0; j <= N>>2; j = j<<2) */ /* { */ /* for (i=0; i < N>>2; i+=j) */ /* { */ /* theta1 = 2*PI*i/N; */ /* x_t = cos(theta1); */ /* y_t = sin(theta1); */ /* w[k] = (float)x_t; */ /* w[k+1] = (float)y_t; */ /* */ /* theta2 = 4*PI*i/N; */ /* x_t = cos(theta2); */ /* y_t = sin(theta2); */ /* w[k+2] = (float)x_t; */ /* w[k+3] = (float)y_t; */ /* */ /* theta3 = 6*PI*i/N; */ /* x_t = cos(theta3); */ /* y_t = sin(theta3); */ /* w[k+4] = (float)x_t; */ /* w[k+5] = (float)y_t; */ /* k+=6; */ /* } */ /* } */ /* } */ /* This redundant set of twiddle factors is size 2*N float samples. */ /* The function is accurate to about 130dB of signal to noise ratio */ /* to the DFT function below: */ /* */ /* void dft(int N, float x[], float y[]) */ /* { */ /* int k,i, index; */ /* const float PI = 3.14159654; */ /* float * p_x; */ /* float arg, fx_0, fx_1, fy_0, fy_1, co, si; */ /* */ /* for(k = 0; k<N; k++) */ /* { */ /* p_x = x; */ /* fy_0 = 0; */ /* fy_1 = 0; */ /* for(i=0; i<N; i++) */ /* { */ /* fx_0 = p_x[0]; */ /* fx_1 = p_x[1]; */ /* p_x += 2; */ /* index = (i*k) % N; */ /* arg = 2*PI*index/N; */ /* co = cos(arg); */ /* si = -sin(arg); */ /* fy_0 += ((fx_0 * co) - (fx_1 * si)); */ /* fy_1 += ((fx_1 * co) + (fx_0 * si)); */ /* } */ /* y[2*k] = fy_0; */ /* y[2*k+1] = fy_1; */ /* } */ /* } */ /* */ /* The function takes the table and input data and calculates */ /* the fft producing the frequency domain data in the Y array. */ /* As the fft allows every input point to effect every output */ /* point in a cache based system such as the c6711, this causes */ /* cache thrashing. This is mitigated by allowing the main fft */ /* of size N to be divided into several steps, allowing as much */ /* data reuse as possible. */ /* */ /* For example the following function: */ /* */ /* DSPF_sp_fftSPxSP(1024, &x[0],&w[0],y,brev,4, 0,1024); */ /* */ /* is equvalent to: */ /* */ /* DSPF_sp_fftSPxSP(1024,&x[2*0], &w[0] , y,brev,256, 0,1024) */ /* DSPF_sp_fftSPxSP(256, &x[2*0], &w[2*768],y,brev,4, 0,1024) */ /* DSPF_sp_fftSPxSP(256, &x[2*256],&w[2*768],y,brev,4, 256,1024) */ /* DSPF_sp_fftSPxSP(256, &x[2*512],&w[2*768],y,brev,4, 512,1024) */ /* DSPF_sp_fftSPxSP(256, &x[2*768],&w[2*768],y,brev,4, 768,1024) */ /* */ /* Notice how the 1st fft function is called on the entire 1K */ /* data set it covers the 1st pass of the fft until the butterfly */ /* size is 256. The following 4 ffts do 256 pt ffts 25% of the */ /* size. These continue down to the end when the buttly is of size */ /* 4. They use an index to the main twiddle factor array of */ /* 0.75*2*N.This is because the twiddle factor array is composed */ /* of successively decimated versions of the main array. */ /* */ /* N not equal to a power of 4 can be used, i.e. 512. In this case */ /* to decompose the fft the following would be needed : */ /* */ /* DSPF_sp_fftSPxSP(512, &x[0],&w[0],y,brev,2, 0,512); */ /* */ /* is equvalent to: */ /* */ /* DSPF_sp_fftSPxSP(512, &x[2*0], &w[0] , y,brev,128, 0,512) */ /* DSPF_sp_fftSPxSP(128, &x[2*0], &w[2*384],y,brev,4, 0,512) */ /* DSPF_sp_fftSPxSP(128, &x[2*128],&w[2*384],y,brev,4, 128,512) */ /* DSPF_sp_fftSPxSP(128, &x[2*256],&w[2*384],y,brev,4, 256,512) */ /* DSPF_sp_fftSPxSP(128, &x[2*384],&w[2*384],y,brev,4, 384,512) */ /* */ /* The twiddle factor array is composed of log4(N) sets of twiddle */ /* factors, (3/4)*N, (3/16)*N, (3/64)*N, etc. The index into this */ /* array for each stage of the fft is calculated by summing these */ /* indices up appropriately. */ /* For multiple ffts they can share the same table by calling the */ /* small ffts from further down in the twiddle factor array. In */ /* the same way as the decomposition works for more data reuse. */ /* */ /* Thus, the above decomposition can be summarized for a general */ /* N, radix "rad" as follows: */ /* DSPF_sp_fftSPxSP(N, &x[0], &w[0], y,brev,N/4,0, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[0], &w[2*3*N/4],y,brev,rad,0, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[2*N/4], &w[2*3*N/4],y,brev,rad,N/4, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[2*N/2], &w[2*3*N/4],y,brev,rad,N/2, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[2*3*N/4],&w[2*3*N/4],y,brev,rad,3*N/4,N) */ /* */ /* As discussed previously, N can be either a power of 4 or 2. */ /* If N is a power of 4, then rad = 4, and if N is a power of 2 */ /* and not a power of 4, then rad = 2. "rad" is used to control */ /* how many stages of decomposition are performed. It is also */ /* used to determine whether a radix-4 or radix-2 decomposition */ /* should be performed at the last stage. Hence when "rad" is set */ /* to "N/4" the first stage of the transform alone is performed */ /* and the code exits. To complete the FFT, four other calls are */ /* required to perform N/4 size FFTs.In fact, the ordering of */ /* these 4 FFTs amongst themselves does not matter and hence from */ /* a cache perspective, it helps to go through the remaining 4 */ /* FFTs in exactly the opposite order to the first. This is */ /* illustrated as follows: */ /* */ /* DSPF_sp_fftSPxSP(N, &x[0], &w[0], y,brev,N/4,0, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[2*3*N/4],&w[2*3*N/4],y,brev,rad,3*N/4,N) */ /* DSPF_sp_fftSPxSP(N/4,&x[2*N/2], &w[2*3*N/4],y,brev,rad,N/2, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[2*N/4], &w[2*3*N/4],y,brev,rad,N/4, N) */ /* DSPF_sp_fftSPxSP(N/4,&x[0], &w[2*3*N/4],y,brev,rad,0, N) */ /* */ /* In addition this function can be used to minimize call */ /* overhead, by completing the FFT with one function call */ /* invocation as shown below: */ /* DSPF_sp_fftSPxSP(N, &x[0], &w[0], y, brev, rad, 0,N) */ /* */ /* */ /* ------------------------------------------------------------------------ */ /* Copyright (c) 2003 Texas Instruments, Incorporated. */ /* All Rights Reserved. */ /* ======================================================================== */ #ifndef DSPF_SP_FFTSPXSP #define DSPF_SP_FFTSPXSP 1 void DSPF_sp_fftSPxSP(int N, float* ptr_x, float* ptr_w, float* ptr_y, unsigned char* brev, int n_min, int offset, int n_max); #endif /* ======================================================================== */ /* End of file: dspf_sp_fftSPxSP.h */ /* ------------------------------------------------------------------------ */ /* Copyright (c) 2003 Texas Instruments, Incorporated. */ /* All Rights Reserved. */ /* ======================================================================== */