/* ======================================================================== */
/*                                                                          */
/*   TEXAS INSTRUMENTS, INC.                                                */
/*                                                                          */
/*   NAME                                                                   */
/*         DSPF_sp_fftSPxSP - Floating point FFT with complex input         */
/*                                                                          */
/*     USAGE                                                                */
/*          This routine is C-callable and can be called as:                */
/*                                                                          */
/*          void DSPF_sp_fftSPxSP(                                          */
/*              int N, float * ptr_x, float * ptr_w, float * ptr_y,         */
/*              unsigned char * brev, int n_min, int offset, int n_max);    */
/*                                                                          */
/*          N = length of fft in complex samples, power of 2 such that      */
/*          N>=8 and N <= 16384.                                            */
/*          ptr_x = pointer to complex data input                           */
/*          ptr_w = pointer to complex twiddle factor (see below)           */
/*          ptr_y = pointer to complex output data                          */
/*          brev = pointer to bit reverse table containing 64 entries       */
/*          n_min = smallest fft butterfly used in computation              */
/*                  used for decomposing fft into subffts, see notes        */
/*          offset = index in complex samples of sub-fft from start of      */
/*                   main fft                                               */
/*          n_max = size of main fft in complex samples                     */
/*                                                                          */
/*    DESCRIPTION                                                           */
/*         The benchmark performs a mixed radix forwards fft using          */
/*         a special sequence of coefficients generated in the following    */
/*         way:                                                             */
/*                                                                          */
/*           // generate vector of twiddle factors for optimized            */
/*          algorithm //                                                    */
/*          void tw_gen(float * w, int N)                                   */
/*          {                                                               */
/*            int j, k;                                                     */
/*            double x_t, y_t, theta1, theta2, theta3;                      */
/*            const double PI = 3.141592654;                                */
/*                                                                          */
/*            for (j=1, k=0; j <= N>>2; j = j<<2)                           */
/*            {                                                             */
/*                for (i=0; i < N>>2; i+=j)                                 */
/*                {                                                         */
/*                    theta1 = 2*PI*i/N;                                    */
/*                    x_t = cos(theta1);                                    */
/*                    y_t = sin(theta1);                                    */
/*                    w[k]   =  (float)x_t;                                 */
/*                    w[k+1] =  (float)y_t;                                 */
/*                                                                          */
/*                    theta2 = 4*PI*i/N;                                    */
/*                    x_t = cos(theta2);                                    */
/*                    y_t = sin(theta2);                                    */
/*                    w[k+2] =  (float)x_t;                                 */
/*                    w[k+3] =  (float)y_t;                                 */
/*                                                                          */
/*                    theta3 = 6*PI*i/N;                                    */
/*                    x_t = cos(theta3);                                    */
/*                    y_t = sin(theta3);                                    */
/*                    w[k+4] =  (float)x_t;                                 */
/*                    w[k+5] =  (float)y_t;                                 */
/*                    k+=6;                                                 */
/*                }                                                         */
/*            }                                                             */
/*          }                                                               */
/*        This redundant set of twiddle factors is size 2*N float samples.  */
/*        The function is accurate to about 130dB of signal to noise ratio  */
/*        to the DFT function below:                                        */
/*                                                                          */
/*          void dft(int N, float x[], float y[])                           */
/*          {                                                               */
/*             int k,i, index;                                              */
/*             const float PI = 3.14159654;                                 */
/*             float * p_x;                                                 */
/*             float arg, fx_0, fx_1, fy_0, fy_1, co, si;                   */
/*                                                                          */
/*             for(k = 0; k<N; k++)                                         */
/*             {                                                            */
/*               p_x = x;                                                   */
/*               fy_0 = 0;                                                  */
/*               fy_1 = 0;                                                  */
/*               for(i=0; i<N; i++)                                         */
/*               {                                                          */
/*                 fx_0 = p_x[0];                                           */
/*                 fx_1 = p_x[1];                                           */
/*                 p_x += 2;                                                */
/*                 index = (i*k) % N;                                       */
/*                 arg = 2*PI*index/N;                                      */
/*                 co = cos(arg);                                           */
/*                 si = -sin(arg);                                          */
/*                 fy_0 += ((fx_0 * co) - (fx_1 * si));                     */
/*                 fy_1 += ((fx_1 * co) + (fx_0 * si));                     */
/*               }                                                          */
/*               y[2*k] = fy_0;                                             */
/*               y[2*k+1] = fy_1;                                           */
/*             }                                                            */
/*          }                                                               */
/*                                                                          */
/*         The function takes the table and input data and calculates       */
/*         the fft producing the frequency domain data in the Y array.      */
/*         As the fft allows every input point to effect every output       */
/*         point in a cache based system such as the c6711, this causes     */
/*         cache thrashing. This is mitigated by allowing the main fft      */
/*         of size N to be divided into several steps, allowing as much     */
/*         data reuse as possible.                                          */
/*                                                                          */
/*         For example the following function:                              */
/*                                                                          */
/*         DSPF_sp_fftSPxSP(1024, &x[0],&w[0],y,brev,4,  0,1024);           */
/*                                                                          */
/*         is equvalent to:                                                 */
/*                                                                          */
/*         DSPF_sp_fftSPxSP(1024,&x[2*0],  &w[0] ,   y,brev,256,  0,1024)   */
/*         DSPF_sp_fftSPxSP(256, &x[2*0],  &w[2*768],y,brev,4,    0,1024)   */
/*         DSPF_sp_fftSPxSP(256, &x[2*256],&w[2*768],y,brev,4,  256,1024)   */
/*         DSPF_sp_fftSPxSP(256, &x[2*512],&w[2*768],y,brev,4,  512,1024)   */
/*         DSPF_sp_fftSPxSP(256, &x[2*768],&w[2*768],y,brev,4,  768,1024)   */
/*                                                                          */
/*         Notice how the 1st fft function is called on the entire 1K       */
/*         data set it covers the 1st pass of the fft until the butterfly   */
/*         size is 256. The following 4 ffts do 256 pt ffts 25% of the      */
/*         size. These continue down to the end when the buttly is of size  */
/*         4. They use an index to the main twiddle factor array of         */
/*         0.75*2*N.This is because the twiddle factor array is composed    */
/*         of successively decimated versions of the main array.            */
/*                                                                          */
/*         N not equal to a power of 4 can be used, i.e. 512. In this case  */
/*         to decompose the fft the following would be needed :             */
/*                                                                          */
/*         DSPF_sp_fftSPxSP(512, &x[0],&w[0],y,brev,2,  0,512);             */
/*                                                                          */
/*         is equvalent to:                                                 */
/*                                                                          */
/*         DSPF_sp_fftSPxSP(512, &x[2*0],  &w[0] ,   y,brev,128,  0,512)    */
/*         DSPF_sp_fftSPxSP(128, &x[2*0],  &w[2*384],y,brev,4,    0,512)    */
/*         DSPF_sp_fftSPxSP(128, &x[2*128],&w[2*384],y,brev,4,  128,512)    */
/*         DSPF_sp_fftSPxSP(128, &x[2*256],&w[2*384],y,brev,4,  256,512)    */
/*         DSPF_sp_fftSPxSP(128, &x[2*384],&w[2*384],y,brev,4,  384,512)    */
/*                                                                          */
/*         The twiddle factor array is composed of log4(N) sets of twiddle  */
/*         factors, (3/4)*N, (3/16)*N, (3/64)*N, etc.  The index into this  */
/*         array for each stage of the fft is calculated by summing these   */
/*         indices up appropriately.                                        */
/*         For multiple ffts they can share the same table by calling the   */
/*         small ffts from further down in the twiddle factor array. In     */
/*         the same way as the decomposition works for more data reuse.     */
/*                                                                          */
/*         Thus, the above decomposition can be summarized for a general    */
/*         N, radix "rad" as follows:                                       */
/*         DSPF_sp_fftSPxSP(N,  &x[0],      &w[0],      y,brev,N/4,0,   N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[0],      &w[2*3*N/4],y,brev,rad,0,   N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[2*N/4],  &w[2*3*N/4],y,brev,rad,N/4, N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[2*N/2],  &w[2*3*N/4],y,brev,rad,N/2, N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[2*3*N/4],&w[2*3*N/4],y,brev,rad,3*N/4,N) */
/*                                                                          */
/*         As discussed previously, N can be either a power of 4 or 2.      */
/*         If N is a power of 4, then rad = 4, and if N is a power of 2     */
/*         and not a power of 4, then rad = 2. "rad" is used to control     */
/*         how many stages of decomposition are performed. It is also       */
/*         used to determine whether a radix-4 or radix-2 decomposition     */
/*         should be performed at the last stage. Hence when "rad" is set   */
/*         to "N/4" the first stage of the transform alone is performed     */
/*         and the code exits. To complete the FFT, four other calls are    */
/*         required to perform N/4 size FFTs.In fact, the ordering of       */
/*         these 4 FFTs amongst themselves does not matter and hence from   */
/*         a cache perspective, it helps to go through the remaining 4      */
/*         FFTs in exactly the opposite order to the first. This is         */
/*         illustrated as follows:                                          */
/*                                                                          */
/*         DSPF_sp_fftSPxSP(N,  &x[0],      &w[0],      y,brev,N/4,0,   N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[2*3*N/4],&w[2*3*N/4],y,brev,rad,3*N/4,N) */
/*         DSPF_sp_fftSPxSP(N/4,&x[2*N/2],  &w[2*3*N/4],y,brev,rad,N/2, N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[2*N/4],  &w[2*3*N/4],y,brev,rad,N/4, N)  */
/*         DSPF_sp_fftSPxSP(N/4,&x[0],      &w[2*3*N/4],y,brev,rad,0,   N)  */
/*                                                                          */
/*         In addition this function can be used to minimize call           */
/*         overhead, by completing the FFT with one function call           */
/*         invocation as shown below:                                       */
/*         DSPF_sp_fftSPxSP(N,  &x[0],      &w[0],      y, brev, rad, 0,N)  */
/*                                                                          */
/*                                                                          */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */

#ifndef DSPF_SP_FFTSPXSP
#define DSPF_SP_FFTSPXSP  1

void DSPF_sp_fftSPxSP(int N, float* ptr_x, float* ptr_w, float* ptr_y, 
                      unsigned char* brev, int n_min, int offset, int n_max);

#endif
/* ======================================================================== */
/*  End of file:  dspf_sp_fftSPxSP.h                                        */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */