/* ======================================================================== */
/*  TEXAS INSTRUMENTS, INC.                                                 */
/*                                                                          */
/*  NAME                                                                    */
/*      DSPF_sp_ifftSPxSP -- Single Precision floating point mixed radix    */
/*      inverse FFT with complex input                                      */
/*                                                                          */
/*  USAGE                                                                   */
/*          This routine is C-callable and can be called as:                */
/*                                                                          */
/*          void DSPF_sp_ifftSPxSP(                                         */
/*              int n, float * ptr_x, float * ptr_w, float * ptr_y,         */
/*              unsigned char * brev, int n_min, int offset, int n_max);    */
/*                                                                          */
/*      n   : Length of ifft in complex samples, power of 2 such that       */
/*             n >=8 and n <= 16384.                                        */
/*    ptr_x : Pointer to complex data input (normal order)                  */
/*    ptr_w : Pointer to complex twiddle factor (see below)                 */
/*    ptr_y : Pointer to complex output data (normal order)                 */
/*    brev  : Pointer to bit reverse table containing 64 entries            */
/*    n_min : Smallest ifft butterfly used in computation                   */
/*            used for decomposing ifft into subiffts, see notes            */
/*    offset: Index in complex samples of sub-ifft from start of            */
/*                   main ifft                                              */
/*    n_max : Size of main ifft in complex samples                          */
/*                                                                          */
/*  DESCRIPTION                                                             */
/*                                                                          */
/*         The benchmark performs a mixed radix forwards ifft using         */
/*         a special sequence of coefficients generated in the following    */
/*         way:                                                             */
/*                                                                          */
/*         //generate vector of twiddle factors for optimized algorithm//   */
/*          void tw_gen(float * w, int N)                                   */
/*          {                                                               */
/*            int j, k;                                                     */
/*            double x_t, y_t, theta1, theta2, theta3;                      */
/*            const double PI = 3.141592654;                                */
/*                                                                          */
/*            for (j=1, k=0; j <= N>>2; j = j<<2)                           */
/*            {                                                             */
/*                for (i=0; i < N>>2; i+=j)                                 */
/*                {                                                         */
/*                    theta1 = 2*PI*i/N;                                    */
/*                    x_t = cos(theta1);                                    */
/*                    y_t = sin(theta1);                                    */
/*                    w[k]   =  (float)x_t;                                 */
/*                    w[k+1] =  (float)y_t;                                 */
/*                                                                          */
/*                    theta2 = 4*PI*i/N;                                    */
/*                    x_t = cos(theta2);                                    */
/*                    y_t = sin(theta2);                                    */
/*                    w[k+2] =  (float)x_t;                                 */
/*                    w[k+3] =  (float)y_t;                                 */
/*                                                                          */
/*                    theta3 = 6*PI*i/N;                                    */
/*                    x_t = cos(theta3);                                    */
/*                    y_t = sin(theta3);                                    */
/*                    w[k+4] =  (float)x_t;                                 */
/*                    w[k+5] =  (float)y_t;                                 */
/*                    k+=6;                                                 */
/*                }                                                         */
/*            }                                                             */
/*          }                                                               */
/*        This redundant set of twiddle factors is size 2*N float samples.  */
/*        The function is accurate to about 130dB of signal to noise ratio  */
/*        to the IDFT function below:                                       */
/*                                                                          */
/*          void idft(int n, float x[], float y[])                          */
/*          {                                                               */
/*            int k,i, index;                                               */
/*            const float PI = 3.14159654;                                  */
/*            float * p_x;                                                  */
/*            float arg, fx_0, fx_1, fy_0, fy_1, co, si;                    */
/*                                                                          */
/*            for(k = 0; k<n; k++)                                          */
/*            {                                                             */
/*              p_x = x;                                                    */
/*              fy_0 = 0;                                                   */
/*              fy_1 = 0;                                                   */
/*              for(i=0; i<n; i++)                                          */
/*              {                                                           */
/*                fx_0 = p_x[0];                                            */
/*                fx_1 = p_x[1];                                            */
/*                p_x += 2;                                                 */
/*                index = (i*k) % n;                                        */
/*                arg = 2*PI*index/n;                                       */
/*                co = cos(arg);                                            */
/*                si = sin(arg);                                            */
/*                fy_0 += ((fx_0 * co) - (fx_1 * si));                      */
/*                fy_1 += ((fx_1 * co) + (fx_0 * si));                      */
/*              }                                                           */
/*              y[2*k] = fy_0/n;                                            */
/*              y[2*k+1] = fy_1/n;                                          */
/*            }                                                             */
/*         }                                                                */
/*                                                                          */
/*         The function takes the table and input data and calculates the   */
/*         ifft producing the frequency domain data in the Y array. the     */
/*         output is scaled by a scaling factor of 1/N.                     */
/*                                                                          */
/*         As the ifft allows every input point to effect every output      */
/*         point in a cache based system such as the c6711, this causes     */
/*         cache thrashing. This is mitigated by allowing the main ifft     */
/*         of size N to be divided into several steps, allowing as much     */
/*         data reuse as possible.                                          */
/*                                                                          */
/*         For example the following function:                              */
/*                                                                          */
/*         DSPF_sp_ifftSPxSP(1024, &x[0],&w[0],y,brev,4,  0,1024)           */
/*                                                                          */
/*         is equvalent to:                                                 */
/*                                                                          */
/*         DSPF_sp_ifftSPxSP(1024,&x[2*0],&w[0],y,brev,256,0,1024)          */
/*         DSPF_sp_ifftSPxSP(256,&x[2*0],&w[2*768],y,brev,4,0,1024)         */
/*         DSPF_sp_ifftSPxSP(256,&x[2*256],&w[2*768],y,brev,4,256,1024)     */
/*         DSPF_sp_ifftSPxSP(256,&x[2*512],&w[2*768],y,brev,4,512,1024)     */
/*         DSPF_sp_ifftSPxSP(256,&x[2*768],&w[2*768],y,brev,4,768,1024)     */
/*                                                                          */
/*         Notice how the 1st ifft function is called on the entire 1K      */
/*         data set it covers the 1st pass of the ifft until the butterfly  */
/*         size is 256. The following 4 iffts do 256 pt iffts 25% of the    */
/*         size. These continue down to the end when the buttefly is of     */
/*         size 4. They use an index to the main twiddle factor array of    */
/*         0.75*2*N. This is because the twiddle factor array is composed   */
/*         of successively decimated versions of the main array.            */
/*                                                                          */
/*         N not equal to a power of 4 can be used, i.e. 512. In this case  */
/*         to decompose the ifft the following would be needed :            */
/*                                                                          */
/*         DSPF_sp_ifftSPxSP(512, &x[0],&w[0],y,brev,2,  0,512)             */
/*                                                                          */
/*         is equvalent to:                                                 */
/*                                                                          */
/*         DSPF_sp_ifftSPxSP(512, &x[2*0],  &w[0] ,   y,brev,128,  0,512)   */
/*         DSPF_sp_ifftSPxSP(128, &x[2*0],  &w[2*384],y,brev,4,    0,512)   */
/*         DSPF_sp_ifftSPxSP(128, &x[2*128],&w[2*384],y,brev,4,  128,512)   */
/*         DSPF_sp_ifftSPxSP(128, &x[2*256],&w[2*384],y,brev,4,  256,512)   */
/*         DSPF_sp_ifftSPxSP(128, &x[2*384],&w[2*384],y,brev,4,  384,512)   */
/*                                                                          */
/*         The twiddle factor array is composed of log4(N) sets of twiddle  */
/*         factors, (3/4)*N, (3/16)*N, (3/64)*N, etc.  The index into this  */
/*         array for each stage of the ifft is calculated by summing these  */
/*         indices up appropriately.                                        */
/*         For multiple iffts they can share the same table by calling the  */
/*         small iffts from further down in the twiddle factor array. In    */
/*         the same way as the decomposition works for more data reuse.     */
/*                                                                          */
/*         Thus, the above decomposition can be summarized for a general N  */
/*         radix "rad" as follows:                                          */
/*         DSPF_sp_ifftSPxSP(N,&x[0],&w[0],y,brev,N/4,0,N)                  */
/*         DSPF_sp_ifftSPxSP(N/4,&x[0],&w[2*3*N/4],y,brev,rad,0,N)          */
/*         DSPF_sp_ifftSPxSP(N/4,&x[2*N/4],  &w[2*3*N/4],y,brev,rad,N/4, N) */
/*         DSPF_sp_ifftSPxSP(N/4,&x[2*N/2],  &w[2*3*N/4],y,brev,rad,N/2, N) */
/*         DSPF_sp_ifftSPxSP(N/4,&x[2*3*N/4],&w[2*3*N/4],y,brev,rad,3*N/4,N)*/
/*                                                                          */
/*         As discussed previously, N can be either a power of 4 or 2.      */
/*         If N is a power of 4, then rad = 4, and if N is a power of 2     */
/*         and not a power of 4, then rad = 2. "rad" is used to control     */
/*         how many stages of decomposition are performed. It is also       */
/*         used to determine whether a radix-4 or radix-2 decomposition     */
/*         should be performed at the last stage. Hence when "rad" is set   */
/*         to "N/4" the first stage of the transform alone is performed     */
/*         and the code exits. To complete the FFT, four other calls are    */
/*         required to perform N/4 size FFTs.In fact, the ordering of       */
/*         these 4 FFTs amongst themselves does not matter and hence from   */
/*         a cache perspective, it helps to go through the remaining 4      */
/*         FFTs in exactly the opposite order to the first. This is         */
/*         illustrated as follows:                                          */
/*                                                                          */
/*         DSPF_sp_ifftSPxSP(N,&x[0],&w[0],y,brev,N/4,0,N)                  */
/*         DSPF_sp_ifftSPxSP(N/4,&x[3*N/2],&w[3*N/2],y,brev,rad,3*N/4,N)    */
/*         DSPF_sp_ifftSPxSP(N/4,&x[N],&w[3*N/2],y,brev,rad,N/2,N)          */
/*         DSPF_sp_ifftSPxSP(N/4,&x[N/2],&w[3*N/2],y,brev,rad,N/4,N)        */
/*         DSPF_sp_ifftSPxSP(N/4,&x[0],&w[3*N/2],y,brev,rad,0,N)            */
/*         In addition this function can be used to minimize call           */
/*     overhead, by completing the FFT with one function call               */
/*     invocation as shown below:                                           */
/*         DSPF_sp_ifftSPxSP(N,&x[0],&w[0],y,brev,rad,0,N)                  */
/*                                                                          */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */
#ifndef DSPF_SP_IFFTSPXSP_
#define DSPF_SP_IFFTSPXSP_ 1

void DSPF_sp_ifftSPxSP(
                       int N,
                       float * x,
                       float * w,
                       float * y,
                       unsigned char * brev,
                       int n_min,
                       int offset,
                       int n_max
                       );

#endif
/* ======================================================================== */
/*  End of file: dspf_sp_ifftSPxSP.h                                        */
/* ------------------------------------------------------------------------ */
/*          Copyright (C) 2003 Texas Instruments, Incorporated.             */
/*                          All Rights Reserved.                            */
/* ======================================================================== */