//FastConvo.c FIR filter implemented using overlap-add fast convolution

#include <math.h>
#include "coeffs.h"               //time domain FIR coefficients
#define PI 3.14159265358979
#define PTS 256                   //number of points for FFT
#define SQRT_PTS 16               //used in twiddle factor calc.
#define RADIX 2			    //passed to TI FFT routines
#define DELTA (2*PI)/PTS          
typedef struct Complex_tag {float real, imag;} COMPLEX ;
#pragma DATA_ALIGN(W, sizeof(COMPLEX))
#pragma DATA_ALIGN(samples, sizeof(COMPLEX))
#pragma DATA_ALIGN(h, sizeof(COMPLEX))
COMPLEX W[PTS/RADIX] ;		    //twiddle factor array 
COMPLEX samples[PTS];             //processing buffer
COMPLEX h[PTS];                   //FIR filter coefficients
short buffercount = 0;            //buffer count for iobuffer samples
float iobuffer[PTS/2];            //primary input/output buffer
float overlap[PTS/2];             //intermediate result buffer
short i;                          //index variable 
short flag = 0;                   //set to indicate iobuffer full
float a, b;                       //variables used in complex multiply
short NUMCOEFFS = sizeof(coeffs)/sizeof(float);
short iTwid[SQRT_PTS] ;		    //PTS/2 + 1 > sqrt(PTS)

interrupt void c_int11(void)      //ISR
{
 output_sample((int)(iobuffer[buffercount]));
 iobuffer[buffercount++] = (float)(input_sample());
 if (buffercount >= PTS/2)        //for overlap-add method iobuffer
  {                               //is half size of FFT used
   buffercount = 0;
   flag = 1;
  }
}
     
main()
{                                 //set up array of twiddle factors
 digitrev_index(iTwid, PTS/RADIX, RADIX);	
 for(i = 0 ; i < PTS/RADIX ; i++)
 {
   W[i].real = cos(DELTA*i);
   W[i].imag = sin(DELTA*i);
 }
 bitrev(W, iTwid, PTS/RADIX);	    //bit reverse order W

 for (i = 0 ; i<PTS ; i++)        //initialise PTS element 
  {                               //of COMPLEX to hold real-valued
    h[i].real = 0.0;              //time domain FIR filter coefficients
    h[i].imag = 0.0;
  }                               
  for (i = 0 ; i < NUMCOEFFS ; i++)
  {                               //read FIR filter coeffs
    h[i].real = coeffs[i];        //NUMCOEFFS should be less than PTS/2
  }
  cfftr2_dit(h,W,PTS);            //transform filter coeffs 
  comm_intr();                    //initialise DSK, codec, McBSP

  while(1)                        //frame processing infinite loop
   {
     while (flag == 0);           //wait for iobuffer full
       flag = 0;
     for (i = 0 ; i<PTS/2 ; i++)  //iobuffer into first half of
     {                            //samples buffer  
      samples[i].real = iobuffer[i];
      iobuffer[i] = overlap[i];   //previously processed output
     }                            //to iobuffer
    for (i = 0 ; i<PTS/2 ; i++)       
    {                             //second half of samples to overlap
      overlap[i] = samples[i+PTS/2].real;
      samples[i+PTS/2].real = 0.0;//zero-pad input from iobuffer
    }                              
    for (i=0 ; i<PTS ; i++)         
      samples[i].imag = 0.0;      //init imag parts in samples buffer
    
    cfftr2_dit(samples,W,PTS);    //complex FFT function from TI
    
    for (i=0 ; i<PTS ; i++)       //frequency-domain representation
    {                             //complex multiply samples by h
      a = samples[i].real;
      b = samples[i].imag;
      samples[i].real = h[i].real*a - h[i].imag*b;
      samples[i].imag = h[i].real*b + h[i].imag*a;
    }
    
    icfftr2_dif(samples,W,PTS);   //inverse FFT function from TI
    
    for (i=0 ; i<PTS ; i++) 
       samples[i].real /= PTS;
    for (i=0 ; i<PTS/2 ; i++)     //add first half of samples
       overlap[i] += samples[i].real; //to overlap   
  }                               //end of while(1)
}                                 //end of main()