#include <math.h>
#include <assert.h>

#include "fxtaux.h"
#include "fxtcompl.h"

// RX: presumably the radix (4); it is not referenced in the code visible here.
#define RX 4
// LX: used below as the per-pass increment of the stage exponent m and as the
// shift that turns a sub-transform length m2 into its quarter (m2>>LX).
#define LX 2

// When defined, an odd ldn is handled by an initial pass of fft8cc() over
// blocks of 8 points; otherwise an initial radix 2 pass is used.
#define USE_INITIAL_RAD8 // recommended

// for debug:
// PR(x) expands to nothing, so every PR( ... ) trace statement in this file is
// compiled out.  Re-enabling the traces would need a PR that expands to x and
// the stream header below.
//#include <iostream.h>
#define PR(x)   


void 
dit4c_fft(double *fr, double *fi, int ldn, int is)
//
// slightly optimized version of complex radix 4 fft
//
// Transforms n=(1<<ldn) complex values in place.
//
//   fr, fi : real and imaginary parts; elements 0..n-1 are read and
//            overwritten with the result.
//   ldn    : base-2 logarithm of the transform length.
//   is     : sign selector.  is>0 uses twiddle angles +2*pi*j/m2, anything
//            else uses -2*pi*j/m2; is<0 versus is>=0 selects the direction of
//            the quarter-turn rotation inside the butterflies.
//            NOTE(review): is==0 would combine the negative angle with the
//            is>=0 rotation branch; callers are presumably expected to pass
//            +1 or -1 only -- confirm.
//
// Outline:
//   1. scramble() reorders the data (presumably a bit-reversal permutation;
//      defined elsewhere).
//   2. n<=2 is finished directly.
//   3. If ldn is odd, one radix 8 pass (USE_INITIAL_RAD8) or one radix 2 pass
//      is done so that the remaining passes can all be radix 4.
//   4. If ldn is even, a first radix 4 pass without twiddle multiplications.
//   5. General radix 4 passes with twiddle factors until m exceeds ldn.
//
// No scaling of the result is done in the code visible here.
//
{
    int n,m,m2,mr;      // n: length; m: log2 of current sub-transform length;
                        // m2: that length; mr: a quarter of it
    int j,r;            // j: offset inside a sub-transform; r: start of a sub-transform
    int i0,i1,i2,i3;    // indices of the four butterfly elements
    double ph0,c,s;     // ph0: angle step of the current pass; c,s: filled by sincos()
    complex a0,a1,a2,a3;
    complex e,e2,e3;    // twiddle factor and its square and cube
    complex x,y,u,v;

    n=(1<<ldn);

    PR( cout<<"\n (cmpl-ver) dit4_fft(): "<<flush; )

    scramble(fr,fi,n);

    if(n<=2)  // data length is 1 or 2
    {
     
        if(n==2)
        {
            // single two-point butterfly: (f0,f1) -> (f0+f1, f0-f1)
            double t;
            t=fr[0]; fr[0]+=fr[1]; fr[1]=t-fr[1];
            t=fi[0]; fi[0]+=fi[1]; fi[1]=t-fi[1];
        }

        return;
    }

    // m is 1 for odd ldn and 0 for even ldn
    m=(ldn&1);

    if( m!=0 )  // n is not a power of 4, need one radix 2 (or radix 8) step
    {
// i0 is an alias for the loop counter r inside this region
#define i0 r

#if defined USE_INITIAL_RAD8

        PR( cout<<"\n   initial 8:  "<< flush; )
        // hand every block of 8 consecutive points to fft8cc()
        for(r=0; r<n; r+=8)
	{
	    PR( cout<<"\n           i0="<<i0<<"  (i1,...,i7) "<<flush; )
	    fft8cc(fr+r,fi+r,is);
	}
        // m becomes 3 here; together with the m+=LX after this block the
        // general loop starts at m=5 (sub-transforms of length 32)
        m+=LX;

#else

	PR( cout<<"\n   initial 2:   "<< flush; )
        // two-point butterflies on adjacent pairs (i0,i1)=(r,r+1);
        // m stays 1, so the general loop starts at m=3 (length 8)
        for(r=0,i1=1; r<n; r+=2, i1+=2)
	{
            double t;
	    PR( cout<<"\n           i0="<<i0<<"  i1="<<i1<<flush; )
            t=fr[i0]-fr[i1]; fr[i0]+=fr[i1]; fr[i1]=t;
            t=fi[i0]-fi[i1]; fi[i0]+=fi[i1]; fi[i1]=t;
	}
#endif  // defined USE_INITIAL_RAD8


#undef i0
    }

    m+=LX;

    // m==LX only when ldn is even (m was 0): do the first radix 4 pass,
    // which needs no twiddle multiplications
    if(m==LX)
    {
        PR( cout<<"\n m="<<m<< flush; )
        m2=(m<<1);      // equals 4 here because m==2
        mr=(m2>>LX);    // equals 1: the four elements are adjacent

// i0 is an alias for the loop counter r inside this region
#define i0 r 

	PR( cout<<"\n   initial 4:    "<< flush; )
        for(r=0; r<n; r+=m2)
        {
	    double xr,yr,ur,vr, xi,yi,ui,vi;
	    double t;
    
            i1=i0+mr;
            i2=i1+mr;
            i3=i2+mr;
	    PR( cout<<"\n           i0="<<i0<<"  i1="<<i1<<"  i2="<<i2<<"  i3="<<i3<<flush; )

            // x = f[i0]+f[i1], u = f[i0]-f[i1]
            // (the embedded assignments load ur/ui and t while forming the sum)
            xr=((ur=fr[i0])+(t=fr[i1]));
            ur-=t;
            xi=((ui=fi[i0])+(t=fi[i1]));
            ui-=t;

            // y = f[i2]+f[i3]; (vr,vi) is the difference f[i2]-f[i3] turned
            // by a quarter rotation whose direction depends on is
            if(is<0)
            {
                // vi = fr[i3]-fr[i2],  vr = fi[i2]-fi[i3]
                yr=(t=fr[i2])+(vi=fr[i3]);
                vi-=t;

                yi=(vr=fi[i2])+(t=fi[i3]);
                vr-=t;
	    }
            else
            {
                // vi = fr[i2]-fr[i3],  vr = fi[i3]-fi[i2]
                yr=(vi=fr[i2])+(t=fr[i3]);
                vi-=t;

                yi=(t=fi[i2])+(vr=fi[i3]);
                vr-=t;
	    }

            // outputs: f[i1]=u+v, f[i3]=u-v, f[i0]=x+y, f[i2]=x-y
            fr[i3]=ur-vr;
            fr[i1]=ur+vr;

            fi[i3]=ui-vi;
            fi[i1]=ui+vi;

            fr[i0]=xr+yr;
            fr[i2]=xr-yr;

            fi[i0]=xi+yi;
            fi[i2]=xi-yi;
	}
#undef i0

        m+=LX;
    }



    // general radix 4 passes; each pass combines four sub-transforms of
    // length mr into one of length m2=4*mr
    for( ; m<=ldn ; m+=LX)
    {
        PR( cout<<"\n m="<<m<< flush; )

        m2=(1<<m);
        mr=(m2>>LX);
        // angle step: +2*pi/m2 for is>0, -2*pi/m2 otherwise
        ph0=(is>0?2.0*M_PI:-2.0*M_PI)/m2;

        for(j=0; j<mr; j++)
        {
	    PR( cout<<"\n    j="<<j<< flush; )

            // project helper (not the glibc sincos, whose argument order is
            // different); presumably stores cos(j*ph0) in c and sin(j*ph0)
            // in s -- confirm in fxtaux.h
            sincos(&c,&s,j*ph0);

            // twiddle factor for offset j and its square and cube
            e =complex(c,s);
            e2=e*e;
            e3=e2*e;

            // the same twiddles serve offset j of every length-m2 sub-transform
            for(r=0, i0=j+r; r<n; r+=m2, i0+=m2)
            {
                // note the crossed naming: the element at i1 is scaled by e2
                // and called a2, the element at i2 is scaled by e and called a1
                          a0=complex(fr[i0],fi[i0]);
                i1=i0+mr; a2=complex(fr[i1],fi[i1])*e2;
                i2=i1+mr; a1=complex(fr[i2],fi[i2])*e;
                i3=i2+mr; a3=complex(fr[i3],fi[i3])*e3;
                
		PR( cout<<"\n       r="<<r<< flush; )
		PR( cout<<"\n           i0="<<i0<<"  i1="<<i1<<"  i2="<<i2<<"  i3="<<i3<<flush; )

                x=(a0+a2);
                y=(a1+a3);

                u=(a0-a2);

                // v is (a1-a3) turned by a quarter rotation, written out
                // component-wise instead of multiplying by +-i
                if(is<0)
                {
                    // v = -i*(a1-a3)
                    v=complex(a1.imag()-a3.imag(), a3.real()-a1.real());
                }
                else
                {
                    // v = +i*(a1-a3)
                    v=complex(a3.imag()-a1.imag(), a1.real()-a3.real());
		}
                
                // store the four outputs back into the split real/imag arrays
                a1=u+v; fr[i1]=a1.real(); fi[i1]=a1.imag();
                a3=u-v; fr[i3]=a3.real(); fi[i3]=a3.imag();
                a0=x+y; fr[i0]=a0.real(); fi[i0]=a0.imag();
                a2=x-y; fr[i2]=a2.real(); fi[i2]=a2.imag();
            }
        }
    }
}
// ============================== end COOLEY4C_FFT ==========================
