#include <math.h>
#include <assert.h>

#include "fxtaux.h"

// Radix of the transform (4 == 1<<LX).
#define RX 4
// Number of index bits handled per radix-4 pass: used as the per-pass
// increment of the stage exponent and as the shift that derives a quarter
// of the current block length.
#define LX 2

// When defined, an odd ldn is handled by an initial pass of fft8cc() calls
// on consecutive groups of 8 elements instead of an initial radix-2 pass.
#define USE_INITIAL_RAD8 // recommended

// for debug:
//#include <iostream.h>
// PR(x) wraps the debug trace statements; it is defined empty here, so the
// traces expand to nothing.
#define PR(x)   


void
dit4_fft(double *fr, double *fi, int ldn, int is)
//
// In-place radix-4 decimation-in-time FFT of the complex sequence
// fr[k] + i*fi[k],  k = 0 .. n-1,  n = 2^ldn.
//
//   fr, fi : real and imaginary parts; both arrays are overwritten
//   ldn    : base-2 logarithm of the length; must be >= 0 and small
//            enough that 1L<<ldn is representable in a long
//   is     : sign selector: is>0 uses twiddle phases +2*pi*j/m2,
//            anything else uses -2*pi*j/m2
//
// Outline:
//   1. scramble() reorders the data (presumably the bit-reversal
//      permutation -- see fxtaux.h).
//   2. n<=2 is finished directly.
//   3. If ldn is odd, one initial pass removes the odd bit: either
//      fft8cc() on every group of 8 elements (USE_INITIAL_RAD8,
//      presumably an 8-point transform -- see fxtaux.h) or a plain
//      radix-2 pass.
//   4. If ldn is even, a twiddle-free radix-4 pass on groups of 4.
//   5. The remaining passes are radix-4 butterflies with twiddles,
//      block length m2 = 2^m growing by a factor of 4 per pass.
//
// This function itself applies no 1/n scaling.
//
// NOTE(review): for is==0 the twiddle phase is negative while the
// butterflies take the non-negative branch; callers are presumably
// expected to pass a nonzero sign -- confirm.
//
{
    long n, m, m2, mr;
    long j, r;
    long i0, i1, i2, i3;
    double ph0, c, s;
    double c2, s2, c3, s3;

    PR( cout<<"\n (re-ver) dit4_fft(): "<<flush; )

    assert(ldn >= 0);

    // 1L, not 1: the result is stored in a long and must not overflow int
    n = (1L << ldn);
    scramble(fr, fi, n);

    if (n <= 2)  // lengths 1 and 2: at most one sum/difference butterfly
    {
        if (n == 2)
        {
            const double ar = fr[0], br = fr[1];
            const double ai = fi[0], bi = fi[1];

            fr[0] = ar + br;  fr[1] = ar - br;
            fi[0] = ai + bi;  fi[1] = ai - bi;
        }

        return;
    }

    m = (ldn & 1);

    if (m != 0)  // n is not a power of 4: consume the odd bit first
    {
#if defined USE_INITIAL_RAD8

        PR( cout<<"\n   initial 8:  "<< flush; )
        for (r = 0; r < n; r += 8)
        {
            PR( cout<<"\n           i0="<<r<<"  (i1,...,i7) "<<flush; )
            fft8cc(fr + r, fi + r, is);
        }
        m += LX;  // the 8-point pass covered two more bits than radix 2

#else

        PR( cout<<"\n   initial 2:   "<< flush; )
        for (r = 0; r < n; r += 2)
        {
            double t;

            i1 = r + 1;
            PR( cout<<"\n           i0="<<r<<"  i1="<<i1<<flush; )
            t = fr[r] - fr[i1];  fr[r] += fr[i1];  fr[i1] = t;
            t = fi[r] - fi[i1];  fi[r] += fi[i1];  fi[i1] = t;
        }

#endif  // defined USE_INITIAL_RAD8
    }

    m += LX;  // m is now the exponent of the next block length 2^m

    if (m == LX)  // ldn even: first radix-4 pass, all twiddles are 1
    {
        PR( cout<<"\n m="<<m<< flush; )
        m2 = (1L << m);   // block length 4
        mr = (m2 >> LX);  // quarter length 1

        PR( cout<<"\n   initial 4:    "<< flush; )
        for (i0 = 0; i0 < n; i0 += m2)
        {
            double xr, yr, ur, vr, xi, yi, ui, vi;
            double a0r, a0i, a1r, a1i, a2r, a2i, a3r, a3i;

            i1 = i0 + mr;
            i2 = i1 + mr;
            i3 = i2 + mr;
            PR( cout<<"\n           i0="<<i0<<"  i1="<<i1<<"  i2="<<i2<<"  i3="<<i3<<flush; )

            // read all four points before anything is written back
            a0r = fr[i0];  a0i = fi[i0];
            a1r = fr[i1];  a1i = fi[i1];
            a2r = fr[i2];  a2i = fi[i2];
            a3r = fr[i3];  a3i = fi[i3];

            // x, u: sum and difference of the points at i0 and i1
            xr = a0r + a1r;  ur = a0r - a1r;
            xi = a0i + a1i;  ui = a0i - a1i;

            // y: sum of the points at i2 and i3
            yr = a2r + a3r;
            yi = a2i + a3i;

            // v: their difference multiplied by -i (is<0) or +i (otherwise)
            if (is < 0)
            {
                vi = a3r - a2r;
                vr = a2i - a3i;
            }
            else
            {
                vi = a2r - a3r;
                vr = a3i - a2i;
            }

            fr[i3] = ur - vr;
            fr[i1] = ur + vr;

            fi[i3] = ui - vi;
            fi[i1] = ui + vi;

            fr[i0] = xr + yr;
            fr[i2] = xr - yr;

            fi[i0] = xi + yi;
            fi[i2] = xi - yi;
        }

        m += LX;
    }


//------------- main loop :

    for ( ; m <= ldn; m += LX)
    {
        PR( cout<<"\n m="<<m<< flush; )

        m2 = (1L << m);   // current block length
        mr = (m2 >> LX);  // quarter of the block length
        ph0 = (is > 0 ? 2.0*M_PI : -2.0*M_PI) / m2;

        for (j = 0; j < mr; j++)
        {
            PR( cout<<"\n    j="<<j<< flush; )

            // project sincos() from fxtaux.h (results first, angle last);
            // presumably stores cos in c and sin in s
            sincos(&c, &s, j*ph0);

            // double and triple angle via the addition theorems
            c2 = c*c - s*s;
            s2 = 2.0*c*s;

            c3 = c2*c - s2*s;
            s3 = c2*s + s2*c;

            PR( cout<<"\n   loop:    "<< flush; )
            // n is a multiple of m2 and j<m2, so i0<n visits every block once
            for (i0 = j; i0 < n; i0 += m2)
            {
                double xr, yr, ur, vr, xi, yi, ui, vi;
                double a0r, a0i;
                double b1r, b1i, b2r, b2i, b3r, b3i;
                double t;

                i1 = i0 + mr;
                i2 = i1 + mr;
                i3 = i2 + mr;

                PR( cout<<"\n           i0="<<i0<<"  i1="<<i1<<"  i2="<<i2<<"  i3="<<i3<<flush; )

                // point at i1 times (c2 + i*s2)
                t = fr[i1];
                b2i = fi[i1];
                b2r = c2*t - s2*b2i;
                b2i = b2i*c2 + s2*t;

                a0r = fr[i0];
                a0i = fi[i0];
                xr = a0r + b2r;  ur = a0r - b2r;
                xi = a0i + b2i;  ui = a0i - b2i;

                // point at i2 times (c + i*s)
                t = fr[i2];
                b1i = fi[i2];
                b1r = c*t - s*b1i;
                b1i = b1i*c + s*t;

                // point at i3 times (c3 + i*s3)
                t = fr[i3];
                b3i = fi[i3];
                b3r = c3*t - s3*b3i;
                b3i = b3i*c3 + s3*t;

                yr = b1r + b3r;
                yi = b1i + b3i;

                // v = -i*(b1-b3) for is<0, +i*(b1-b3) otherwise
                if (is < 0)
                {
                    vi = b3r - b1r;
                    vr = b1i - b3i;
                }
                else
                {
                    vi = b1r - b3r;
                    vr = b3i - b1i;
                }

                fr[i3] = ur - vr;
                fr[i1] = ur + vr;

                fi[i3] = ui - vi;
                fi[i1] = ui + vi;

                fr[i0] = xr + yr;
                fr[i2] = xr - yr;

                fi[i0] = xi + yi;
                fi[i2] = xi - yi;
            }
        }
    }
}
// ============================== end COOLEY4X_FFT ==========================
