
#include <math.h>
#include <assert.h>

#include "fxtaux.h"
#include "permute.h"


// LX: number of index bits consumed by one pass of the main loop;
//     used below as the step of ldm and to derive the quarter block
//     length (m>>LX).
static const ulong LX = 2;
// RX: apparently the radix of the main passes (equals 1<<LX).
static const ulong RX = 4;


void
dit4l_fft(Complex *f, ulong ldn, int is)
// Decimation in time (DIT) radix 4 fft, computed in place.
// Non-optimized learners version.
//
// f:    data array of n = 2**ldn elements, overwritten with the result
// ldn:  base-2 logarithm of the array length
// is:   sign of the exponent of the transform kernel.
//       Only the sign is significant: is>0 selects +1,
//       every other value selects -1.
//
// No normalization (division by n) is applied.
{
    // Reduce 'is' to a pure sign once, so that the twiddle angles and
    // the factor +-i in the butterfly always agree with each other.
    const double sgn  = ( is>0 ? +1.0 : -1.0 );
    const double s2pi = sgn * 2.0*M_PI;
    const Complex si  = Complex(0.0, sgn);  // == +i or -i

    // Shift a ulong one: shifting the int literal 1 is undefined
    // as soon as the shift count reaches the width of int.
    const ulong n = ((ulong)1 << ldn);

    revbin_permute(f, n);

    ulong ldm = (ldn & 1);

    if ( ldm!=0 )  // n is not a power of 4, need a radix 2 step
    {
        for (ulong r=0; r<n; r+=2)
        {
            const Complex a0 = f[r];
            const Complex a1 = f[r+1];

            f[r]   = a0 + a1;
            f[r+1] = a0 - a1;
        }
    }

    // Radix 4 passes: each one joins four blocks of length m4 into one
    // block of length m = 4*m4.
    for (ldm+=LX; ldm<=ldn; ldm+=LX)
    {
        const ulong m  = ((ulong)1 << ldm);
        const ulong m4 = (m >> LX);
        const double ph0 = s2pi/m;

        for (ulong j=0; j<m4; j++)
        {
            // twiddle factors for offset j: e, e^2, e^3
            const double phi = j*ph0;
            double c, s, c2, s2, c3, s3;
            sincos(phi, &s, &c);
            sincos(2.0*phi, &s2, &c2);
            sincos(3.0*phi, &s3, &c3);

            const Complex e  = Complex(c, s);
            const Complex e2 = Complex(c2, s2);
            const Complex e3 = Complex(c3, s3);

            for (ulong r=0; r<n; r+=m)
            {
                const ulong i0 = r + j;
                const ulong i1 = i0 + m4;
                const ulong i2 = i1 + m4;
                const ulong i3 = i2 + m4;

                Complex a0 = f[i0];
                Complex a1 = f[i1];
                Complex a2 = f[i2];
                Complex a3 = f[i3];

                // The data is in revbin order, so the two middle
                // quarters are stored exchanged.
                swap(a1, a2);

                a1 *= e;
                a2 *= e2;
                a3 *= e3;

                // 4-point butterfly
                const Complex s02 = a0 + a2;
                const Complex d02 = a0 - a2;
                const Complex s13 = a1 + a3;
                const Complex d13 = si * (a1 - a3);

                f[i0] = s02 + s13;
                f[i1] = d02 + d13;
                f[i2] = s02 - s13;
                f[i3] = d02 - d13;
            }
        }
    }
}
// ============================== end ==========================
