
#include <math.h>

#include "fxtaux.h"
#include "permute.h"


void
dif2_fft_localized(Complex *f, ulong ldn, int is)
// Radix-2 decimation in frequency (DIF) FFT, in place,
// on the n = 2^ldn elements f[0], ..., f[n-1].
// is: sign of the exponent in the twiddle factors (+1 or -1).
// Loop order: within each stage all butterflies of one block of
// length m are done before the next block is touched, so the
// innermost loop walks over consecutive elements.
// Compared to dif2_fft() this one
// - does more trig computations (one sincos() per butterfly)
// - is (far) better memory local
// The stages are followed by a call to revbin_permute()
// (by its name a bit-reversal reordering of the result).
// Precondition: ldn must be smaller than the bit width of ulong.
{
    // Shift a ulong, not an int: (1<<ldn) overflows int for ldn>=31.
    const ulong n = ((ulong)1 << ldn);
    const double pi = is*M_PI;  // +- pi

    for (ulong ldm=ldn; ldm>=1; --ldm)
    {
        const ulong m = ((ulong)1 << ldm);  // m = 2^ldm
        const ulong mh = (m>>1);            // mh = m/2

        const double phi = pi/(double)(mh);  // +- 2*pi/m

        for (ulong r=0; r<n; r+=m)  // r: start of current block
        {
            for (ulong j=0; j<mh; ++j)
            {
                ulong t1 = r+j;
                ulong t2 = t1+mh;

                Complex u = f[t1];
                Complex v = f[t2];

                f[t1] = u + v;

                // f[t2] = (u-v)*exp(+-2*pi*i*j/m)
                double s,c;
                sincos(phi*(double)j, &s, &c);
                f[t2] = (u - v) * Complex(c,s);
            }
        }
    }

    revbin_permute(f,n);
}
// ============================== end ==========================


void
dif2_fft(Complex *f, ulong ldn, int is)
// Radix-2 decimation in frequency (DIF) FFT, in place,
// on the n = 2^ldn elements f[0], ..., f[n-1].
// is: sign of the exponent in the twiddle factors (+1 or -1).
// Loop order: for each stage the twiddle factor for index j is
// computed once and then applied to the j-th butterfly of every
// block of length m.
// The stages are followed by a call to revbin_permute()
// (by its name a bit-reversal reordering of the result).
// Precondition: ldn must be smaller than the bit width of ulong.
{
    // Shift a ulong, not an int: (1<<ldn) overflows int for ldn>=31.
    const ulong n = ((ulong)1 << ldn);
    const double pi = is*M_PI;  // +- pi

    for (ulong ldm=ldn; ldm>=1; --ldm)
    {
        const ulong m = ((ulong)1 << ldm);  // m = 2^ldm
        const ulong mh = (m>>1);            // mh = m/2

        const double phi = pi/(double)(mh);  // +- 2*pi/m

        for (ulong j=0; j<mh; ++j)
        {
            // (c,s) = exp(+-2*pi*i*j/m), shared by all blocks
            double s,c;
            sincos(phi*(double)j, &s, &c);

            for (ulong r=0; r<n; r+=m)  // r: start of current block
            {
                ulong t1 = r+j;
                ulong t2 = t1+mh;
                Complex u = f[t1];
                Complex v = f[t2];

                f[t1] = u + v;
                f[t2] = (u - v) * Complex(c,s);
            }
        }
    }

    revbin_permute(f,n);
}
// ============================== end ==========================


void
dif2_fft(double *fr, double *fi, ulong ldn, int is)
// Radix-2 decimation in frequency (DIF) FFT, in place,
// on n = 2^ldn complex values given as separate arrays:
// fr[] holds the real parts, fi[] the imaginary parts.
// is: sign of the exponent in the twiddle factors (+1 or -1).
// The stages are followed by a call to revbin_permute() on each
// array (by its name a bit-reversal reordering of the result).
// Precondition: ldn must be smaller than the bit width of ulong.
{
    // Shift a ulong, not an int: (1<<ldn) overflows int for ldn>=31.
    const ulong n = ((ulong)1 << ldn);

    const double pi = is*M_PI;  // +- pi

    for (ulong ldm=ldn; ldm>=1; --ldm)
    {
        const ulong m = ((ulong)1 << ldm);   // m = 2^ldm
        const ulong mh = (m>>1);             // mh = m/2

        const double phi = pi/(double)(mh);  // +- 2*pi/m

        for (ulong j=0; j<mh; ++j)
        {
            // (c,s) = exp(+-2*pi*i*j/m), shared by all blocks
            double s,c;
            sincos(phi*(double)j, &s, &c);

            for (ulong r=0; r<n; r+=m)  // r: start of current block
            {
                ulong t1 = r+j;
                ulong t2 = t1+mh;
                // u = f[t1]
                // v = f[t2]
                // f[t1] =  u+v
                // f[t2] =  (u-v)*exp(+-2*pi*i*j/m)

                // real parts: sum goes to fr[t1], ur becomes Re(u-v)
                double ur = fr[t1];
                double vr = fr[t2];
                fr[t1] += vr;
                ur -= vr;

                // imaginary parts: sum goes to fi[t1], ui becomes Im(u-v)
                double ui = fi[t1];
                double vi = fi[t2];
                fi[t1] += vi;
                ui -= vi;

                // complex multiplication (ur + i*ui) * (c + i*s)
                fr[t2] = ur * c - ui * s;
                fi[t2] = ur * s + ui * c;
            }
        }
    }

    revbin_permute(fr,n);
    revbin_permute(fi,n);
}
// ============================== end ==========================
