
#include <math.h>

#include "complextype.h" // split_radix_di?_fft_core()
#include "fxtdefs.h"  // SUMDIFF, CSQR, CMULT
#include "revbinpermute.h"
#include "sincos.h"


// tuning parameter:
//#define USE_SINCOS3  // default = off
// whether sincos is used for 3*angle
// else: use algebraic relation


void
split_radix_dif_fft_core(Complex *f, ulong ldn)
// Split radix decimation in frequency (DIF) FFT, computed in place.
// f[] must hold n = 2**ldn elements.
// isign = +1
// Output data is in revbin_permuted order.
{
    // Shift a ulong, not an int: (1<<ldn) overflows int for ldn>=31.
    const ulong n = ((ulong)1 << ldn);
    if ( n<=1 )  return;

    double s2pi = 2.0*M_PI;  // pi*2*isign
    ulong n2 = 2*n;
    for (ulong k=1; k<ldn; k++)
    {
        n2 >>= 1;  // == n>>(k-1) == n, n/2, n/4, ..., 4
        const ulong n4 = n2 >> 2;  // == n/4, n/8, ..., 1
        const double e = s2pi / n2;

        {  // j==0: both twiddle factors equal one, so the multiplications are skipped
            const ulong j = 0;
            ulong ix = j;       // first index of the current run of butterflies
            ulong id = (n2<<1);  // stride between butterflies within the run
            while ( ix<n-1 )
            {
                for (ulong i0=ix; i0<n; i0+=id)
                {
                    ulong i1 = i0 + n4;
                    ulong i2 = i1 + n4;
                    ulong i3 = i2 + n4;

                    // f[i0] += f[i2],  t0 = (old f[i0]) - f[i2]
                    // f[i1] += f[i3],  t1 = (old f[i1]) - f[i3]
                    Complex t0, t1;
                    SUMDIFF3(f[i0], f[i2], t0);
                    SUMDIFF3(f[i1], f[i3], t1);

//                    t1 *= Complex(0, 1);  // +isign, optimized away by gcc
                    t1 = Complex(-t1.imag(), t1.real());

                    SUMDIFF2(t0, t1);
                    f[i2] = t0; // * Complex(cc1, ss1);
                    f[i3] = t1; // * Complex(cc3, ss3);
               }

               // next run: new start index, stride multiplied by four
               ix = (id<<1) - n2 + j;
               id <<= 2;
            }
        }

        for (ulong j=1; j<n4; j++)
        {
            // twiddle factors for angle a and 3*a
            double a = j * e;
            double cc1,ss1, cc3,ss3;
            sincos(a, &ss1, &cc1);

#if defined USE_SINCOS3
            sincos(3.0*a, &ss3, &cc3);
#else
            SINCOS3ALG(cc1, ss1, cc3, ss3);
#endif

            ulong ix = j;       // first index of the current run of butterflies
            ulong id = (n2<<1);  // stride between butterflies within the run
            while ( ix<n-1 )
            {
                for (ulong i0=ix; i0<n; i0+=id)
                {
                    ulong i1 = i0 + n4;
                    ulong i2 = i1 + n4;
                    ulong i3 = i2 + n4;
                    // f[i0] = f[i0] + f[i2]
                    // f[i1] = f[i1] + f[i3]
                    // f[i2] = (f[i0]-f[i2] + (is*I) * (f[i1]-f[i3])) * sincos(a)
                    // f[i3] = (f[i0]-f[i2] - (is*I) * (f[i1]-f[i3])) * sincos(3*a)

                    Complex t0, t1;
                    SUMDIFF3(f[i0], f[i2], t0);
                    SUMDIFF3(f[i1], f[i3], t1);

//                    t1 *= Complex(0, is);
//                    t1 *= Complex(0, 1);  // +isign, optimized away by gcc
                    t1 = Complex(-t1.imag(), t1.real());

                    SUMDIFF2(t0, t1);
                    f[i2] = t0 * Complex(cc1, ss1);
                    f[i3] = t1 * Complex(cc3, ss3);
               }

               // next run: new start index, stride multiplied by four
               ix = (id<<1) - n2 + j;
               id <<= 2;
            }
        }
    }

    // final stage: sum/difference of adjacent pairs f[i0], f[i0+1]
    for (ulong ix=0, id=4;  ix<n;  id*=4)
    {
        for (ulong i0=ix; i0<n; i0+=id)  SUMDIFF2(f[i0], f[i0+1]);
        ix = 2*(id-1);
    }
}
//========================== end ===========================


void
split_radix_dit_fft_core(Complex *f, ulong ldn)
// Split radix decimation in time (DIT) FFT, computed in place.
// f[] must hold n = 2**ldn elements.
// isign = -1
// Input data must be in revbin_permuted order
// (split_radix_fft() calls revbin_permute() before this routine).
{
    // Shift a ulong, not an int: (1<<ldn) overflows int for ldn>=31.
    const ulong n = ((ulong)1 << ldn);
    if ( n<=1 )  return;

    // first stage: sum/difference of adjacent pairs f[i0], f[i0+1]
    for (ulong ix=0, id=4;  ix<n;  id*=4)
    {
        for (ulong i0=ix; i0<n; i0+=id)  SUMDIFF2(f[i0], f[i0+1]);
        ix = 2*(id-1);
    }

    double s2pi = -2.0*M_PI;  // pi*2*isign
    ulong n2 = 2;
    for (ulong k=ldn-1; k>=1; k--)  // ldn-1 stages; k is unsigned, loop ends when k reaches 0
    {
        n2 <<= 1;  // == 4, 8, 16, ..., n
        const ulong n4 = n2 >> 2;  // == 1, 2, 4, ..., n/4
        const double e = s2pi / n2;  // sign already contained in s2pi

        {  // j==0: both twiddle factors equal one, so the multiplications are skipped
            const ulong j = 0;
            ulong ix = j;       // first index of the current run of butterflies
            ulong id = (n2<<1);  // stride between butterflies within the run
            while ( ix<n-1 )
            {
                for (ulong i0=ix; i0<n; i0+=id)
                {
                    ulong i1 = i0 + n4;
                    ulong i2 = i1 + n4;
                    ulong i3 = i2 + n4;

                    Complex t0 = f[i3];
                    Complex t1 = f[i2];

                    SUMDIFF2(t0, t1);
//                    t1 *= Complex(0, 1);  // -isign, optimized away by gcc
                    t1 = Complex(-t1.imag(), t1.real());

                    // f[i2] = (old f[i0]) - t0,  f[i0] += t0
                    // f[i3] = (old f[i1]) - t1,  f[i1] += t1
                    SUMDIFF3(f[i0], t0, f[i2]);
                    SUMDIFF3(f[i1], t1, f[i3]);
               }

               // next run: new start index, stride multiplied by four
               ix = (id<<1) - n2 + j;
               id <<= 2;
            }
        }

        for (ulong j=1; j<n4; j++)
        {
            // twiddle factors for angle a and 3*a
            double a = j * e;
            double cc1,ss1, cc3,ss3;
            sincos(a, &ss1, &cc1);

#if defined USE_SINCOS3
            sincos(3.0*a, &ss3, &cc3);
#else
            SINCOS3ALG(cc1, ss1, cc3, ss3);
#endif

            ulong ix = j;       // first index of the current run of butterflies
            ulong id = (n2<<1);  // stride between butterflies within the run
            while ( ix<n-1 )
            {
                for (ulong i0=ix; i0<n; i0+=id)
                {
                    ulong i1 = i0 + n4;
                    ulong i2 = i1 + n4;
                    ulong i3 = i2 + n4;

                    // apply the twiddle factors before the butterfly
                    Complex t0 = f[i3] * Complex(cc3, ss3);
                    Complex t1 = f[i2] * Complex(cc1, ss1);

                    SUMDIFF2(t0, t1);
//                    t1 *= Complex(0, -is);  // t1 /= Complex(0, is);
//                    t1 *= Complex(0, 1);  // -isign, optimized away by gcc
                    t1 = Complex(-t1.imag(), t1.real());

                    // f[i2] = (old f[i0]) - t0,  f[i0] += t0
                    // f[i3] = (old f[i1]) - t1,  f[i1] += t1
                    SUMDIFF3(f[i0], t0, f[i2]);
                    SUMDIFF3(f[i1], t1, f[i3]);
               }

               // next run: new start index, stride multiplied by four
               ix = (id<<1) - n2 + j;
               id <<= 2;
            }
        }
    }
}
//========================== end ===========================


void
split_radix_fft(Complex *f, ulong ldn, int is)
// Fast Fourier transform, split radix algorithm, computed in place.
// f[] must hold n = 2**ldn elements.
// is>0:  DIF core (isign=+1) followed by revbin permutation.
// else:  revbin permutation followed by DIT core (isign=-1).
{
    // Shift a ulong, not an int: (1<<ldn) overflows int for ldn>=31.
    const ulong n = ((ulong)1 << ldn);

    if ( is>0 )
    {
        split_radix_dif_fft_core(f, ldn);
        revbin_permute(f, n);
    }
    else
    {
        revbin_permute(f, n);
        split_radix_dit_fft_core(f, ldn);
    }
}
//========================== end ===========================
