
#include "fxt.h"
#include "fxtdefs.h"
#include "revbinpermute.h"
#include "revbinpermute0.h"
#include "sincos.h"


// tuning parameter:
// Selects how the cosine/sine of the tripled angle are obtained in the
// twiddle loops of both core routines below:
//   0: via the SINCOS3ALG() macro, from the already computed (cos, sin) pair
//   1: via a second call sincos(3.0*a, ...)
#define  USE_SINCOS3  0  // 0 to use algebra (default) or 1 for sincos call


// tuning parameter:
// When set to 1 the values x[n/2+1 .. n-1] (the imaginary parts, see the
// output ordering documented at split_radix_real_complex_fft()) are passed
// to negate() at the end of the forward core and at the start of the
// inverse core; a compile time #warning is emitted as a reminder.
#define  POSITIVE_ISIGN  0 // 0 (isign=-1, default) or 1 (isign=+1, extra work!)
// isign (default = -1) may be changed here
// set to 1 to have same isign (+1) as in
// fht_real_complex_fft(), wrap_real_complex_fft(),
// and easy_ordering_real_complex_fft()

// original Fortran code by Sorensen; published in H.V. Sorensen, D.L. Jones,
// M.T. Heideman, C.S. Burrus (1987) Real-valued fast Fourier transform
// algorithms.  IEEE Trans. on Acoustics, Speech, & Signal Processing, 35,
// 849-863.  Adapted to C by Bill Simpson, 1995  wsimpson@uwinnipeg.ca
// derived from: static char RCSreal2herm_c[] =
// "$Id: real2herm.c,v 1.1 1996/09/02 01:47:12 wedgingt Beta $";
// further editing by joerg arndt  arndt@jjj.de

#if ( POSITIVE_ISIGN==1 )
#warning 'FYI: split_radix_real_complex_fft() (and its inverse) have isign=+1'
#warning 'FYI: ... this cause extra work (negation of imag part)'
#endif

void
split_radix_real_complex_fft(double *x, ulong ldn)
//
// Real to complex FFT, in place, of the array x[0..n-1]:
// revbin_permute() followed by the decimation in time core routine.
//
// isign = -1 (unless POSITIVE_ISIGN is #defined to 1)
// length is n=2**ldn
//
// ordering on output:
//
// f[0]     = re[0] (==zero frequency, purely real)
// f[1]     = re[1]
//         ...
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2]    (==nyquist frequency, purely real)
//
// f[n/2+1] = im[n/2-1]
// f[n/2+2] = im[n/2-2]
//         ...
// f[n-1]   = im[1]
//
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n-i]
//
// the order of imaginary parts is the same as in fht_real_complex_fft
// (reversed wrt. easy_ordering_real_complex_fft())
//
{
    // Compute the length in ulong: the plain '1<<ldn' is a shift of a
    // (signed) int and is undefined as soon as ldn reaches the width of int.
    const ulong n = (ulong)1 << ldn;

    revbin_permute(x, n);
    split_radix_real_complex_fft_dit_core(x, ldn);
}
// =============== end ===========


void
split_radix_real_complex_fft0(double *x, ulong ldn)
// version for zero padded arrays
//
// Same as split_radix_real_complex_fft() (length n=2**ldn, in place, same
// output ordering) except that revbin_permute0() is used for the initial
// permutation.
// NOTE(review): presumably the caller must guarantee that the upper half of
// x[] is zero -- confirm against the contract of revbin_permute0().
{
    // Compute the length in ulong: the plain '1<<ldn' is a shift of a
    // (signed) int and is undefined as soon as ldn reaches the width of int.
    const ulong n = (ulong)1 << ldn;

    revbin_permute0(x, n);
    split_radix_real_complex_fft_dit_core(x, ldn);
}
// =============== end ===========


void
split_radix_complex_real_fft(double *x, ulong ldn)
//
// Complex to real FFT, in place, of the array x[0..n-1]:
// decimation in frequency core routine followed by revbin_permute().
//
// isign = -1 (unless POSITIVE_ISIGN is #defined to 1)
// length is n=2**ldn
//
// expected ordering on input:
// like the output of split_radix_real_complex_fft()
//
{
    // Compute the length in ulong: the plain '1<<ldn' is a shift of a
    // (signed) int and is undefined as soon as ldn reaches the width of int.
    const ulong n = (ulong)1 << ldn;

    split_radix_complex_real_fft_dif_core(x, ldn);
    revbin_permute(x, n);
}
// =============== end ===========


void
split_radix_real_complex_fft_dit_core(double *x, ulong ldn)
//
// Core routine (decimation in time) of the real to complex split radix FFT.
// Works in place on x[0..n-1], n=2**ldn.
//
// The callers in this file apply revbin_permute() (or revbin_permute0())
// to x[] before calling this routine; the result is in the ordering
// documented at split_radix_real_complex_fft().
//
// The butterfly macros (SUMDIFF*, DIFFSUM*, CMULT6, SINCOS3ALG) are defined
// in the project headers and are used here unchanged.
//
{
    if ( ldn==0 )  return;  // n==1: nothing to do (and x[1] must not be touched)

    // Shift in ulong: '(1<<ldn)' would be a shift of a (signed) int, which is
    // undefined as soon as ldn reaches the width of int.
    const ulong n = ((ulong)1<<ldn);
    ulong n4;
    ulong i1, i2, i3, i4, i5, i6, i7, i8;
    ulong n2, n8, i0, j;

    double a, e;
    double t1, t2, t3, t4, t5, t6;

    // Length-two butterflies on the pairs (x[i0], x[i0+1]).
    // The start offset ix takes the values 0, 6, 30, ... (ix=2*(id-1))
    // while the stride id takes the values 4, 16, 64, ...
    for (ulong ix=0, id=4;  ix<n;  id*=4)
    {
        for (ulong i0=ix; i0<n; i0+=id)  SUMDIFF2(x[i0], x[i0+1]);
        ix = 2*(id-1);
    }

    // One pass per iteration with n2 = 4, 8, 16, ..., n
    // (the loop body is executed ldn-1 times).
    n2 = 2;
    ulong nn = n>>1;
    while ( nn>>=1 )
    {
        ulong ix, id;
        ix = 0;
        n2 <<= 1;
        id = n2<<1;
        n4 = n2>>2;
        n8 = n2>>3;

        // Butterflies that need no sincos() call: the elements at offset 0
        // and (if n4!=1) at offset n8 within each group of four elements
        // spaced n4 apart; the latter only need the constant SQRT1_2.
        do  // ix
        {
            for (i0=ix; i0<n; i0+=id)
            {
                i1 = i0;
                i2 = i1 + n4;
                i3 = i2 + n4;
                i4 = i3 + n4;

                DIFFSUM3R(x[i3], x[i4], t1);

                SUMDIFF3(x[i1], t1, x[i3]);

                if ( n4!=1 )
                {
                    i1 += n8;
                    i2 += n8;
                    i3 += n8;
                    i4 += n8;

                    SUMDIFF4(x[i3], x[i4], t1, t2);
                    t1 = -t1 * SQRT1_2;
                    t2 *= SQRT1_2;

                    SUMDIFF4(t1, x[i2], x[i4], x[i3]);

                    SUMDIFF3(x[i1], t2, x[i2]);
                }
            }

            // next start offset and stride for this pass
            ix = (id<<1) - n2;
            id <<= 2;
        }
        while ( ix<n );

        // e is the angle increment of this pass, a the current angle
        e = 2.0*M_PI/n2;
        a = e;

        // (disabled variant using lookup tables cn[], sn[] instead of sincos())
//      ulong dil = n/n2;
//      a = dil;

        // Remaining butterflies: the pass with index j uses the angle (j-1)*e,
        // because 'a' is only advanced to j*e after the sincos() call.
        for (j=2; j<=n8; j++)
        {
            double cc1, ss1, cc3, ss3;
//          ulong a3 = (a+(a<<1))&(n-1);
//          cc1 = cn[a];
//          ss1 = sn[a];
//          cc3 = cn[a3];
//          ss3 = sn[a3];
//          a = (a+dil)&(n-1);

            sincos(a, &ss1, &cc1);

            // cosine and sine of the tripled angle, see USE_SINCOS3
#if ( USE_SINCOS3==1 )
            sincos(3.0*a, &ss3, &cc3);
#else
            SINCOS3ALG(cc1, ss1, cc3, ss3);
#endif
            a = (double)j * e;  // angle for the next value of j

            ix = 0;
            id = n2<<1;
            do
            {
                for (i0=ix; i0<n; i0+=id)
                {
                    // i1..i4: offset j-1 from the start of each quarter,
                    // i5..i8: the mirrored offset n4-(j-1)
                    i1 = i0 + j - 1;
                    i2 = i1 + n4;
                    i3 = i2 + n4;
                    i4 = i3 + n4;

                    i5 = i0 + n4 - j + 1;
                    i6 = i5 + n4;
                    i7 = i6 + n4;
                    i8 = i7 + n4;

                    CMULT6(cc1, ss1, x[i7], x[i3], t2, t1);

                    CMULT6(cc3, ss3, x[i8], x[i4], t4, t3);

                    DIFFSUM3(t2, t4, t6);
                    SUMDIFF4(t6, x[i6], x[i8], x[i3]);

                    DIFFSUM3R(t1, t3, t5);
                    SUMDIFF4(t3, x[i2], x[i4], x[i7]);

                    SUMDIFF3(x[i1], t5, x[i6]);
                    DIFFSUM3R(t4, x[i5], x[i2]);
                }

                ix = (id<<1) - n2;
                id <<= 2;

            }
            while ( ix<n );
        }
    }

    // For isign=+1 pass the nh-1 values x[nh+1..n-1] (the imaginary parts
    // according to the documented output ordering) to negate().
#if ( POSITIVE_ISIGN==1 )
    if ( ldn>=2 )
    {
        ulong nh = (n>>1);
        negate(x+nh+1, nh-1);
    }
#endif // POSITIVE_ISIGN

}
// ============== end ================


void
split_radix_complex_real_fft_dif_core(double *x, ulong ldn)
//
// Core routine (decimation in frequency) of the complex to real split radix
// FFT.  Works in place on x[0..n-1], n=2**ldn.
//
// The input is expected in the ordering produced by
// split_radix_real_complex_fft(); the caller in this file applies
// revbin_permute() to x[] after calling this routine.
//
// The butterfly macros (SUMDIFF*, CMULT6, SINCOS3ALG) are defined in the
// project headers and are used here unchanged.
//
{
    // n==1: nothing to do.  Without this guard the final loop of length-two
    // butterflies would access x[1], which is outside of a length-1 array.
    // (Same guard as in split_radix_real_complex_fft_dit_core().)
    if ( ldn==0 )  return;

    // Shift in ulong: '(1<<ldn)' would be a shift of a (signed) int, which is
    // undefined as soon as ldn reaches the width of int.
    const ulong n = ((ulong)1<<ldn);

    // For isign=+1 pass the nh-1 values x[nh+1..n-1] (the imaginary parts
    // according to the documented input ordering) to negate() first.
#if  ( POSITIVE_ISIGN==1 )
    if ( ldn>=2 )
    {
        ulong nh = (n>>1);
        negate(x+nh+1, nh-1);
    }
#endif // POSITIVE_ISIGN

    ulong n4;
    ulong i1, i2, i3, i4, i5, i6, i7, i8;
    ulong nn = n>>1;
    ulong ix, id;
    ulong n2, n8;
    ulong i0, j;

    double a, e;
    double t1, t2, t3, t4, t5;


    // One pass per iteration with n2 = n, n/2, n/4, ..., 4
    // (the loop body is executed ldn-1 times).
    n2 = n<<1;
    while ( nn >>= 1 )
    {
        ix = 0;
        id = n2;
        n2 >>= 1;
        n4 = n2>>2;
        n8 = n4>>1;

        // Butterflies that need no sincos() call: the elements at offset 0
        // and (if n4!=1) at offset n8 within each group of four elements
        // spaced n4 apart; the latter only need the constant SQRT2.
        do  // ix
        {
            for (i0=ix; i0<n; i0+=id)
            {
                i1 = i0;
                i2 = i1 + n4;
                i3 = i2 + n4;
                i4 = i3 + n4;

                SUMDIFF3(x[i1], x[i3], t1);

                // double x[i2] and x[i4]
                x[i2] += x[i2];
                x[i4] += x[i4];

                SUMDIFF3R(x[i4], t1, x[i3]);

                if ( n4!=1 )
                {
                    i1 += n8;
                    i2 += n8;
                    i3 += n8;
                    i4 += n8;

                    SUMDIFF3(x[i1], x[i2], t1);

                    SUMDIFF4(x[i4], x[i3], t2, x[i2]);

                    t2 = -t2 * SQRT2;
                    t1 *= SQRT2;
                    SUMDIFF4(t2, t1, x[i3], x[i4]);
                }
            }

            // next start offset and stride for this pass
            ix = (id<<1) - n2;
            id <<= 2;
        }
        while ( ix<n );

        // (disabled variant using lookup tables cn[], sn[] instead of sincos())
//      ulong dil = n/n2;
//      a = dil;

        // e is the angle increment of this pass, a the current angle
        e = 2.0*M_PI/n2;
        a = e;

        // Remaining butterflies: the pass with index j uses the angle (j-1)*e,
        // because 'a' is only advanced to j*e after the sincos() call.
        for (j=2; j<=n8; j++)
        {
            double cc1, ss1, cc3, ss3;
//          ulong a3 = (a+(a<<1))&(n-1);
//          cc1 = cn[a];
//          ss1 = sn[a];
//          cc3 = cn[a3];
//          ss3 = sn[a3];
//          a = (a+dil)&(n-1);

            sincos(a, &ss1, &cc1);

            // cosine and sine of the tripled angle, see USE_SINCOS3
#if  ( USE_SINCOS3==1 )
            sincos(3.0*a, &ss3, &cc3);
#else
            SINCOS3ALG(cc1, ss1, cc3, ss3);
#endif
            a = (double)j*e;  // angle for the next value of j

            ix = 0;
            id = n2<<1;

            do  // ix-loop
            {
                for (i0=ix; i0<n; i0+=id)
                {
                    // i1..i4: offset j-1 from the start of each quarter,
                    // i5..i8: the mirrored offset n4-(j-1)
                    i1 = i0 + j - 1;
                    i2 = i1 + n4;
                    i3 = i2 + n4;
                    i4 = i3 + n4;

                    i5 = i0 + n4 - j + 1;
                    i6 = i5 + n4;
                    i7 = i6 + n4;
                    i8 = i7 + n4;

                    SUMDIFF3(x[i1], x[i6], t1);
                    SUMDIFF3(x[i5], x[i2], t2);

                    SUMDIFF4(x[i8], x[i3], t3, x[i6]);
                    SUMDIFF4(x[i4], x[i7], t4, x[i2]);

                    SUMDIFF3(t1, t4, t5);
                    SUMDIFF3(t2, t3, t4);

                    // note: the first call passes (ss1, cc1), the second
                    // (cc3, ss3); the differing argument order is intended
                    CMULT6(ss1, cc1, t5, t4, x[i7], x[i3]);
                    CMULT6(cc3, ss3, t1, t2, x[i4], x[i8]);
                }

                ix = (id<<1) - n2;
                id <<= 2;
            }
            while ( ix<n );
        }
    }

    // Final length-two butterflies on the pairs (x[i0], x[i0+1]).
    // The start offset ix takes the values 0, 6, 30, ... (ix=2*(id-1))
    // while the stride id takes the values 4, 16, 64, ...
    for (ulong ix=0, id=4;  ix<n;  id*=4)
    {
        for (ulong i0=ix; i0<n; i0+=id)  SUMDIFF2(x[i0], x[i0+1]);
        ix = 2*(id-1);
    }
}
// ============== end ================ 
