
// MACHINE GENERATED FILE, DO NOT EDIT !
// this file was generated from fhtsplitradixdif.cc
#include "complextype.h"

#include "fxtdefs.h"  // SUMDIFF, CSQR, CMULT
#include "sincos.h"
#include "revbinpermute.h"

#include <math.h> // M_PI

// tuning parameter:
// define TRIG_REC to compute the trig values by a recurrence
// instead of calling sincos() for each angle
// (this may lose some precision, see below)
//#define TRIG_REC
// with type 'long Complex' slight speed loss on my machine,
// with type 'Complex' little speed gain.
//
#if defined TRIG_REC
#warning 'FYI: fht(Complex *, ulong) uses trig recursion'
#endif

// tuning parameter:
#define  FINAL_RADIX_16  1  // 0 or 1 (default)
//
#if  ( FINAL_RADIX_16==1 )
#warning 'FYI: FINAL_RADIX_16 set in split_radix_dif_fht(Complex *, ulong)'
#else
#warning 'FYI: FINAL_RADIX_16 is NOT SET in split_radix_dif_fht(Complex *, ulong)'
#endif


void
split_radix_dif_fht_core(Complex *f, ulong ldn)
// fast hartley transform
// split radix decimation in frequency algorithm
// output data is in revbin_permuted order
//
// f:    data array with n = 2**ldn elements, transformed in place
// ldn:  base-2 logarithm of the array length
//
// ldn==0 is a no-op, ldn==1 and ldn==2 are handled directly.
// For larger ldn a sequence of passes over the whole array is made
// (each pass works on blocks of k4 = 2**(ldk+2) elements, ldk decreasing
// by two per pass), followed by one final pass on short blocks
// (8, 16 or 4 elements, depending on the parity of ldk and on
// FINAL_RADIX_16).
//
// NOTE(review): the helper macros SUMDIFF2/SUMDIFF4/CSQR4/CMULT6/CMULT6M
// and the constants SQRT2, SQRT1_2, COS_1_PI_8, SIN_1_PI_8 come from
// fxtdefs.h; their exact semantics are not visible in this file.
{
    if ( ldn<=2 )
    {
        if ( ldn==1 )  // two point fht
        {
            SUMDIFF2(f[0], f[1]);
        }
        else if ( ldn==2 )  // four point fht
        {
            // results are stored to f[0], f[1], f[2], f[3] in this order
            // (the non-core routine stores them to f[0], f[2], f[1], f[3])
            Complex f0, f1, f2, f3;
            SUMDIFF4(f[0], f[2], f0, f1);
            SUMDIFF4(f[1], f[3], f2, f3);
            SUMDIFF4(f0, f2, f[0], f[1]);
            SUMDIFF4(f1, f3, f[2], f[3]);
        }
        return;
    }

    // Shift an unsigned long, not an int: (1<<ldn) would overflow the
    // signed int type for ldn >= 31 (with 32-bit int) before the
    // conversion to ulong takes place.
    const ulong n = (1UL<<ldn);
    const Complex *fn = f + n;  // one past the last element, loop bound
    ulong ldk = ldn - 2;

    // The loop leaves ldk==1 (odd case) or, for even ldk,
    // ldk==2 with FINAL_RADIX_16 and ldk==0 without it.
#if  ( FINAL_RADIX_16==1 )
    for (  ; ldk>2;  ldk-=2)
#else // FINAL_RADIX_16
    for (  ; ldk>1;  ldk-=2)
#endif // FINAL_RADIX_16
    {
        ulong k   = 1UL << ldk;  // unsigned shift, see note at n above
        ulong kh  = k >> 1;
        ulong k2  = k << 1;
        ulong k3  = k2 + k;
        ulong k4  = k2 << 1;

        // Elements at offsets 0 and kh within each quarter of every
        // k4-block: no trig values needed here, only SQRT2.
        for (Complex *fi=f, *gi=f+kh;  fi<fn;  fi+=k4, gi+=k4)
        {
            Complex f0, f1, f2, f3;
            SUMDIFF4(fi[0], fi[k], f0, f1);
            SUMDIFF4(fi[k2], fi[k3], f2, f3);
            SUMDIFF4(f0, f2, fi[0], fi[k2]);
            SUMDIFF4(f1, f3, fi[k], fi[k3]);

            SUMDIFF4(gi[0], gi[k2], f0, f2);
            SUMDIFF4(gi[k], gi[k3], f1, f3);
            gi[k3] = SQRT2 * f3;
            gi[k2] = SQRT2 * f2;
            SUMDIFF4(f0, f1, gi[0], gi[k]);
        }

        // angle increment: pi/(2*k)
        double tt = M_PI/4/kh;  // jjkeep
#if defined TRIG_REC
        // Rotation recurrence starting at angle zero (s1=0, c1=1),
        // with al = 2*sin(tt/2)**2 and be = sin(tt).
        double s1 = 0.0,  c1 = 1.0;  // jjkeep
        double al = sin(0.5*tt);  // jjkeep
        al *= (2.0*al);
        double be = sin(tt);  // jjkeep
#endif // TRIG_REC

        // Remaining offsets: i and k-i are processed together.
        for (ulong i=1; i<kh; i++)
        {
#if defined TRIG_REC
            // tt is reused as scratch for the old value of c1;
            // the angle increment is not needed anymore once
            // al and be have been computed.
            c1 -= (al*(tt=c1)+be*s1);
            s1 -= (al*s1-be*tt);
#else
            // presumably s1 = sin(i*tt), c1 = cos(i*tt)
            // (sincos() is declared in sincos.h)
            double s1, c1;  // jjkeep
            sincos(tt*i, &s1, &c1);
#endif // TRIG_REC

            // presumably (c2,s2) is the square of (c1,s1),
            // i.e. cosine and sine of the doubled angle -- confirm in fxtdefs.h
            double c2, s2;  // jjkeep
            CSQR4(c1, s1, c2, s2);

            for (Complex *fi=f+i, *gi=f+k-i;  fi<fn;  fi+=k4, gi+=k4)
            {
                Complex a, b, g0, f0, f1, g1, f2, g2, f3, g3;

                SUMDIFF4(gi[0], gi[k2], g0, a);
                SUMDIFF4(fi[k], fi[k3], f1, b);
                CMULT6M(c1, s1, b, a, g2, f3);

                SUMDIFF4(fi[0], fi[k2], f0, a);
                SUMDIFF4(gi[k], gi[k3], g1, b);
                CMULT6M(s1, c1, b, a, f2, g3);

                SUMDIFF4(f2, f3, fi[k2], a);
                SUMDIFF4(g2, g3, gi[k2], b);
                CMULT6M(s2, c2, b, a, fi[k3], gi[k3]);

                SUMDIFF4(f0, f1, fi[0], a);
                SUMDIFF4(g0, g1, gi[0], b);
                CMULT6M(s2, c2, b, a, fi[k], gi[k]);
            }
        }
    }


    if ( ldk & 1 )
    {
        // here ldk == 1: final pass on blocks of 8 elements
        for (Complex *fi=f; fi<fn; fi+=8)  // radix 8 step
        {
            Complex g0, f0, f1, g1;
            SUMDIFF4(fi[0], fi[4], f0, g0);
            SUMDIFF4(fi[2], fi[6], f1, g1);
            SUMDIFF2(f0, f1);
            SUMDIFF2(g0, g1);
            // s1, c1, s2, c2 are plain temporaries here (not trig values)
            Complex s1, c1, s2, c2;
            SUMDIFF4(fi[1], fi[5], s1, c1);
            SUMDIFF4(fi[3], fi[7], s2, c2);
            SUMDIFF2(s1, s2);
            SUMDIFF4(f0, s1, fi[0], fi[1]);
            SUMDIFF4(f1, s2, fi[2], fi[3]);
            c1 *= SQRT2;
            c2 *= SQRT2;
            SUMDIFF4(g0, c1, fi[4], fi[5]);
            SUMDIFF4(g1, c2, fi[6], fi[7]);
        }
    }
    else
    {
#if  ( FINAL_RADIX_16==1 )
        // here ldk == 2: final pass on blocks of 16 == 2**4 elements
        for (Complex *fi=f; fi<fn; fi+=16)  // radix 16 step
        {
            Complex f0, f1, f2, f3;
            SUMDIFF4(fi[0], fi[8], f0, f1);
            SUMDIFF4(fi[4], fi[12], f2, f3);
            SUMDIFF4(f0, f2, fi[0], fi[4]);
            SUMDIFF4(f1, f3, fi[8], fi[12]);

            SUMDIFF4(fi[2], fi[10], f0, f1);
            SUMDIFF4(fi[6], fi[14], f2, f3);
            SUMDIFF4(f0, f2, fi[2], fi[6]);
            SUMDIFF4(f1, f3, fi[10], fi[14]);

            SUMDIFF4(fi[1], fi[9], f0, f1);
            SUMDIFF4(fi[5], fi[13], f2, f3);
            SUMDIFF4(f0, f2, fi[1], fi[5]);
            SUMDIFF4(f1, f3, fi[9], fi[13]);

            SUMDIFF4(fi[3], fi[11], f0, f1);
            SUMDIFF4(fi[7], fi[15], f2, f3);
            SUMDIFF4(f0, f2, fi[3], fi[7]);
            SUMDIFF4(f1, f3, fi[11], fi[15]);

            // elements 0..3
            SUMDIFF4(fi[0], fi[2], f0, f1);
            SUMDIFF4(fi[1], fi[3], f2, f3);
            SUMDIFF4(f0, f2, fi[0], fi[1]);
            SUMDIFF4(f1, f3, fi[2], fi[3]);
            // elements 4..7
            SUMDIFF4(fi[4], fi[6], f0, f1);
            f3 = SQRT2 * fi[7];
            f2 = SQRT2 * fi[5];
            SUMDIFF4(f0, f2, fi[4], fi[5]);
            SUMDIFF4(f1, f3, fi[6], fi[7]);

            // elements 8..15
            Complex a, b, g0, g1, g2, g3;
            SUMDIFF4(fi[10], fi[14], a, b);
            a *= SQRT1_2;
            b *= SQRT1_2;
            SUMDIFF4(fi[8], a, f0, f1);
            SUMDIFF4(fi[12], b, g0, g1);
            SUMDIFF4(fi[11], fi[15], a, b);
            a *= SQRT1_2;
            b *= SQRT1_2;
            SUMDIFF4(fi[9], a, f2, f3);
            SUMDIFF4(fi[13], b, g2, g3);
            // presumably cos(pi/8) and sin(pi/8), going by the constant names
            Complex c1 = COS_1_PI_8;
            Complex s1 = SIN_1_PI_8;
            CMULT6(s1, c1, f2, g3, b, a);
            SUMDIFF4(f0, a, fi[8], fi[9]);
            SUMDIFF4(g1, b, fi[14], fi[15]);
            CMULT6(c1, s1, g2, f3, b, a);
            SUMDIFF4(g0, a, fi[12], fi[13]);
            SUMDIFF4(f1, b, fi[10], fi[11]);
        }
#else // FINAL_RADIX_16
        // here ldk == 0: final pass on blocks of 4 == 2**2 elements
        for (Complex *fi=f; fi<fn; fi+=4)  // radix 4 step
        {
            Complex f0, f1, f2, f3;
            SUMDIFF4(fi[0], fi[2], f0, f1);
            SUMDIFF4(fi[1], fi[3], f2, f3);
            SUMDIFF4(f0, f2, fi[0], fi[1]);
            SUMDIFF4(f1, f3, fi[2], fi[3]);

        }
#endif // FINAL_RADIX_16
    }
}
// ===================== end =====================


void
split_radix_dif_fht(Complex *f, ulong ldn)
// fast hartley transform
// split radix decimation in frequency algorithm
//
// f:    data array with 2**ldn elements, transformed in place
// ldn:  base-2 logarithm of the array length
//
// ldn==0 is a no-op; ldn==1 and ldn==2 are computed directly.
// For ldn>2 the work is done by split_radix_dif_fht_core(), whose
// output is in revbin_permuted order, followed by a call to
// revbin_permute() (presumably restoring the natural order).
{
    if ( ldn<=2 )
    {
        if ( ldn==1 )  // two point fht
        {
            SUMDIFF2(f[0], f[1]);
        }
        else if ( ldn==2 )  // four point fht
        {
            // results are stored to f[0], f[2], f[1], f[3] in this order,
            // so no permutation is needed afterwards
            Complex f0, f1, f2, f3;
            SUMDIFF4(f[0], f[2], f0, f1);
            SUMDIFF4(f[1], f[3], f2, f3);
            SUMDIFF4(f0, f2, f[0], f[2]);
            SUMDIFF4(f1, f3, f[1], f[3]);
        }
        return;
    }

    split_radix_dif_fht_core(f, ldn);

    // Shift an unsigned long, not an int: (1<<ldn) would overflow the
    // signed int type for ldn >= 31 (with 32-bit int).
    revbin_permute(f, 1UL<<ldn);
}
// ===================== end =====================
