
#include "fxt.h"
#include "misc.h"
#include "evenoddrevpermute.h"


static void
coseno(double *x, ulong ldn)
// Helper for dct_zapata(): in-place cascade of additions and
// subtractions over x[0..n-1] where n=2^ldn (no multiplications).
//
// Intent as stated by the original author:
// input:
// c[k] = \sum_{m=0}^{n-1}{ \prod_{i=0}^{ldn-1}{ 2^{b_i} \cos(b_i 2^i \Theta_m) x(m)} }
// where n=2^ldn, \Theta_m=\pi (1+4m)/(2n), b_i is bit #i of k
// output:
// c[k] = \sum_{m=0}^{n-1}{ \cos(\Theta_m) x(m)}
// NOTE(review): the output formula presumably should read \cos(k \Theta_m);
// confirm against the paper cited at dct_zapata().
//
// For each block size m=2^ldm (ldm = ldn-1, ldn-2, ..., 1) the array is
// viewed as n/m consecutive blocks of length m.  Walking from the last
// block pair to the first, the lower block is combined into the block
// directly above it:
//   upper[k]   += lower[k]     for 0 < k < m/2
//   upper[m/2] -= lower[m/2]
// Walking downwards guarantees that every lower block is still
// unmodified (for this ldm) at the moment it is read.
//
// All index arithmetic uses ulong so that shifts and index products
// cannot overflow a (signed) int for large ldn.
// ldn<2 leaves x untouched.
{
    for (ulong ldm=ldn; ldm>1; )
    {
        --ldm;  // ldm takes the values ldn-1, ldn-2, ..., 1

        const ulong m = (ulong)1 << ldm;
        const ulong mh = m >> 1;
        const ulong nblk = (ulong)1 << (ldn-ldm);  // == n/m, always >= 2

        // j = nblk-2, nblk-3, ..., 0  (unsigned-safe countdown)
        for (ulong j=nblk-1; j-- > 0; )
        {
            const double *lower = x + j*m;
            double *upper = x + (j+1)*m;

            for (ulong k=1; k<mh; ++k)  upper[k] += lower[k];

            upper[mh] -= lower[mh];
        }
    }
}
// =============== end ===========


void
dct_zapata(double *x, ulong ldn, double *tmp/*=0*/)
// In-place cosine transform of x[0..n-1], n=2^ldn,
// wrt. basis: cos(k*(i+0.5)*PI/n) * (k==0?1.0:sqrt(2))
// algorithm as described in F.Arguello, E.L.Zapata:
// "Fast Cosine Transform based on the Successive Doubling Method"
//
// x:   data, overwritten with the result
// ldn: base-2 logarithm of the length; ldn==0 returns immediately
// tmp: optional scratch space of n doubles (may be 0)
//
// If a (size-n) scratch space is supplied (tmp!=0)
// then the slightly faster version of inverse_evenoddrev_permute is used.
// The inverse_evenoddrev_permute step is not mentioned in the paper !
//
// NOTE(review): the three-argument inverse_evenoddrev_permute(x, tmp, n)
// is assumed to work out of place (reads x, writes the permuted data
// to tmp) -- confirm against evenoddrevpermute.h.  Under that assumption
// the permuted data lives in tmp, so the first butterfly stage reads from
// tmp and stores into x; previously the tmp result was never consumed and
// the butterflies ran on the unpermuted x.
{
    if ( ldn==0 )  return;

    const ulong n = (ulong)1 << ldn;  // shift as ulong, not as int

    // src: where the permuted input is found for the first stage
    const double *src;
    if ( tmp )
    {
        inverse_evenoddrev_permute(x, tmp, n);
        src = tmp;
    }
    else
    {
        inverse_evenoddrev_permute(x, n);
        src = x;
    }

    const double phi0 = 0.5*M_PI/n;
    for (ulong ldm=1; ldm<=ldn; ++ldm)
    {
        const ulong m = (ulong)1 << ldm;
        const ulong mh = m>>1;
        const ulong nm = (ulong)1 << (ldn-ldm); // == n/m

        const double phi1 = mh*phi0;
        const double phi4 = 4*phi1;
        for (ulong k=0; k<nm; ++k)
        {
            const double c = 2 * cos(phi1+k*phi4); // ==2*cos(M_PI*(4*k+1)*mh/(2*n));

            for (ulong j=0; j<mh; ++j)
            {
                const ulong t1 = k+j*2*nm;
                const ulong t2 = t1+nm;

                // butterfly: sum to the lower slot, scaled difference to the upper
                const double a = src[t1];
                const double b = src[t2];
                x[t1] = a + b;
                x[t2] = (a-b)*c;
            }
        }

        // Each stage writes every element of x exactly once (t1 and t2
        // together cover 0..n-1), so from the second stage on the data
        // is taken from x itself.
        src = x;
    }

    revbin_permute(x, n);

    coseno(x, ldn);

    // normalization: all elements but x[0] are scaled by 1/sqrt(2)
    multiply(x+1, n-1, 1.0/sqrt(2));
}
// =============== end ===========
