#include "mfft.h"
#include "modaux.h"

// LX: number of bits of the length exponent consumed per pass of the
// radix 4 routine below (used as shift amount and as loop decrement).
#define LX 2

void
mod_fft_dif4(mod *f, ulong ldn, int is)
//
// radix 4 decimation in frequency mod fft (in place)
//
// f[]:  data array with n = 2**ldn elements, overwritten with the result
// ldn:  base-2 logarithm of the transform length
// is:   direction selector: is>0 uses root(), otherwise invroot()
//
// All lengths are computed in type ulong, so ldn is not limited by the
// width of int (a plain 1<<ldn overflows int for ldn>=31).
//
{
    // ulong-typed 1: keeps the shifts below out of int arithmetic
    const ulong unit = 1;
    const ulong n = (unit << ldn);

    // imag takes the place of the imaginary unit in the radix 4 butterfly;
    // presumably root(k)/invroot(k) give a k-th root of unity and its
    // inverse -- confirm against modaux.h
    const mod imag = (is>0 ? root(4) : invroot(4));
    const mod rn   = (is>0 ? root(n) : invroot(n));

    // radix 4 passes: ldm drops by LX per pass, the loop stops once
    // fewer than LX bits remain (ldm is 0 or 1 then)
    for (ulong ldm=ldn; ldm>=LX; ldm-=LX)
    {
        const ulong m  = (unit << ldm);  // length of the current sub-blocks
        const ulong m4 = (m >> LX);      // a quarter of that length

        // step between consecutive twiddle factors of this pass
        // (presumably rn**(2**(ldn-ldm)) -- confirm pow2pow() semantics)
        const mod dw = pow2pow(rn, (ldn-ldm));

        mod w  = (mod::one);   // twiddle for the current j
        mod w2 = w;            // its square
        mod w3 = w;            // its cube

        for (ulong j=0; j<m4; j++)
        {
            // same offset j in every sub-block of length m
            for (ulong r=0, i0=j+r; r<n; r+=m, i0+=m)
            {
                const ulong i1 = i0 + m4;
                const ulong i2 = i1 + m4;
                const ulong i3 = i2 + m4;

                mod a0 = f[i0];
                mod a1 = f[i1];
                mod a2 = f[i2];
                mod a3 = f[i3];

                // sums of the even/odd quarter elements
                mod t02 = a0 + a2;
                mod t13 = a1 + a3;

                f[i0] = (t02+t13);
                f[i1] = (t02-t13)*w2;

                // differences, odd part multiplied by imag
                t02 = a0 - a2;
                t13 = a1 - a3;
                t13 *= imag;

                f[i2] = (t02+t13)*w;
                f[i3] = (t02-t13)*w3;
            }

            // advance the twiddle factors to the next j
            w *= dw;
            w2 = w*w;
            w3 = w*w2;
        }
    }

    // n is not a power of 4, need a radix 2 step:
    if ( (ldn&1)!=0 )
    {
        for (ulong r=0; r<n; r+=2)  sumdiff2(f[r], f[r+1]);
    }

    // final reordering of the data (presumably the index bit-reversal
    // permutation -- confirm scramble() semantics)
    scramble(f, n);
}
// ============================== end MOD_FFT_DIF4 ==========================
