

#include "aux4step.h"


//
// Compared to the 'usual' FFTs, the four-step algorithm is
// much more memory local. On systems with hierarchical
// memory ('cache') this may boost performance
// (e.g. systems with a very high CPU clock).
//


void
four_step_fft(double *fr, double *fi, ulong ldn, int is)
//
// Four-step FFT driver.
//
// The 2^ldn element arrays are handled as a matrix with dimensions
//   n1 = 2^(ldn/2)   and   n2 = 2^(ldn - ldn/2),
// so n1*n2 == 2^ldn and n2 >= n1.  The work is done by, in order,
// mat_col_fft(), mat_exp(), mat_row_fft() and a final mat_transpose2()
// of each array.
//
// fr, fi: data arrays handed to the helpers; the result is left in them
//         (presumably real and imaginary parts -- confirm in aux4step.h)
// ldn:    base-2 logarithm of the total number of elements
// is:     forwarded unchanged to the mat_*() helpers
//         (presumably the sign of the transform exponent -- confirm)
//
// The last argument of mat_col_fft() and mat_row_fft() is passed as 0.
// NOTE(review): this looks like the 'zp' flag (1 if the high half of the
// data is zero, else 0) -- confirm against the helper declarations.
//
{
    ulong n1, n2;
    ulong ldn1, ldn2;

    ldn1 = (ldn >> 1);
    ldn2 = ldn - ldn1;

    // Shift in ulong, not int: a plain `1<<k` is evaluated in int and is
    // undefined once k reaches the width of int minus one.
    n1 = ((ulong)1 << ldn1);
    n2 = ((ulong)1 << ldn2);  // n2>=n1


    mat_col_fft(fr, fi, n1, n2, is, 0);

    mat_exp(fr, fi, n1, n2, is);


    // mat_col_fft_exp() replaces the calls
    // to mat_col_fft() and mat_exp():
    //    mat_col_fft_exp(fr,fi,n1,n2,is,zp);


    mat_row_fft(fr, fi, n1, n2, is, 0);


    mat_transpose2(fr, n2, n1);
    mat_transpose2(fi, n2, n1);

}
// =================== end FOUR_STEP_FFT ===================



void
four_step_fft0(double *fr, double *fi, ulong ldn, int is)
//
// Variant of the four-step FFT driver that passes 1 (instead of 0) as
// the last argument of mat_col_fft().
// NOTE(review): that argument looks like the 'zp' flag (1 if the high
// half of the data is zero, else 0), i.e. this variant presumably expects
// zero-padded input -- confirm against the helper declarations.
//
// The 2^ldn element arrays are handled as a matrix with dimensions
//   n1 = 2^(ldn/2)   and   n2 = 2^(ldn - ldn/2),
// so n1*n2 == 2^ldn and n2 >= n1.  The work is done by, in order,
// mat_col_fft(), mat_exp(), mat_row_fft() and a final mat_transpose2()
// of each array.
//
// fr, fi: data arrays handed to the helpers; the result is left in them
//         (presumably real and imaginary parts -- confirm in aux4step.h)
// ldn:    base-2 logarithm of the total number of elements
// is:     forwarded unchanged to the mat_*() helpers
//         (presumably the sign of the transform exponent -- confirm)
//
{
    ulong n1, n2;
    ulong ldn1, ldn2;

    ldn1 = (ldn >> 1);
    ldn2 = ldn - ldn1;

    // Shift in ulong, not int: a plain `1<<k` is evaluated in int and is
    // undefined once k reaches the width of int minus one.
    n1 = ((ulong)1 << ldn1);
    n2 = ((ulong)1 << ldn2);  // n2>=n1


    // Column pass with the last argument set to 1 (see note above).
    mat_col_fft(fr, fi, n1, n2, is, 1);

    mat_exp(fr, fi, n1, n2, is);


    // mat_col_fft_exp() replaces the calls
    // to mat_col_fft() and mat_exp():
    //    mat_col_fft_exp(fr,fi,n1,n2,is,zp);


    // The row pass still gets 0 as its last argument.
    mat_row_fft(fr, fi, n1, n2, is, 0);


    mat_transpose2(fr, n2, n1);
    mat_transpose2(fi, n2, n1);

}
// =================== end FOUR_STEP_FFT0 ===================


