
#include "fxt.h"
#include "auxbit.h"
#include "copy.h"
#include "fxtdefs.h"
#include "ldn2rc.h"

// FFT() is an alias that forwards its four arguments unchanged to fht_fft().
// NOTE(review): no use of FFT() is visible in this part of the file -- confirm it is still needed.
#define  FFT(fr,fi,ldn,is)  fht_fft(fr,fi,ldn,is)

// tuning parameter:
// CP_ROWS selects how matrix_auto_convolution() handles each pair of rows:
//   1: copy both rows into scratch buffers, convolve there, copy the results back
//   0: convolve the rows directly inside the data array
// The #warning directives below report the selected mode at compile time.
#define  CP_ROWS  1  // 0 or 1 (use scratch space, default)
#if  ( CP_ROWS==1 )
#warning 'FYI: matrix_auto_convolution() does row FFTs in scratch space'
#else
#warning 'FYI: matrix_auto_convolution() does row FFTs inplace'
#endif


void
matrix_auto_convolution(double *fr, ulong ldn)
// fr[] = fr[] (*) fr[]
{
    ulong r, c;
    ldn2rc(ldn, r, c);
    matrix_auto_convolution(fr, r, c, 0);
}
// =========== end ============


void
matrix_auto_convolution0(double *fr, ulong ldn)
// fr[] = fr[] (*) fr[]
// version for zero padded data (i.e. linear convolution)
{
    ulong r, c;
    ldn2rc(ldn, r, c);
    matrix_auto_convolution(fr, r, c, 1);
}
// =========== end ============


void
matrix_auto_convolution(double *fr, ulong r, ulong c, int zp/*=0*/)
// fr[] = fr[] (*) fr[]
// call with zp==1 if high half of data is zero (for linear convolution)
{
    ulong n = r * c;
    ulong nh = n / 2;
    ulong ldc = ld(c);

#if  ( CP_ROWS==1 )
    ulong mrc = 2 * (r>c ? r : c);
    double *tr = (double *)operator new( 2*mrc * sizeof(double) ); // jjnote: mem allocation
    double *ti = tr + mrc;
#else
    double *tr = (double *)operator new( r * sizeof(double) ); // jjnote: mem allocation  
#endif

    column_real_complex_ffts(fr, r, c, zp, tr);

    double v = 1.0/n;
    // row #0:
    fht(fr, ldc);
    fht_auto_convolution_core(fr, ldc, v);
    fht(fr, ldc);

    if ( (r>1) && !(r&1) )  // r>1 and r even
    {
        fht_negacyclic_auto_convolution(fr+nh, ldc, v);  // row #n/2
    }


    double *pr = fr,  *pi = fr + n;
    for (ulong k=1; k<r/2; k++)
    {
        pr += c;
        pi -= c; // jjnote: dependent of order of imag part
        double w = (double)k/r;

#if  ( CP_ROWS==1 )
        copy(pr, tr , c);
        copy(pi, ti , c);
        weighted_complex_auto_convolution(tr, ti, ldc, w, v);
        copy(tr, pr , c);
        copy(ti, pi , c);
#else // CP_ROWS
        weighted_complex_auto_convolution(pr, pi, ldc, w, v);  // jjnote: cache problem !
#endif // CP_ROWS
    }

    column_complex_real_ffts(fr, r, c, tr);
    operator delete(tr);
}
// =========== end ============

