
#include "fxt.h"
#include "copy.h"
#include "auxbit.h"
#include "fxtdefs.h"
#include "ldn2rc.h"

// Alias that forwards FFT(fr,fi,ldn,is) to fht_fft() with the same arguments.
// NOTE(review): not referenced in the visible part of this file -- confirm
// whether it is still needed.
#define  FFT(fr,fi,ldn,is)  fht_fft(fr,fi,ldn,is)

void
matrix_convolution(double *fr, double *fi, ulong ldn)
// fi[] = fr[] (*) fi[]
{
    ulong r, c;
    ldn2rc(ldn, r, c);
    matrix_convolution(fr, fi, r, c, 0);
}
// =============== end ====================


void
matrix_convolution0(double *fr, double *fi, ulong ldn)
// fi[] = fr[] (*) fi[]
// version for zero padded data (i.e. linear convolution)
{
    ulong r, c;
    ldn2rc(ldn, r, c);
    matrix_convolution(fr, fi, r, c, 1);
}
// =============== end ====================


//#define  TRANSPOSE  0 // 0 (off, better) or 1 (on, slow)
//
//#if  ( TRANSPOSE==1 )
//#warning  'TRANSPOSE used in matrix_convolution() =--> slowdown (!)'
//#include <assert.h>
//#endif

// tuning parameter:
//   CP_ROWS==1: matrix_convolution() copies each row into scratch space,
//               runs the row convolution there and copies the result back
//   CP_ROWS==0: matrix_convolution() runs the row convolution directly
//               on the row inside the data arrays
// A #warning is emitted on every compile to report which variant is active.
#define  CP_ROWS  1  // 0 or 1 (default)
#if  ( CP_ROWS==1 )
#warning 'FYI: matrix_convolution() does row FFTs in scratch space'
#else
#warning 'FYI: matrix_convolution() does row FFTs inplace'
#endif

void
matrix_convolution(double *fr, double *fi, ulong r, ulong c, int zp/*=0*/)
// fi[] = fr[] (*) fi[]
// call with zp==1 if high half of data is zero (for linear convolution)
{
    const ulong n = c * r;
    const int is = 1;

    ulong mrc = 2 * (r>c ? r : c);
    double *tr = (double *)operator new ( 2*mrc * sizeof(double) );  // jjnote: mem allocation
    double *ti = tr + mrc;

    column_ffts(fr, fi, r, c, is, zp, tr, ti);

    ulong ldc = ld(c);
    double v = 1.0/(2*n);
    double *pr = fr,  *pi = fi;
    for (ulong k=0; k<r; k++)
    {
        double w = (double)k/r;

#if  ( CP_ROWS==1 )
        copy(pr, tr , c);
        copy(pi, ti , c);
        weighted_complex_auto_convolution(tr, ti, ldc, w, v);
        copy(tr, pr , c);
        copy(ti, pi , c);
#else // CP_ROWS
        weighted_complex_auto_convolution(pr, pi, ldc, w, v);  // jjnote: cache problem !
#endif // CP_ROWS

        pr += c;
        pi += c; // jjnote: dependent of order of imag part
    }

    // save half of the work by:
    column_complex_imag_ffts(fr, fi, r, c, tr);

    operator delete(tr);
}
// ================== end ===================
