// -*- C++ -*-

// ===== FUNCTIONS declared in include/fxt.h: =====
// ... matching "fft"


// ----- SRCFILE=fht/fhtfft.cc: -----
void fht_fft(double *fr, double *fi, ulong ldn, int is);
// complex fft based on fht
// data is in real/imag form: fr[] holds the real parts, fi[] the imaginary parts
// is: sign of the transform (isign)
// ldn: presumably the base 2 logarithm of the array length (n=2**ldn) -- confirm

void fht_fft0(double *fr, double *fi, ulong ldn, int is);
// complex fft based on fht
// version for zero padded data

void fht_fft_pre_processing(double *fr, double *fi, ulong ldn, int is); // aux
// auxiliary routine:
// preprocessing needed to use two length-n fhts
// to compute a length-n complex fft

void fht_fft_post_processing(double *fr, double *fi, ulong ldn, int is); // aux
// auxiliary routine:
// postprocessing needed to use two length-n fhts
// to compute a length-n complex fft

// ----- SRCFILE=fht/fhtcfft.cc: -----
void fht_fft(Complex *f, ulong ldn, int is);
// complex fft based on fht
// version for data of type Complex
// is: sign of the transform (isign)

void fht_fft0(Complex *f, ulong ldn, int is);
// complex fft based on fht
// version for zero padded data

void fht_fft_pre_processing(Complex *f, ulong ldn, int is); // aux
// auxiliary routine:
// preprocessing needed to use one length-n (complex) fht
// to compute a length-n complex fft

void fht_fft_post_processing(Complex *f, ulong ldn, int is); // aux
// auxiliary routine:
// postprocessing needed to use one length-n (complex) fht
// to compute a length-n complex fft

// ----- SRCFILE=fft/fftdif4.cc: -----
void dif4_fft_core(double *fr, double *fi, ulong ldn); // aux
// radix 4 decimation in frequency fft
// fixed sign: isign = +1 (there is no 'is' parameter)
// output data is in revbin_permuted order

void dif4_fft_core(Complex *f, ulong ldn); // aux
// radix 4 decimation in frequency fft
// fixed sign: isign = +1 (there is no 'is' parameter)
// output data is in revbin_permuted order

void dif4_fft(double *fr, double *fi, ulong ldn, int is);
// fast fourier transform
// radix 4 decimation in frequency algorithm
// is: sign of the transform (isign)

// ----- SRCFILE=fft/fftdit4.cc: -----
void dit4_fft_core(double *fr, double *fi, ulong ldn); // aux
// radix 4 decimation in time fft
// isign = +1
// input data must be in revbin_permuted order

void dit4_fft_core(Complex *f, ulong ldn); // aux
// radix 4 decimation in time fft
// isign = -1
// input data must be in revbin_permuted order
// NOTE(review): the two dit4_fft_core() overloads are documented with
// different fixed signs (+1 for real/imag data, -1 for Complex data);
// confirm against fft/fftdit4.cc

void dit4_fft(double *fr, double *fi, ulong ldn, int is);
// fast fourier transform
// radix 4 decimation in time algorithm
// is: sign of the transform (isign)

// ----- SRCFILE=fft/cfftsplitradix.cc: -----
void split_radix_dif_fft_core(Complex *f, ulong ldn); // aux
// split radix decimation in frequency fft
// fixed sign: isign = +1
// output data is in revbin_permuted order

void split_radix_dit_fft_core(Complex *f, ulong ldn); // aux
// split radix decimation in time fft
// fixed sign: isign = -1
// input data must be in revbin_permuted order
// NOTE(review): this line used to read "output data is in revbin_permuted
// order" (same text as the dif core); the other dit cores in this listing
// require revbin_permuted input -- confirm against fft/cfftsplitradix.cc

void split_radix_fft(Complex *f, ulong ldn, int is);
// fast fourier transform
// split radix algorithm
// is: sign of the transform (isign)

// ----- SRCFILE=fft/fftsplitradix.cc: -----
void split_radix_fft_dif_core(double *fr, double *fi, ulong ldn); // aux
// split-radix decimation in frequency fft
// version for data in real/imag form (fr[], fi[])
// output data is in revbin_permuted order
// NOTE(review): the sign is not stated here; the Complex counterpart
// split_radix_dif_fft_core() is documented with isign = +1 -- confirm
// (note also the different word order in the two names)

void split_radix_fft(double *fr, double *fi, ulong ldn, int is);
// fast fourier transform
// split radix algorithm
// is: sign of the transform (isign)

// ----- SRCFILE=fft/cfftwrap.cc: -----
void complex_fft(Complex *c, ulong ldn, int is);
// FFT wrapper for data of type Complex:
// allows to use the routines that expect the data
// in real/imag form (two separate arrays)

void real_imag_fft(double *fr, double *fi, ulong ldn, int is);
// FFT wrapper for data in real/imag form (fr[], fi[]):
// allows to use the routines that expect the data
// in complex form (one array of type Complex)

// ----- SRCFILE=fft/skipfft.cc: -----
void skip_fft(double *fr, double *fi, ulong n, ulong d,
         double *wr, double *wi, int is); // aux
// compute fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]
// i.e. d is the stride between consecutive elements,
// n is the number of elements (not a base 2 logarithm)
// wr, wi: presumably workspace arrays -- confirm in fft/skipfft.cc

void skip_fft0(double *fr, double *fi, ulong n, ulong d,
          double *wr, double *wi, int is); // aux
// compute fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]
// version for zero padded data
// parameters as for skip_fft()

// ----- SRCFILE=fft/fft8difcore.cc: -----
void fft8_dif_core(Complex *f); // aux
// 8-point decimation in frequency fft, isign = +1
// length and sign are fixed (no length or 'is' parameter)
// output data is in revbin_permuted order

void fft8_dif_core(double *fr, double *fi); // aux
// 8-point decimation in frequency fft, isign = +1
// version for data in real/imag form (fr[], fi[])
// output data is in revbin_permuted order

// ----- SRCFILE=fft/fft8ditcore.cc: -----
void fft8_dit_core(Complex *f); // aux
// 8-point decimation in time fft, isign = -1
// input data must be in revbin_permuted order

void fft8_dit_core(double *fr, double *fi); // aux
// 8-point decimation in time fft, isign = +1
// input data must be in revbin_permuted order
// NOTE(review): the two fft8_dit_core() overloads are documented with
// different fixed signs (-1 for Complex data, +1 for real/imag data);
// confirm against fft/fft8ditcore.cc

// ----- SRCFILE=fft/fft9.cc: -----
void fft9(Complex *x); // aux
// 9-point fft,  isign = -1
// length and sign are fixed (no length or 'is' parameter)

void fft9(double *xr, double *xi); // aux
// 9-point fft,  isign = -1
// version for data in real/imag form (xr[], xi[])

// ----- SRCFILE=matrix/matrixfft.cc: -----
void matrix_fft(double *fr, double *fi, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache
// version for data in real/imag form (fr[], fi[])

void matrix_fft0(double *fr, double *fi, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache
// version for zero padded data in real/imag form

void matrix_fft(Complex *f, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache
// version for data of type Complex

void matrix_fft0(Complex *f, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache
// version for zero padded data of type Complex

// ----- SRCFILE=matrix/rowffts.cc: -----
void row_ffts(double *fr, double *fi, ulong r, ulong c, int is); // aux
// data is a r x c matrix (r rows of length c)
// presumably computes the ffts of all rows -- confirm in matrix/rowffts.cc

void row_weighted_ffts(double *fr, double *fi, ulong r, ulong c, int is); // aux
// data is a r x c matrix (r rows of length c)
// NOTE(review): the weighting applied is not described here -- see source

void row_ffts(Complex *f, ulong r, ulong c, int is); // aux
// data is a r x c matrix (r rows of length c)
// version for data of type Complex

void row_weighted_ffts(Complex *f, ulong r, ulong c, int is); // aux
// data is a r x c matrix (r rows of length c)
// version for data of type Complex

// ----- SRCFILE=matrix/columnffts.cc: -----
void column_ffts(double *fr, double *fi, ulong r, ulong c, int is, 
            int zp, double *tmpr, double *tmpi); // aux
// data is a r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c
// zp: presumably a flag for zero padded data -- confirm
// tmpr, tmpi: presumably scratch space -- confirm

void column_ffts(Complex *f, ulong r, ulong c, int is, int zp, Complex *tmp); // aux
// data is a r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c
// version for data of type Complex

void column_real_complex_ffts(double *f, ulong r, ulong c, int zp, double *tmp); // aux
// data is a r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c
// presumably real to complex ffts of the columns -- confirm

void column_complex_real_ffts(double *f, ulong r, ulong c, double *tmp); // aux
// data is a r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c
// presumably complex to real ffts of the columns -- confirm

void column_complex_imag_ffts(const double *fr, double *fi, ulong r, ulong c, double *tmp); // aux
// data is a r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c
// only the imag part of the result is computed
// (fr[] is declared const, i.e. it is not written to)

// ----- SRCFILE=realfft/realfftbyfht.cc: -----
void fht_real_complex_fft(double *f, ulong ldn);
// real to complex fft, isign = +1
// ordering on output:
// f[0]     = re[0]
// f[1]     = re[1]
//   ...
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2](==nyquist freq)
// f[n/2+1] = im[n/2-1]  (wrt. complex fft with is=+1)
// f[n/2+2] = im[n/2-2]
//   ...
// f[n-1]   = im[1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n-i]
// note that the order of imaginary parts
// is reversed wrt. fft_real_complex_fft()
// NOTE(review): no fft_real_complex_fft() appears in this listing;
// possibly wrap_real_complex_fft() is meant -- confirm

void fht_real_complex_fft0(double *f, ulong ldn);
// version for zero padded data
// ordering on output:
// see fht_real_complex_fft()

void fht_complex_real_fft(double *f, ulong ldn);
// complex to real fft, isign = +1
// ordering on input:
// like the output of fht_real_complex_fft()

// ----- SRCFILE=realfft/realfftwrap.cc: -----
void wrap_real_complex_fft(double *f, ulong ldn);
// real to complex fft, isign = +1
// ordering on output:
// f[0]     = re[0]
// f[1]     = re[1]
//   ...
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2](==nyquist freq)
// f[n/2+1] = im[1]  (wrt. complex fft with isign=+1)
// f[n/2+2] = im[2]
//   ...
// f[n-1]   = im[n/2-1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n/2+i]
// note that the order of imaginary parts
// is reversed wrt. fht_real_complex_fft()

void wrap_real_complex_fft0(double *f, ulong ldn);
// real to complex fft, isign = +1
// version for zero padded data

void wrap_complex_real_fft(double *f, ulong ldn);
// complex to real fft, isign = +1
// ordering on input:
// like the output of wrap_real_complex_fft()

// ----- SRCFILE=realfft/realffteasyord.cc: -----
void easy_ordering_real_complex_fft(double *f, ulong ldn);
// real to complex fft, isign = +1
// ordering on output is the same as in wrap_real_complex_fft():
// f[0]     = re[0]
// f[1]     = re[1]
//   ...
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2](==nyquist freq)
// f[n/2+1] = im[1]  (wrt. complex fft with isign=+1)
// f[n/2+2] = im[2]
//   ...
// f[n-1]   = im[n/2-1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n/2+i]

void easy_ordering_real_complex_fft0(double *f, ulong ldn);
// version for zero padded data
// ordering on output:
// see easy_ordering_real_complex_fft()

void easy_ordering_complex_real_fft(double *f, ulong ldn);
// complex to real fft, isign = +1
// expected ordering on input:
// like the output of easy_ordering_real_complex_fft()

// ----- SRCFILE=realfft/realfftsplitradix.cc: -----
void split_radix_real_complex_fft(double *x, ulong ldn);
// real to complex fft
// isign = -1 (unless POSITIVE_ISIGN is #defined to 1)
// length is n=2**ldn
// ordering on output (in the array x[]):
// x[0]     = re[0] (==zero frequency, purely real)
// x[1]     = re[1]
//   ...
// x[n/2-1] = re[n/2-1]
// x[n/2]   = re[n/2]    (==nyquist frequency, purely real)
// x[n/2+1] = im[n/2-1]
// x[n/2+2] = im[n/2-2]
//   ...
// x[n-1]   = im[1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in x[i] and x[n-i]
// the order of imaginary parts is the same as in fht_real_complex_fft()
// (reversed wrt. easy_ordering_real_complex_fft())

void split_radix_real_complex_fft0(double *x, ulong ldn);
// version for zero padded arrays

void split_radix_complex_real_fft(double *x, ulong ldn);
// complex to real fft
// isign = -1 (unless POSITIVE_ISIGN is #defined to 1)
// length is n=2**ldn
// expected ordering on input:
// like the output of split_radix_real_complex_fft()

void split_radix_real_complex_fft_dit_core(double *x, ulong ldn); // aux
// auxiliary routine, undocumented in this listing
// presumably the decimation in time core of split_radix_real_complex_fft() -- confirm

void split_radix_complex_real_fft_dif_core(double *x, ulong ldn); // aux
// auxiliary routine, undocumented in this listing
// presumably the decimation in frequency core of split_radix_complex_real_fft() -- confirm

// ----- SRCFILE=realfft/skiprealfft.cc: -----
void skip_real_complex_fft(double *f, ulong n, ulong d, double *w); // aux
// compute real to complex fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]
// i.e. d is the stride between consecutive elements
// w: presumably workspace -- confirm in realfft/skiprealfft.cc

void skip_real_complex_fft0(double *f, ulong n, ulong d, double *w); // aux
// compute real to complex fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]
// presumably the version for zero padded data (as with the other
// ...fft0() routines in this listing) -- confirm

void skip_complex_real_fft(double *f, ulong n, ulong d, double *w); // aux
// compute complex to real fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

// ----- SRCFILE=fft/fftcnvl.cc: -----
void fht_fft_convolution(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]
// where (*) denotes convolution; the result is written to g[]

void split_radix_fft_convolution(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]

void fht_fft_convolution0(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]
// presumably the version for zero padded data (as with the other
// routines named ...0() in this listing) -- confirm

void split_radix_fft_convolution0(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]
// presumably the version for zero padded data -- confirm

void fft_convolution_core1(double *f, double *g, ulong ldn, double v/*=0.0*/); // aux
// auxiliary routine, undocumented in this listing
// the marker /*=0.0*/ records the default value of v
// NOTE(review): meaning of v is not visible here -- see fft/fftcnvl.cc

void fft_convolution_core2(double *f, double *g, ulong ldn, double v/*=0.0*/); // aux
// auxiliary routine, undocumented in this listing
// the marker /*=0.0*/ records the default value of v

// ----- SRCFILE=fft/fftcnvla.cc: -----
void fht_fft_auto_convolution(double *f, ulong ldn);
// f[] = f[] (*) f[]
// where (*) denotes convolution; the result overwrites f[]

void split_radix_fft_auto_convolution(double *f, ulong ldn);
// f[] = f[] (*) f[]

void fht_fft_auto_convolution0(double *f, ulong ldn);
// f[] = f[] (*) f[]
// presumably the version for zero padded data (as with the other
// routines named ...0() in this listing) -- confirm

void split_radix_fft_auto_convolution0(double *f, ulong ldn);
// f[] = f[] (*) f[]
// presumably the version for zero padded data -- confirm

void fft_auto_convolution_core1(double *f, ulong ldn, double v/*=0.0*/); // aux
// auxiliary routine, undocumented in this listing
// the marker /*=0.0*/ records the default value of v
// NOTE(review): meaning of v is not visible here -- see fft/fftcnvla.cc

void fft_auto_convolution_core2(double *f, ulong ldn, double v/*=0.0*/); // aux
// auxiliary routine, undocumented in this listing
// the marker /*=0.0*/ records the default value of v

// ----- SRCFILE=fft/fftcorr.cc: -----
void fft_correlation0(double *f, double *g, ulong ldn);
// correlation of f[] and g[] computed with fft
// result in g

void fft_auto_correlation0(double *f, ulong ldn);
// undocumented in this listing
// presumably the (self-)correlation of f[], result in f -- confirm

// ----- SRCFILE=fft/fftcocnvl.cc: -----
void fft_auto_convolution(Complex *f, ulong ldn,
                     double v/*=0.0*/);
// undocumented in this listing
// presumably the _cyclic_ (self-)convolution of f[] -- confirm
// the marker /*=0.0*/ records the default value of v

void fft_convolution(Complex *f, Complex *g,
                ulong ldn, double v/*=0.0*/);
// _cyclic_ convolution
// (use zero padded data for usual conv.)
// f, g  must not overlap
// result in g

void fft_complex_auto_convolution(double *fr, double *fi,
                             ulong ldn, double v/*=0.0*/);
// _cyclic_ (self-)convolution
// of complex data in real/imag form
// (use zero padded data for usual conv.)
// fr,fi must not overlap

void fft_complex_convolution(double *fr, double *fi,
                        double *gr, double *gi,
                        ulong ldn, double v/*=0.0*/);
// _cyclic_ convolution
// of complex data in real/imag form
// (use zero padded data for usual conv.)
// fr,fi,gr,gi must be pairwise non-overlapping
// result in gr,gi

// ----- SRCFILE=fft/fftcocorr.cc: -----
void fft_complex_auto_correlation(double *fr, double *fi, ulong ldn);
// _cyclic_ (self-)correlation
// of complex data in real/imag form
// (use zero padded data for usual corr.)
// fr,fi must not overlap

void fft_complex_correlation(double *fr, double *fi,
                        double *gr, double *gi,
                        ulong ldn);
// _cyclic_ correlation
// of complex data in real/imag form
// (use zero padded data for usual corr.)
// fr,fi,gr,gi must be pairwise non-overlapping
// result in gr,gi

// ----- SRCFILE=fft/fftspect.cc: -----
void fft_spectrum(double *f, ulong ldn, int phasesq/*=0*/);
// power_spectrum computed with fft
// phasesq != 0  requests computation of phases
// (the marker /*=0*/ records the default: no phases)
// phase[i] is in f[n-i]  (i=1...n/2-1)
// NOTE(review): this line used to read "f[n-1]", which cannot hold
// for a whole range of i -- confirm against fft/fftspect.cc
// phase[0] == 0,  phase[n/2] == 0
// output is not normalized

// ----- SRCFILE=chirp/fftarblen.cc: -----
void fft_arblen(double *x, double *y, ulong n, int is);
// arbitrary length fft
// n is the length itself (not a base 2 logarithm)
// x, y: presumably real and imaginary parts of the data -- confirm

// ----- SRCFILE=chirp/fftfract.cc: -----
void fft_fract(double *x, double *y, ulong n, double v);
// fractional (fast) fourier transform
// for complex array c[0...n]
// compute \sum_{x=0}^{n}{c_x*exp(is*v*2*i*\pi*x*k/n)}
//  (for v==1.0 this is just the usual fft)
// NOTE(review): the formula mentions 'is', which is not a parameter of
// this routine, and the bounds 0...n look like they should be 0...n-1
// -- confirm against chirp/fftfract.cc
//  LITTLE TESTED !
// method: use n*k == n^2/2 + k^2/2 - (k-n)^2/2
// (see: nussbaumer: FFT and convolution algorithms 5.1)
// ( could use n*k == - n^2/2 - k^2/2 + (k+n)^2/2 instead )
// nn is the smallest power of 2 >=2*n
// worst case if n=2^x+1:
//   then nn=4*2^x
//   work is about 12 times a fft of length 2^x
//   and allocated workspace =4*nn

// ----- SRCFILE=ndimfft/twodimfft.cc: -----
void twodim_fft(double *fr, double *fi, ulong r, ulong c, int is);
// two dimensional fft (undocumented in this listing)
// r, c: presumably the numbers of rows and columns -- confirm

// ----- SRCFILE=ndimfft/ndimfft.cc: -----
void ndim_fft(double *fr, double *fi, ulong ndim, const ulong *ldn, int is);
// fft of ndim-dimensional data in real/imag form (fr[], fi[])
// ndim must be 1,2,3,4 or 5
// ldn[] contains base 2 logarithms of dimensions

// ----- SRCFILE=chirp/makechirp.cc: -----
void make_fft_chirp(double *wr, double *wi, ulong n, int is); // aux
// for k=0..n-1:
// Complex(wr[k],wi[k]) == exp(sqrt(-1)*k*k*ph0/2)
// for k=n..nn-1:
// Complex(wr[k],wi[k]) == (0,0)
// NOTE(review): neither ph0 nor nn is defined by this declaration
// (nn is not a parameter here) -- see chirp/makechirp.cc

void complete_fft_chirp(double *wr, double *wi, ulong n, ulong nn); // aux
// computes the chirp values for k>=n, using:
// exp(sqrt(-1)*pi*k*k/n) for k>=n :
// k==n+j, k*k==n*n+2*j*n+j*j
// exp(sqrt(-1)*pi/n*k*k)==exp(+-sqrt(-1)*pi*j*j/n)
// with plus for n even, minus for n odd

void make_fft_fract_chirp(double *wr, double *wi, double v, ulong n, ulong nn); // aux
// for k=0..nn:
// Complex(wr[k],wi[k]) == exp(sqrt(-1)*k*k*2*pi*/n/2)
// NOTE(review): the expression above is garbled ("2*pi*/n") and does not
// mention the parameter v; presumably exp(sqrt(-1)*v*k*k*2*pi/n/2) is
// meant -- confirm against chirp/makechirp.cc


// ----- SRCFILE=weighted/weightedfft.cc: -----
void weighted_fft(double *fr, double *fi, ulong ldn, int is, double w);
// weighted fft of data in real/imag form (undocumented in this listing)
// NOTE(review): the exact role of the weight parameter w is not
// visible here -- see weighted/weightedfft.cc

void weighted_inverse_fft(double *fr, double *fi, ulong ldn, int is, double w);
// inverse of weighted_fft() iff signs of both w _and_ is are changed

// ----- SRCFILE=learn/recfftdit2.cc: -----
static void evenodd_permute(const Complex *a, Complex *ev, Complex *od, ulong n);
// file-local helper (static), undocumented in this listing
// presumably splits a[] into its even (ev[]) and odd (od[]) indexed
// elements -- confirm in learn/recfftdit2.cc

static void recursive_dit2_fft_core(const Complex *a, ulong n, Complex *x, int is);
// file-local helper (static), undocumented in this listing
// a[] is declared const; presumably the result is written to x[] -- confirm

void recursive_dit2_fft(Complex *a, ulong ldn, int is);
// very inefficient, just here to demonstrate the
// recursive fast fourier transform
// (radix 2 decimation in time, going by the name)

// ----- SRCFILE=learn/recfftdif2.cc: -----
static void leftright(const Complex *a, Complex *le, Complex *ri, ulong n);
// file-local helper (static), undocumented in this listing
// presumably splits a[] into its left (le[]) and right (ri[]) halves
// -- confirm in learn/recfftdif2.cc

static void recursive_dif2_fft_core(const Complex *a, ulong n, Complex *x, int is);
// file-local helper (static), undocumented in this listing
// a[] is declared const; presumably the result is written to x[] -- confirm

void recursive_dif2_fft(Complex *a, ulong ldn, int is);
// very inefficient, just here to demonstrate the
// recursive fast fourier transform
// (radix 2 decimation in frequency, going by the name)

// ----- SRCFILE=learn/fftdif2.cc: -----
void dif2_fft_localized(Complex *f, ulong ldn, int is);
// decimation in frequency radix 2 fft
// depth-first version
// compared to the usual fft this one
// - does more trig computations
// - has (far) better memory locality

void dif2_fft(Complex *f, ulong ldn, int is);
// decimation in frequency radix 2 fft
// version for data of type Complex

void dif2_fft(double *fr, double *fi, ulong ldn, int is);
// decimation in frequency radix 2 fft
// version for data in real/imag form (fr[], fi[])

// ----- SRCFILE=learn/fftdit2.cc: -----
void dit2_fft_localized(Complex *f, ulong ldn, int is);
// decimation in time radix 2 fft
// depth-first version
// compared to the usual fft this one
// - does more trig computations
// - has (far) better memory locality

void dit2_fft(Complex *f, ulong ldn, int is);
// decimation in time radix 2 fft
// version for data of type Complex

void dit2_fft(double *fr, double *fi, ulong ldn, int is);
// decimation in time radix 2 fft
// version for data in real/imag form (fr[], fi[])

// ----- SRCFILE=learn/fftdif4l.cc: -----
void dif4l_fft(Complex *f, ulong ldn, int is);
// decimation in frequency radix 4 fft
// non-optimized learner's version
// (cf. dif4_fft() for the regular radix 4 routine)

// ----- SRCFILE=learn/fftdit4l.cc: -----
void dit4l_fft(Complex *f, ulong ldn, int is);
// decimation in time radix 4 fft
// non-optimized learner's version
// (cf. dit4_fft() for the regular radix 4 routine)

inline void fft(double *fr, double *fi, ulong ldn, int is);
// generic fft entry point for data in real/imag form (fr[], fi[])
// declared inline; the definition (and thus the routine actually used)
// is not part of this listing -- see include/fxt.h

inline void fft(Complex *f, ulong ldn, int is);
// generic fft entry point for data of type Complex
// declared inline; the definition is not part of this listing

