// -*- C++ -*-

// ===== FUNCTIONS declared in include/fxt.h: =====


// General format of arguments: (double *f, ulong ldn)
// f := pointer to data array,
// ldn := base 2 log of array length (length n=2**ldn)
//
// A function some_func0(...) (note the '0')
// expects input data with higher half zeros.
//
// Data in the arrays without const modifier is generally modified.
//
// declarations with a comment 'aux' at the line end
// are called by other routines and might not be of
// direct use for users of the library

// ------------- HARTLEY transforms --------------------
// ----- SRCFILE=fht/fhtsplitradixdit.cc: -----
// tuning parameter:
// define to use trig recurrence:
// (and possibly lose some precision, see below)
//#define TRIG_REC
// with type 'long double' slight speed loss on my machine,
// with type 'double' little speed gain.

// tuning parameter:
#define  INITIAL_RADIX_16  1  // 0 or 1 (default)
//
#if  ( INITIAL_RADIX_16==1 )
#warning 'FYI: INITIAL_RADIX_16 set in split_radix_dit_fht(double *, ulong)'
#else
#warning 'FYI: INITIAL_RADIX_16 is NOT SET in split_radix_dit_fht(double *, ulong)'
#endif

void split_radix_dit_fht_core(double *f, ulong ldn); // aux
// fast hartley transform
// split radix decimation in time algorithm
// input data must be in revbin_permuted order

void split_radix_dit_fht(double *f, ulong ldn);
// fast hartley transform
// split radix decimation in time algorithm

// ----- SRCFILE=fht/fhtsplitradixdif.cc: -----
// tuning parameter:
// define to use trig recurrence:
// (and possibly lose some precision, see below)
//#define TRIG_REC
// with type 'long double' slight speed loss on my machine,
// with type 'double' little speed gain.
//
#if defined TRIG_REC
#warning 'FYI: fht(double *, ulong) uses trig recursion'
#endif

// tuning parameter:
#define  FINAL_RADIX_16  1  // 0 or 1 (default)
//
#if  ( FINAL_RADIX_16==1 )
#warning 'FYI: FINAL_RADIX_16 set in split_radix_dif_fht(double *, ulong)'
#else
#warning 'FYI: FINAL_RADIX_16 is NOT SET in split_radix_dif_fht(double *, ulong)'
#endif

void split_radix_dif_fht_core(double *f, ulong ldn); // aux
// fast hartley transform
// split radix decimation in frequency algorithm
// output data is in revbin_permuted order

void split_radix_dif_fht(double *f, ulong ldn);
// fast hartley transform
// split radix decimation in frequency algorithm

// ----- SRCFILE=fht/cfhtsplitradixdit.cc: -----
// tuning parameter:
// define to use trig recurrence:
// (and possibly lose some precision, see below)
//#define TRIG_REC
// with type 'long double' slight speed loss on my machine,
// with type 'double' little speed gain.

// tuning parameter:
#define  INITIAL_RADIX_16  1  // 0 or 1 (default)
//
#if  ( INITIAL_RADIX_16==1 )
#warning 'FYI: INITIAL_RADIX_16 set in split_radix_dit_fht(Complex *, ulong)'
#else
#warning 'FYI: INITIAL_RADIX_16 is NOT SET in split_radix_dit_fht(Complex *, ulong)'
#endif

void split_radix_dit_fht_core(Complex *f, ulong ldn); // aux
// fast hartley transform
// split radix decimation in time algorithm
// input data must be in revbin_permuted order

void split_radix_dit_fht(Complex *f, ulong ldn);
// fast hartley transform
// split radix decimation in time algorithm

// ----- SRCFILE=fht/cfhtsplitradixdif.cc: -----
// tuning parameter:
// define to use trig recurrence:
// (and possibly lose some precision, see below)
//#define TRIG_REC
// with type 'long double' slight speed loss on my machine,
// with type 'double' little speed gain.
//
#if defined TRIG_REC
#warning 'FYI: fht(Complex *, ulong) uses trig recursion'
#endif

// tuning parameter:
#define  FINAL_RADIX_16  1  // 0 or 1 (default)
//
#if  ( FINAL_RADIX_16==1 )
#warning 'FYI: FINAL_RADIX_16 set in split_radix_dif_fht(Complex *, ulong)'
#else
#warning 'FYI: FINAL_RADIX_16 is NOT SET in split_radix_dif_fht(Complex *, ulong)'
#endif

void split_radix_dif_fht_core(Complex *f, ulong ldn); // aux
// fast hartley transform
// split radix decimation in frequency algorithm
// output data is in revbin_permuted order

void split_radix_dif_fht(Complex *f, ulong ldn);
// fast hartley transform
// split radix decimation in frequency algorithm

// ----- SRCFILE=fht/fht0.cc: -----
// tuning parameter:
#if defined TRIG_REC
#warning 'FYI: fht0(double *, ulong) uses trig recursion'
#include <math.h>
#endif

// tuning parameter:
#define  INITIAL_RADIX_16  1 // 0 or 1 (default)
//
#if  ( INITIAL_RADIX_16==1 )
#warning 'FYI: INITIAL_RADIX_16 set in fht0(double *, ulong)'
#else
#warning 'FYI: INITIAL_RADIX_16 is NOT SET in fht0(double *, ulong)'
#endif

void fht0(double *f, ulong ldn);
// fast hartley transform
// version for zero padded data (i.e. f[n/2]...f[n-1] are zero)
// split radix decimation in time algorithm

// ----- SRCFILE=fht/cfht0.cc: -----
// tuning parameter:
#if defined TRIG_REC
#warning 'FYI: fht0(Complex *, ulong) uses trig recursion'
#include <math.h>
#endif

// tuning parameter:
#define  INITIAL_RADIX_16  1 // 0 or 1 (default)
//
#if  ( INITIAL_RADIX_16==1 )
#warning 'FYI: INITIAL_RADIX_16 set in fht0(Complex *, ulong)'
#else
#warning 'FYI: INITIAL_RADIX_16 is NOT SET in fht0(Complex *, ulong)'
#endif

void fht0(Complex *f, ulong ldn);
// fast hartley transform
// version for zero padded data (i.e. f[n/2]...f[n-1] are zero)
// split radix decimation in time algorithm

// ----- SRCFILE=fht/hartleyshift.cc: -----
void hartley_shift_05(double *a, ulong n); // aux
// hartley analogue to fourier_shift(a,n,0.5)
// used for negacyclic convolution
// and recursive (radix 2) fhts

// ----- SRCFILE=fht/skipfht.cc: -----
void skip_fht(double *f, ulong n, ulong d, double *w);
// compute fht of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

void skip_fht0(double *f, ulong n, ulong d, double *w);
// compute fht of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

// ----- SRCFILE=fht/twodimfht.cc: -----
void row_column_fht(double *f, ulong r, ulong c); // aux
// fht over rows and columns
// this is _not_ a two dimensional fht

void y_transform(double *f, ulong r, ulong c); // aux
// transforms row-column-fht to 2dim fht
// self-inverse

void twodim_fht(double *f, ulong r, ulong c);
// two dimensional fast hartley transform

// ----- SRCFILE=fht/twodimfhtcnvl.cc: -----
inline void fht_cnvl_core_core(const double *fp, const double *fm, double *gp, double *gm);

void twodim_fht_convolution_core(const double *f, double *g, ulong r, ulong c);
//   ! UNTESTED !

// --------------- FOURIER transforms ------------------

// typical format of arguments: (double *fr, double *fi, ulong ldn, int is)
// fr := pointer to data array (real part),
// fi := pointer to data array (imag part),
// ldn := base 2 log of array length
// is := sign of exponent in fourier kernel

// naming (semi-) conventions:
// blabla_fft() := fft implementation with blabla-algorithm
// blabla_fft0() := fft for zero padded data
//             (i.e. second half of the data is zero)

// ----- SRCFILE=fht/fhtfft.cc: -----
void fht_fft(double *fr, double *fi, ulong ldn, int is);
// fft based on fht

void fht_fft0(double *fr, double *fi, ulong ldn, int is);
// fft based on fht
// version for zero padded data

void fht_fft_pre_processing(double *fr, double *fi, ulong ldn, int is); // aux
// preprocessing to use two length-n fhts
// to compute a length-n complex fft

void fht_fft_post_processing(double *fr, double *fi, ulong ldn, int is); // aux
// postprocessing to use two length-n fhts
// to compute a length-n complex fft

// ----- SRCFILE=fht/fhtcfft.cc: -----
void fht_fft(Complex *f, ulong ldn, int is);
// fft based on fht

void fht_fft0(Complex *f, ulong ldn, int is);
// fft based on fht
// version for zero padded data

void fht_fft_pre_processing(Complex *f, ulong ldn, int is); // aux
// preprocessing to use one length-n (complex) fht
// to compute a length-n complex fft

void fht_fft_post_processing(Complex *f, ulong ldn, int is); // aux
// postprocessing to use one length-n (complex) fht
// to compute a length-n complex fft

// ----- SRCFILE=fft/fftdif4.cc: -----
void dif4_fft_core(double *fr, double *fi, ulong ldn); // aux
// radix 4 decimation in frequency fft
// isign = +1
// output data is in revbin_permuted order

void dif4_fft_core(Complex *f, ulong ldn); // aux
// radix 4 decimation in frequency fft
// isign = +1
// output data is in revbin_permuted order

void dif4_fft(double *fr, double *fi, ulong ldn, int is);
// fast fourier transform
// radix 4 decimation in frequency algorithm

// ----- SRCFILE=fft/fftdit4.cc: -----
void dit4_fft_core(double *fr, double *fi, ulong ldn); // aux
// radix 4 decimation in time fft
// isign = +1
// input data must be in revbin_permuted order

void dit4_fft_core(Complex *f, ulong ldn); // aux
// radix 4 decimation in time fft
// isign = -1
// input data must be in revbin_permuted order

void dit4_fft(double *fr, double *fi, ulong ldn, int is);
// fast fourier transform
// radix 4 decimation in time algorithm

// ----- SRCFILE=fft/cfftsplitradix.cc: -----
// tuning parameter:
//#define USE_SINCOS3  // default = off
// whether sincos is used for 3*angle
// else: use algebraic relation

void split_radix_dif_fft_core(Complex *f, ulong ldn); // aux
// split radix decimation in frequency fft
// isign = +1
// output data is in revbin_permuted order

void split_radix_dit_fft_core(Complex *f, ulong ldn); // aux
// split radix decimation in time fft
// isign = -1
// input data must be in revbin_permuted order

void split_radix_fft(Complex *f, ulong ldn, int is);
// fast fourier transform
// split radix algorithm

// ----- SRCFILE=fft/fftsplitradix.cc: -----
// tuning parameter:
//#define USE_SINCOS3  // default = off
// whether sincos is used for 3*angle
// else: use algebraic relation

void split_radix_fft_dif_core(double *fr, double *fi, ulong ldn); // aux
// split-radix decimation in frequency fft
// output data is in revbin_permuted order

void split_radix_fft(double *fr, double *fi, ulong ldn, int is);
// fast fourier transform
// split radix algorithm

// ----- SRCFILE=fft/cfftwrap.cc: -----
void complex_to_real_imag(Complex *c, long n); // aux
// this routine transforms
// complex data into two separate fields
// with real and imag data (inplace)

void real_imag_to_complex(double *fr, double *fi, long n); // aux
// this routine transforms
// two separate fields with real and imag
// data into complex data (inplace)
// (the data must lie in contiguous memory)

void complex_fft(Complex *c, ulong ldn, int is);
// FFT wrapper to use the routines that use the data
// in the real/imag form for type complex data

void real_imag_fft(double *fr, double *fi, ulong ldn, int is);
// FFT wrapper to use the routines that use the data
// in the complex form for data in real/imag form

// ----- SRCFILE=fft/fouriershift.cc: -----
void fourier_shift(Complex *a, ulong n, double v); // aux
// a[k] *= exp(k*v*sqrt(-1)*2*pi/n)
// k = 0...n-1

void fourier_shift(double *fr, double *fi, ulong n, double v); // aux
// (fr[k],fi[k]) *= exp(k*v*sqrt(-1)*2*pi/n)
// k = 0...n-1

void fourier_shift0(double *fr, double *fi, ulong n, double v); // aux
// (fr[k],fi[k]) *= exp(k*v*sqrt(-1)*2*pi/n)
// k = 0...n-1
// assume fi[] is zero

void fourier_shift(double *fr, double *fi, ulong n, double v, ulong k0, ulong kn); // aux
// (fr[k],fi[k]) *= exp((k0+k)*v*sqrt(-1)*2*pi/n)
// k = 0...kn-1

void fourier_shift0(double *fr, double *fi, ulong n, double v, ulong k0, ulong kn); // aux
// (fr[k],fi[k]) *= exp((k0+k)*v*sqrt(-1)*2*pi/n)
// k = 0...kn-1
// assume fi[] is zero

// ----- SRCFILE=fft/skipfft.cc: -----
void skip_fft(double *fr, double *fi, ulong n, ulong d,
         double *wr, double *wi, int is); // aux
// compute fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

void skip_fft0(double *fr, double *fi, ulong n, ulong d,
          double *wr, double *wi, int is); // aux
// compute fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]
// version for zero padded data

// ----- SRCFILE=fft/fft8difcore.cc: -----
void fft8_dif_core(Complex *f); // aux
// 8-point decimation in frequency fft, isign = +1
// output data is in revbin_permuted order

void fft8_dif_core(double *fr, double *fi); // aux
// 8-point decimation in frequency fft, isign = +1
// output data is in revbin_permuted order

// ----- SRCFILE=fft/fft8ditcore.cc: -----
void fft8_dit_core(Complex *f); // aux
// 8-point decimation in time fft, isign = -1
// input data must be in revbin_permuted order

void fft8_dit_core(double *fr, double *fi); // aux
// 8-point decimation in time fft, isign = -1
// input data must be in revbin_permuted order

// ----- SRCFILE=fft/fft9.cc: -----
void fft9(Complex *x); // aux
// 9-point fft,  isign = -1

void fft9(double *xr, double *xi); // aux
// 9-point fft,  isign = -1

// ---------- MATRIX (aka four step) transforms --------------
// ----- SRCFILE=matrix/matrixfft.cc: -----
void matrix_fft(double *fr, double *fi, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache

void matrix_fft0(double *fr, double *fi, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache
// version for zero padded data

void matrix_fft(Complex *f, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache

void matrix_fft0(Complex *f, ulong ldn, int is);
// matrix (aka four-step) fft
// useful for arrays larger than 2nd-level cache
// version for zero padded data

// ----- SRCFILE=matrix/rowffts.cc: -----
void row_ffts(double *fr, double *fi, ulong r, ulong c, int is); // aux
// r x c matrix (r rows of length c)

void row_weighted_ffts(double *fr, double *fi, ulong r, ulong c, int is); // aux
// r x c matrix (r rows of length c)

void row_ffts(Complex *f, ulong r, ulong c, int is); // aux
// r x c matrix (r rows of length c)

void row_weighted_ffts(Complex *f, ulong r, ulong c, int is); // aux
// r x c matrix (r rows of length c)

// ----- SRCFILE=matrix/rowcnvls.cc: -----
void row_weighted_auto_convolutions(double *fr, double *fi, ulong r, ulong c, double v); // aux
// r x c matrix (r rows, c columns)
// v!=0.0 chooses alternative normalization

void row_weighted_auto_convolutions(Complex *f, ulong r, ulong c, double v); // aux
// r x c matrix (r rows, c columns)
// v!=0.0 chooses alternative normalization

// ----- SRCFILE=matrix/columnffts.cc: -----
void column_ffts(double *fr, double *fi, ulong r, ulong c, int is, 
            int zp, double *tmpr, double *tmpi); // aux
// r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c

void column_ffts(Complex *f, ulong r, ulong c, int is, int zp, Complex *tmp); // aux
// r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c

void column_real_complex_ffts(double *f, ulong r, ulong c, int zp, double *tmp); // aux
// r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c

void column_complex_real_ffts(double *f, ulong r, ulong c, double *tmp); // aux
// r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c

void column_complex_imag_ffts(const double *fr, double *fi, ulong r, ulong c, double *tmp); // aux
// r x c matrix (r rows, c columns)
// length of each col is r
// length of each row is c
// only the imag part of the result is computed

// ----- SRCFILE=matrix/matrixcnvla.cc: -----
// tuning parameter:
#define  CP_ROWS  1  // 0 or 1 (use scratch space, default)
#if  ( CP_ROWS==1 )
#warning 'FYI: matrix_auto_convolution() does row FFTs in scratch space'
#else
#warning 'FYI: matrix_auto_convolution() does row FFTs inplace'
#endif

void matrix_auto_convolution(double *fr, ulong ldn);
// fr[] = fr[] (*) fr[]

void matrix_auto_convolution0(double *fr, ulong ldn);
// fr[] = fr[] (*) fr[]
// version for zero padded data (i.e. linear convolution)

void matrix_auto_convolution(double *fr, ulong r, ulong c, int zp/*=0*/);
// fr[] = fr[] (*) fr[]
// call with zp==1 if high half of data is zero (for linear convolution)

// ----- SRCFILE=matrix/matrixcnvl.cc: -----
void matrix_convolution(double *fr, double *fi, ulong ldn);
// fi[] = fr[] (*) fi[]

void matrix_convolution0(double *fr, double *fi, ulong ldn);
// fi[] = fr[] (*) fi[]
// version for zero padded data (i.e. linear convolution)

// tuning parameter:
#define  CP_ROWS  1  // 0 or 1 (default)
#if  ( CP_ROWS==1 )
#warning 'FYI: matrix_convolution() does row FFTs in scratch space'
#else
#warning 'FYI: matrix_convolution() does row FFTs inplace'
#endif

void matrix_convolution(double *fr, double *fi, ulong r, ulong c, int zp/*=0*/);
// fi[] = fr[] (*) fi[]
// call with zp==1 if high half of data is zero (for linear convolution)

// ----- SRCFILE=matrix/matrixcocnvla.cc: -----
// tuning parameter:
#define  USE_OLD_ALGORITHM    0  // 0 (revbin_permute for transpose)  or 1 
#define  USE_SPLRX  0  // whether split radix dif/dit is used (default=0)
//
#if  ( USE_OLD_ALGORITHM==1 )
#warning "FYI: using simple algorithm for matrix_auto_convolution(Complex *f, ...)"
#else  // USE_OLD_ALGORITHM
#warning "FYI: using revbin_permute for transpose for matrix_auto_convolution(Complex *f, ...)"
#if  ( USE_SPLRX==1 )
#warning 'FYI: matrix_complex_auto_convolution() uses split radix ffts'
#define  DIT_FFT_CORE  split_radix_fft_dit_core
#define  DIF_FFT_CORE  split_radix_fft_dif_core
#else
#warning 'FYI: matrix_complex_auto_convolution() uses radix 4 ffts'
#define  DIT_FFT_CORE  dit4_fft_core
#define  DIF_FFT_CORE  dif4_fft_core
#endif
#endif  // USE_OLD_ALGORITHM

void matrix_auto_convolution(Complex *f, ulong ldn);
// f[] = f[] (*) f[]

void matrix_auto_convolution0(Complex *f, ulong ldn);
// f[] = f[] (*) f[]
// version for zero padded data

void matrix_auto_convolution(Complex *f, ulong r, ulong c, int zp/*=0*/);
// f[] = f[] (*) f[]

void matrix_complex_auto_convolution(double *fr, double *fi, ulong ldn);
// f[] = f[] (*) f[]  where f[] := (fr[], fi[])

void matrix_complex_auto_convolution0(double *fr, double *fi, ulong ldn);
// f[] = f[] (*) f[]  where f[] := (fr[], fi[])
// version for zero padded data

void matrix_complex_auto_convolution(double *fr, double *fi, ulong r, ulong c, int zp/*=0*/);
// f[] = f[] (*) f[]  where f[] := (fr[], fi[])

// ----- SRCFILE=matrix/transpos2.cc: -----
void matrix_transpose2(double *x, ulong rw, ulong cl, double *tmp/*=0*/); // aux
//  matrix transpose
//  only for n*n or n*2n or 2n*n matrix !
// scratchspace, if given must have size max(rw,cl)

// ----- SRCFILE=matrix/ctranspos2.cc: -----
void matrix_transpose2(Complex *x, ulong rw, ulong cl, Complex *tmp/*=0*/); // aux
//  matrix transpose
//  only for n*n or n*2n or 2n*n matrix !
// scratchspace, if given must have size max(rw,cl)

// --------------- REAL FFT ---------------------
// ----- SRCFILE=realfft/realfftbyfht.cc: -----
void fht_real_complex_fft(double *f, ulong ldn);
// isign = +1
// ordering on output:
// f[0]     = re[0]
// f[1]     = re[1]
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2](==nyquist freq)
// f[n/2+1] = im[n/2-1]  (wrt. complex fft with is=+1)
// f[n/2+2] = im[n/2-2]
// f[n-1]   = im[1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n-i]
// note that the order of imaginary parts
// is reversed wrt. wrap_real_complex_fft()

void fht_real_complex_fft0(double *f, ulong ldn);
// version for zero padded data
// ordering on output:
// see fht_real_complex_fft()

void fht_complex_real_fft(double *f, ulong ldn);
// isign = +1
// ordering on input:
// like the output of fht_real_complex_fft()

void realisator(double *gr, const double *gi, ulong n, int is); // aux
// get (only) the real part of a fft by the code:
// realisator(gr,gi,n, is);
// fht_complex_real_fft(gr, ldn);

void imaginator(const double *gr, double *gi, ulong n, int is); // aux
// get (only) the imag part of a fft by the code:
// imaginator(gr,gi,n, is);
// fht_complex_real_fft(gi, ldn);

// ----- SRCFILE=realfft/realfftwrap.cc: -----
void complex_real_pre_processing(double *re, double *im, ulong nh); // aux
// preprocessing to use a complex length nh fft 
// as complex-to-real fft of length 2*nh

void real_complex_post_processing(double *re, double *im, ulong nh); // aux
// postprocessing to use a complex length nh fft 
// as real-to-complex fft of length 2*nh

void wrap_real_complex_fft(double *f, ulong ldn);
// isign = +1
// ordering on output:
// f[0]     = re[0]
// f[1]     = re[1]
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2](==nyquist freq)
// f[n/2+1] = im[1]  (wrt. compl fft with isign=+1)
// f[n/2+2] = im[2]
// f[n-1]   = im[n/2-1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n/2+i]
// note that the order of imaginary parts
// is reversed wrt. fht_real_complex_fft()

void wrap_real_complex_fft0(double *f, ulong ldn);
// isign = +1
// version for zero padded data

void wrap_complex_real_fft(double *f, ulong ldn);
// isign = +1
// ordering on input:
// like the output of wrap_real_complex_fft()

// ----- SRCFILE=realfft/realffteasyord.cc: -----
void easy_ordering_real_complex_fft(double *f, ulong ldn);
// isign = +1
// ordering on output is same as in wrap_real_complex_fft():
// f[0]     = re[0]
// f[1]     = re[1]
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2](==nyquist freq)
// f[n/2+1] = im[1]  (wrt. compl fft with isign=+1)
// f[n/2+2] = im[2]
// f[n-1]   = im[n/2-1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n/2+i]

void easy_ordering_real_complex_fft0(double *f, ulong ldn);
// version for zero padded data
// ordering on output:
// see easy_ordering_real_complex_fft()

void easy_ordering_complex_real_fft(double *f, ulong ldn);
// isign = +1
// expected ordering on input:
// like the output of easy_ordering_real_complex_fft()

// ----- SRCFILE=realfft/realfftsplitradix.cc: -----
// tuning parameter:
#define  USE_SINCOS3  0  // 0 to use algebra (default) or 1 for sincos call

// tuning parameter:
#define  POSITIVE_ISIGN  0 // 0 (isign=-1, default) or 1 (isign=+1, extra work!)
// isign (default = -1) may be changed here
// set to 1 to have same isign (+1) as in
// fht_real_complex_fft(), wrap_real_complex_fft(),
// and easy_ordering_real_complex_fft()

void split_radix_real_complex_fft(double *x, ulong ldn);
// isign = -1 (unless POSITIVE_ISIGN is #defined to 1)
// length is n=2**ldn
// ordering on output:
// f[0]     = re[0] (==zero frequency, purely real)
// f[1]     = re[1]
// f[n/2-1] = re[n/2-1]
// f[n/2]   = re[n/2]    (==nyquist frequency, purely real)
// f[n/2+1] = im[n/2-1]
// f[n/2+2] = im[n/2-2]
// f[n-1]   = im[1]
// corresponding real and imag parts (with the exception of
// zero and nyquist freq) are found in f[i] and f[n-i]
// the order of imaginary parts is the same as in fht_real_complex_fft
// (reversed wrt. easy_ordering_real_complex_fft())

void split_radix_real_complex_fft0(double *x, ulong ldn);
// version for zero padded arrays

void split_radix_complex_real_fft(double *x, ulong ldn);
// isign = -1 (unless POSITIVE_ISIGN is #defined to 1)
// length is n=2**ldn
// expected ordering on input:
// like the output of split_radix_real_complex_fft()

void split_radix_real_complex_fft_dit_core(double *x, ulong ldn); // aux

void split_radix_complex_real_fft_dif_core(double *x, ulong ldn); // aux

// ----- SRCFILE=realfft/skiprealfft.cc: -----
void skip_real_complex_fft(double *f, ulong n, ulong d, double *w); // aux
// compute real to complex fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

void skip_real_complex_fft0(double *f, ulong n, ulong d, double *w); // aux
// compute real_fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

void skip_complex_real_fft(double *f, ulong n, ulong d, double *w); // aux
// compute complex to real fft of the n elements
// [0],[d],[2d],[3d],...,[(n-1)*d]

// --------------- REAL CONVOLUTION ------------------
// ----- SRCFILE=fht/fhtcnvl.cc: -----
// tuning parameter:
#define  FHT_CONVOLUTION_VERSION  0  // 0 (default) or 1
//
#if  ( FHT_CONVOLUTION_VERSION==0 )
#warning 'FYI fht_convolution(double *, ulong) using revbin_permuted_core'
#else
#warning 'FYI fht_convolution(double *, ulong) using normal core'
#endif

void fht_convolution(double *x, double *y, ulong ldn);
// y[] = x[] (*) y[]

void fht_convolution0(double *x, double *y, ulong ldn);
// y[] = x[] (*) y[]
// version for zero padded data (i.e. linear convolution)

void fht_convolution_core(double *x, double *y, ulong ldn,
                     double v/*=0.0*/); // aux
// v!=0.0 chooses alternative normalization

void fht_convolution_revbin_permuted_core(double *f, double *g, ulong ldn,
                               double v/*=0.0*/); // aux
// as fht_convolution_core() with data access in revbin order

// ----- SRCFILE=fht/fhtcnvla.cc: -----
// tuning parameter:
#define  FHT_AUTO_CONVOLUTION_VERSION  0  // 0 (default) or 1
//
#if  ( FHT_AUTO_CONVOLUTION_VERSION==0 )
#warning 'FYI fht_auto_convolution(double *, ulong) using revbin_permuted_core'
#else
#warning 'FYI fht_auto_convolution(double *, ulong) using normal core'
#endif

void fht_auto_convolution(double *x, ulong ldn);
// x[] = x[] (*) x[]

void fht_auto_convolution0(double *x, ulong ldn);
// x[] = x[] (*) x[]
// version for zero padded data (i.e. linear convolution)

void fht_auto_convolution_core(double *x, ulong ldn,
                          double v/*=0.0*/); // aux
// v!=0.0 chooses alternative normalization

void fht_auto_convolution_revbin_permuted_core(double *f, ulong ldn,
                                    double v/*=0.0*/); // aux
// as above with data access in revbin order

// ----- SRCFILE=fht/fhtnegacnvla.cc: -----
void fht_negacyclic_auto_convolution(double *x, ulong ldn, double v/*=0.0*/);
// v!=0.0 chooses alternative normalization

void fht_negacyclic_auto_convolution_core(double *x, ulong ldn, double v/*=0.0*/); // aux
// v!=0.0 chooses alternative normalization

// ----- SRCFILE=fft/fftcnvl.cc: -----
void fht_fft_convolution(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]

void split_radix_fft_convolution(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]

void fht_fft_convolution0(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]

void split_radix_fft_convolution0(double *f, double *g, ulong ldn);
// g[] = f[] (*) g[]

void fft_convolution_core1(double *f, double *g, ulong ldn, double v/*=0.0*/); // aux

void fft_convolution_core2(double *f, double *g, ulong ldn, double v/*=0.0*/); // aux

// ----- SRCFILE=fft/fftcnvla.cc: -----
void fht_fft_auto_convolution(double *f, ulong ldn);
// f[] = f[] (*) f[]

void split_radix_fft_auto_convolution(double *f, ulong ldn);
// f[] = f[] (*) f[]

void fht_fft_auto_convolution0(double *f, ulong ldn);
// f[] = f[] (*) f[]

void split_radix_fft_auto_convolution0(double *f, ulong ldn);
// f[] = f[] (*) f[]

void fft_auto_convolution_core1(double *f, ulong ldn, double v/*=0.0*/); // aux

void fft_auto_convolution_core2(double *f, ulong ldn, double v/*=0.0*/); // aux

// ----- SRCFILE=mult/diskcnvla.cc: -----
void disk_weighted_complex_auto_convolution(int fd1, int fd2,
                                       double *fr, ulong fn, ulong al,
                                       double w,
                                       double nx,
                                       int zq1/*=0*/, int zq3/*=0*/);
// fd1/fd2: real/imag part of data  (#= fn*al)
// fr[0,...,fn-1]: workspace
// w:  weight for whole convolution
// nx: additional normalization factor (cf. disk_row_pass())

// -------------- REAL CORRELATION ---------------
// ----- SRCFILE=fht/fhtcorr.cc: -----
void fht_correlation0(double *f, double *g, ulong ldn);
// result in g

void fht_auto_correlation0(double *f, ulong ldn);

// ----- SRCFILE=fft/fftcorr.cc: -----
void fft_correlation0(double *f, double *g, ulong ldn);
// result in g

void fft_auto_correlation0(double *f, ulong ldn);

// ------------ COmplex COnvolution & COrrelation -----------------
// ----- SRCFILE=fht/cfhtcnvl.cc: -----
// tuning parameter:
#define  FHT_CONVOLUTION_VERSION  0  // 0 (default) or 1
//
#if  ( FHT_CONVOLUTION_VERSION==0 )
#warning 'FYI fht_convolution(Complex *, ulong) using revbin_permuted_core'
#else
#warning 'FYI fht_convolution(Complex *, ulong) using normal core'
#endif

void fht_convolution(Complex *x, Complex *y, ulong ldn);
// y[] = x[] (*) y[]

void fht_convolution0(Complex *x, Complex *y, ulong ldn);
// y[] = x[] (*) y[]
// version for zero padded data (i.e. linear convolution)

void fht_convolution_core(Complex *x, Complex *y, ulong ldn,
                     double v/*=0.0*/); // aux
// v!=0.0 chooses alternative normalization

void fht_convolution_revbin_permuted_core(Complex *f, Complex *g, ulong ldn,
                               double v/*=0.0*/); // aux
// as fht_convolution_core() with data access in revbin order

// ----- SRCFILE=fht/cfhtcnvla.cc: -----
// tuning parameter:
#define  FHT_AUTO_CONVOLUTION_VERSION  0  // 0 (default) or 1
//
#if  ( FHT_AUTO_CONVOLUTION_VERSION==0 )
#warning 'FYI fht_auto_convolution(Complex *, ulong) using revbin_permuted_core'
#else
#warning 'FYI fht_auto_convolution(Complex *, ulong) using normal core'
#endif

void fht_auto_convolution(Complex *x, ulong ldn);
// x[] = x[] (*) x[]

void fht_auto_convolution0(Complex *x, ulong ldn);
// x[] = x[] (*) x[]
// version for zero padded data (i.e. linear convolution)

void fht_auto_convolution_core(Complex *x, ulong ldn,
                          double v/*=0.0*/); // aux
// v!=0.0 chooses alternative normalization

void fht_auto_convolution_revbin_permuted_core(Complex *f, ulong ldn,
                                    double v/*=0.0*/); // aux
// as above with data access in revbin order

// ----- SRCFILE=fft/fftcocnvl.cc: -----
// tuning parameter:
#define  USE_SPLRX  0  // whether split radix dif/dit is used (default=0)
#if ( USE_SPLRX==1 )
#warning 'FYI: complex_(auto)_convolution() use split radix ffts'
#define  DIT_FFT_CORE  split_radix_fft_dit_core
#define  DIF_FFT_CORE  split_radix_fft_dif_core
#else
#warning 'FYI: complex_(auto)_convolution() use radix 4 ffts'
#define  DIT_FFT_CORE  dit4_fft_core
#define  DIF_FFT_CORE  dif4_fft_core
#endif

void fft_auto_convolution(Complex *f, ulong ldn,
                     double v/*=0.0*/);

void fft_convolution(Complex *f, Complex *g,
                ulong ldn, double v/*=0.0*/);
// _cyclic_ convolution
// (use zero padded data for usual conv.)
// f, g  must not overlap
// result in g

void fft_complex_auto_convolution(double *fr, double *fi,
                             ulong ldn, double v/*=0.0*/);
// _cyclic_ (self-)convolution
// (use zero padded data for usual conv.)
// fr,fi must not overlap

void fft_complex_convolution(double *fr, double *fi,
                        double *gr, double *gi,
                        ulong ldn, double v/*=0.0*/);
// _cyclic_ convolution
// (use zero padded data for usual conv.)
// fr,fi,gr,gi must be pairwise non-overlapping
// result in gr,gi

// ----- SRCFILE=fft/fftcocorr.cc: -----
void fft_complex_auto_correlation(double *fr, double *fi, ulong ldn);
// _cyclic_ (self-)correlation
// (use zero padded data for usual corr.)
// fr,fi must not overlap

void fft_complex_correlation(double *fr, double *fi,
                        double *gr, double *gi,
                        ulong ldn);
// _cyclic_ correlation
// (use zero padded data for usual conv.)
// fr,fi,gr,gi must be pairwise non-overlapping
// result in gr,gi

// --------------- SPECTRUM ------------------
// ----- SRCFILE=fht/fhtspect.cc: -----
void  fht_spectrum(double *f, ulong ldn, int phasesq/*=0*/);
// power_spectrum computed with fht
// phasesq != 0  requests computation of phases
// phase[i] is in f[n-i]  (i=1...n/2-1)
// phase[0] == 0,  phase[n/2] == 0
// output is not normalized

// ----- SRCFILE=fft/fftspect.cc: -----
void fft_spectrum(double *f, ulong ldn, int phasesq/*=0*/);
// power_spectrum computed with fft
// phasesq != 0  requests computation of phases
// phase[i] is in f[n-i]  (i=1...n/2-1)
// phase[0] == 0,  phase[n/2] == 0
// output is not normalized

// -------- OTHER FOURIER STUFF ------------------
// ----- SRCFILE=chirp/fftarblen.cc: -----
void fft_arblen(double *x, double *y, ulong n, int is);
// arbitrary length fft

// ----- SRCFILE=chirp/fftfract.cc: -----
void fft_fract(double *x, double *y, ulong n, double v);
// fractional (fast) fourier transform
// for complex array c[0...n]
// compute \sum_{x=0}^{n}{c_x*exp(is*v*2*i*\pi*x*k/n)}
//  (for v==1.0 this is just the usual fft)
//  LITTLE TESTED !
// use n*k == n^2/2 + k^2/2 - (k-n)^2/2
// (see: nussbaumer: FFT and convolution algorithms 5.1)
// ( could use n*k == - n^2/2 - k^2/2 + (k+n)^2/2 instead )
// nn is the smallest power of 2 >=2*n
// worst case if n=2^x+1:
//   then nn=4*2^x
//   work is about 12 times a fft of length 2^x
//   and allocated workspace =4*nn

// ----- SRCFILE=ndimfft/twodimfft.cc: -----
void twodim_fft(double *fr, double *fi, ulong r, ulong c, int is);

// ----- SRCFILE=ndimfft/ndimfft.cc: -----
void ndim_fft(double *fr, double *fi, ulong ndim, const ulong *ldn, int is);
// ndim must be 1,2,3,4 or 5
// ldn[] contains base 2 logarithms of dimensions

// ----- SRCFILE=chirp/makechirp.cc: -----
void make_fft_chirp(double *wr, double *wi, ulong n, int is); // aux
// for k=0..n-1:
// Complex(wr[k],wi[k]) == exp(sqrt(-1)*k*k*ph0/2)
// for k=n..nn-1:
// Complex(wr[k],wi[k]) == (0,0)

void complete_fft_chirp(double *wr, double *wi, ulong n, ulong nn); // aux
// exp(sqrt(-1)*pi*k*k/n) for k>=n :
// k==n+j, k*k==n*n+2*j*n+j*j
// exp(sqrt(-1)*pi/n*k*k)==exp(+-sqrt(-1)*pi*j*j/n)
// with plus for n even, minus for n odd

void make_fft_fract_chirp(double *wr, double *wi, double v, ulong n, ulong nn); // aux
// for k=0..nn:
// Complex(wr[k],wi[k]) == exp(sqrt(-1)*v*k*k*2*pi/n/2)

// ---------- COSINE/SINE TRANSFORM ----------
// ----- SRCFILE=dctdst/dst.cc: -----
void dst(double *f, ulong ldn, double *tmp/*=0*/);
// basis: sin(k*i*M_PI/n)
// self-inverse

// ----- SRCFILE=dctdst/dcth.cc: -----
static void cos_rot(const double *x, double *y, ulong n);

void dcth(double *x, ulong ldn, double *tmp/*=0*/);
// transform wrt. basis: cos(k*(i+0.5)*PI/n) * (k==0?1.0:sqrt(2))
// H. Malvar's algorithm: dct by fht
// if a (size-n) scratchspace is supplied (tmp!=0)
// then the slightly faster version of inverse_evenoddrev_permute is used

void idcth(double *x, ulong ldn, double *tmp/*=0*/);
// inverse transform wrt. basis: cos(k*(i+0.5)*PI/n) * (k==0?1.0:sqrt(2))
// H. Malvar's algorithm: idct by fht
// if a (size-n) scratchspace is supplied (tmp!=0)
// then the slightly faster version of inverse_evenoddrev_permute is used

// ----- SRCFILE=dctdst/dctzapata.cc: -----
static void coseno(double *x, ulong ldn);
// called by dct_zapata():
// input:
// c[k] = \sum_{m=0}^{n-1}{ \prod_{i=0}^{ldn-1}{ 2^{b_i} \cos(b_i 2^i \Theta_m) x(m)} }
// where n=2^ldn, \Theta_m=\pi (1+4m)/(2n), b_i is bit #i of k
// output:
// c[k] = \sum_{m=0}^{n-1}{ \cos(\Theta_m) x(m)}

void dct_zapata(double *x, ulong ldn, double *tmp/*=0*/);
// transform wrt. basis: cos(k*(i+0.5)*PI/n) * (k==0?1.0:sqrt(2))
// algorithm as described in F.Arguello, E.L.Zapata:
// "Fast Cosine Transform on the Succesive Doubling Method"
// if a (size-n) srcatchspace is supplied (tmp!=0)
// then the slightly faster version of inverse_evenoddrev_permute is used
// the inverse_evenoddrev_permute step is not mentioned in the paper !

// ----- SRCFILE=dctdst/dsth.cc: -----
void dsth(double *x, ulong ldn, double *tmp/*=0*/);
// basis: sin((k+1)*(i+0.5)*M_PI/n) * sqrt(2)

void idsth(double *x, ulong ldn, double *tmp/*=0*/);

// ---------- WALSH TRANSFORM ----------
// ----- SRCFILE=walsh/walshdif2.cc: -----
void dif2_walsh_wak(double *f, ulong ldn);
// transform wrt. to walsh-kronecker basis (wak-functions)
// the basis: (sequency at end of lines, '*':=1, ' ':=-1)
// 0: [* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *] ( 0)
// 1: [*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *  ] (31)
// 2: [* *     * *     * *     * *     * *     * *     * *     * *    ] (15)
// 3: [*     * *     * *     * *     * *     * *     * *     * *     *] (16)
// 4: [* * * *         * * * *         * * * *         * * * *        ] ( 7)
// 5: [*   *     *   * *   *     *   * *   *     *   * *   *     *   *] (24)
// 6: [* *         * * * *         * * * *         * * * *         * *] ( 8)
// 7: [*     *   * *   *     *   * *   *     *   * *   *     *   * *  ] (23)
// 8: [* * * * * * * *                 * * * * * * * *                ] ( 3)
// 9: [*   *   *   *     *   *   *   * *   *   *   *     *   *   *   *] (28)
//10: [* *     * *         * *     * * * *     * *         * *     * *] (12)
//11: [*     * *     *   * *     * *   *     * *     *   * *     * *  ] (19)
//12: [* * * *                 * * * * * * * *                 * * * *] ( 4)
//13: [*   *     *   *   *   * *   *   *   *     *   *   *   * *   *  ] (27)
//14: [* *         * *     * * * *     * *         * *     * * * *    ] (11)
//15: [*     *   * *     * *   *     * *     *   * *     * *   *     *] (20)
//16: [* * * * * * * * * * * * * * * *                                ] ( 1)
//17: [*   *   *   *   *   *   *   *     *   *   *   *   *   *   *   *] (30)
//18: [* *     * *     * *     * *         * *     * *     * *     * *] (14)
//19: [*     * *     * *     * *     *   * *     * *     * *     * *  ] (17)
//20: [* * * *         * * * *                 * * * *         * * * *] ( 6)
//21: [*   *     *   * *   *     *   *   *   * *   *     *   * *   *  ] (25)
//22: [* *         * * * *         * *     * * * *         * * * *    ] ( 9)
//23: [*     *   * *   *     *   * *     * *   *     *   * *   *     *] (22)
//24: [* * * * * * * *                                 * * * * * * * *] ( 2)
//25: [*   *   *   *     *   *   *   *   *   *   *   * *   *   *   *  ] (29)
//26: [* *     * *         * *     * *     * *     * * * *     * *    ] (13)
//27: [*     * *     *   * *     * *     * *     * *   *     * *     *] (18)
//28: [* * * *                 * * * *         * * * * * * * *        ] ( 5)
//29: [*   *     *   *   *   * *   *     *   * *   *   *   *     *   *] (26)
//30: [* *         * *     * * * *         * * * *     * *         * *] (10)
//31: [*     *   * *     * *   *     *   * *   *     * *     *   * *  ] (21)
// self-inverse

void dif2_walsh_wal(double *f, ulong ldn);
// transform wrt. to walsh-kaczmarz basis (wal-functions)
// the basis: (sequency at end of lines, '*':=1, ' ':=-1)
// 0: [* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *] ( 0)
// 1: [* * * * * * * * * * * * * * * *                                ] ( 1)
// 2: [* * * * * * * *                                 * * * * * * * *] ( 2)
// 3: [* * * * * * * *                 * * * * * * * *                ] ( 3)
// 4: [* * * *                 * * * * * * * *                 * * * *] ( 4)
// 5: [* * * *                 * * * *         * * * * * * * *        ] ( 5)
// 6: [* * * *         * * * *                 * * * *         * * * *] ( 6)
// 7: [* * * *         * * * *         * * * *         * * * *        ] ( 7)
// 8: [* *         * * * *         * * * *         * * * *         * *] ( 8)
// 9: [* *         * * * *         * *     * * * *         * * * *    ] ( 9)
//10: [* *         * *     * * * *         * * * *     * *         * *] (10)
//11: [* *         * *     * * * *     * *         * *     * * * *    ] (11)
//12: [* *     * *         * *     * * * *     * *         * *     * *] (12)
//13: [* *     * *         * *     * *     * *     * * * *     * *    ] (13)
//14: [* *     * *     * *     * *         * *     * *     * *     * *] (14)
//15: [* *     * *     * *     * *     * *     * *     * *     * *    ] (15)
//16: [*     * *     * *     * *     * *     * *     * *     * *     *] (16)
//17: [*     * *     * *     * *     *   * *     * *     * *     * *  ] (17)
//18: [*     * *     *   * *     * *     * *     * *   *     * *     *] (18)
//19: [*     * *     *   * *     * *   *     * *     *   * *     * *  ] (19)
//20: [*     *   * *     * *   *     * *     *   * *     * *   *     *] (20)
//21: [*     *   * *     * *   *     *   * *   *     * *     *   * *  ] (21)
//22: [*     *   * *   *     *   * *     * *   *     *   * *   *     *] (22)
//23: [*     *   * *   *     *   * *   *     *   * *   *     *   * *  ] (23)
//24: [*   *     *   * *   *     *   * *   *     *   * *   *     *   *] (24)
//25: [*   *     *   * *   *     *   *   *   * *   *     *   * *   *  ] (25)
//26: [*   *     *   *   *   * *   *     *   * *   *   *   *     *   *] (26)
//27: [*   *     *   *   *   * *   *   *   *     *   *   *   * *   *  ] (27)
//28: [*   *   *   *     *   *   *   * *   *   *   *     *   *   *   *] (28)
//29: [*   *   *   *     *   *   *   *   *   *   *   * *   *   *   *  ] (29)
//30: [*   *   *   *   *   *   *   *     *   *   *   *   *   *   *   *] (30)
//31: [*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *  ] (31)
// the wal functions are sequency-ordered
// self-inverse

void dif2_walsh_pal(double *f, ulong ldn);
// transform wrt. to walsh-paley basis (pal-functions)
// the basis: (sequency at end of lines, '*':=1, ' ':=-1)
// 0: [* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *] ( 0)
// 1: [* * * * * * * * * * * * * * * *                                ] ( 1)
// 2: [* * * * * * * *                 * * * * * * * *                ] ( 3)
// 3: [* * * * * * * *                                 * * * * * * * *] ( 2)
// 4: [* * * *         * * * *         * * * *         * * * *        ] ( 7)
// 5: [* * * *         * * * *                 * * * *         * * * *] ( 6)
// 6: [* * * *                 * * * * * * * *                 * * * *] ( 4)
// 7: [* * * *                 * * * *         * * * * * * * *        ] ( 5)
// 8: [* *     * *     * *     * *     * *     * *     * *     * *    ] (15)
// 9: [* *     * *     * *     * *         * *     * *     * *     * *] (14)
//10: [* *     * *         * *     * * * *     * *         * *     * *] (12)
//11: [* *     * *         * *     * *     * *     * * * *     * *    ] (13)
//12: [* *         * * * *         * * * *         * * * *         * *] ( 8)
//13: [* *         * * * *         * *     * * * *         * * * *    ] ( 9)
//14: [* *         * *     * * * *     * *         * *     * * * *    ] (11)
//15: [* *         * *     * * * *         * * * *     * *         * *] (10)
//16: [*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *  ] (31)
//17: [*   *   *   *   *   *   *   *     *   *   *   *   *   *   *   *] (30)
//18: [*   *   *   *     *   *   *   * *   *   *   *     *   *   *   *] (28)
//19: [*   *   *   *     *   *   *   *   *   *   *   * *   *   *   *  ] (29)
//20: [*   *     *   * *   *     *   * *   *     *   * *   *     *   *] (24)
//21: [*   *     *   * *   *     *   *   *   * *   *     *   * *   *  ] (25)
//22: [*   *     *   *   *   * *   *   *   *     *   *   *   * *   *  ] (27)
//23: [*   *     *   *   *   * *   *     *   * *   *   *   *     *   *] (26)
//24: [*     * *     * *     * *     * *     * *     * *     * *     *] (16)
//25: [*     * *     * *     * *     *   * *     * *     * *     * *  ] (17)
//26: [*     * *     *   * *     * *   *     * *     *   * *     * *  ] (19)
//27: [*     * *     *   * *     * *     * *     * *   *     * *     *] (18)
//28: [*     *   * *   *     *   * *   *     *   * *   *     *   * *  ] (23)
//29: [*     *   * *   *     *   * *     * *   *     *   * *   *     *] (22)
//30: [*     *   * *     * *   *     * *     *   * *     * *   *     *] (20)
//31: [*     *   * *     * *   *     *   * *   *     * *     *   * *  ] (21)
// self-inverse

// ----- SRCFILE=walsh/walshdit2.cc: -----
void dit2_walsh_wak(double *f, ulong ldn);
// transform wrt. to walsh-kronecker basis (wak-functions)
// self-inverse
// same transform as dif2_walsh_wak()

void dit2_walsh_wal(double *f, ulong ldn);
// transform wrt. to walsh-kaczmarz basis (wal-functions)
// the wal functions are sequency-ordered
// self-inverse
// same transform as dif2_walsh_wal()

void dit2_walsh_pal(double *f, ulong ldn);
// transform wrt. to walsh-paley basis (pal-functions)
// self-inverse
// same transform as dif2_walsh_pal()

// ----- SRCFILE=walsh/walshseq.cc: -----
void walsh_seq(double *f, ulong ldn);
// walsh transform wrt. the basis (sequency at end of lines, '*':=1, ' ':=-1)
// 0: [* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *] ( 0)
// 1: [* * * * * * * * * * * * * * * *                                ] ( 1)
// 2: [                * * * * * * * * * * * * * * * *                ] ( 2)
// 3: [* * * * * * * *                 * * * * * * * *                ] ( 3)
// 4: [        * * * * * * * *                 * * * * * * * *        ] ( 4)
// 5: [        * * * * * * * *         * * * *                 * * * *] ( 5)
// 6: [        * * * *         * * * * * * * *         * * * *        ] ( 6)
// 7: [* * * *         * * * *         * * * *         * * * *        ] ( 7)
// 8: [    * * * *         * * * *         * * * *         * * * *    ] ( 8)
// 9: [    * * * *         * * * *     * *         * * * *         * *] ( 9)
//10: [* *         * *     * * * *         * * * *     * *         * *] (10)
//11: [    * * * *     * *         * *     * * * *     * *         * *] (11)
//12: [    * *     * * * *     * *         * *     * * * *     * *    ] (12)
//13: [    * *     * * * *     * *     * *     * *         * *     * *] (13)
//14: [    * *     * *     * *     * * * *     * *     * *     * *    ] (14)
//15: [* *     * *     * *     * *     * *     * *     * *     * *    ] (15)
//16: [  * *     * *     * *     * *     * *     * *     * *     * *  ] (16)
//17: [  * *     * *     * *     * *   *     * *     * *     * *     *] (17)
//18: [*     * *     *   * *     * *     * *     * *   *     * *     *] (18)
//19: [  * *     * *   *     * *     *   * *     * *   *     * *     *] (19)
//20: [*     *   * *     * *   *     * *     *   * *     * *   *     *] (20)
//21: [*     *   * *     * *   *     *   * *   *     * *     *   * *  ] (21)
//22: [*     *   * *   *     *   * *     * *   *     *   * *   *     *] (22)
//23: [  * *   *     *   * *   *     *   * *   *     *   * *   *     *] (23)
//24: [  *   * *   *     *   * *   *     *   * *   *     *   * *   *  ] (24)
//25: [  *   * *   *     *   * *   *   *   *     *   * *   *     *   *] (25)
//26: [*   *     *   *   *   * *   *     *   * *   *   *   *     *   *] (26)
//27: [  *   * *   *   *   *     *   *   *   * *   *   *   *     *   *] (27)
//28: [  *   *   *   * *   *   *   *     *   *   *   * *   *   *   *  ] (28)
//29: [  *   *   *   * *   *   *   *   *   *   *   *     *   *   *   *] (29)
//30: [  *   *   *   *   *   *   *   * *   *   *   *   *   *   *   *  ] (30)
//31: [*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *  ] (31)

void inverse_walsh_seq(double *f, ulong ldn);
// walsh transform wrt. the basis (sequency at end of lines, '*':=1, ' ':=-1)
// 0: [* *   *       *     *         *     *   * * *       *         *] (16)
// 1: [* *   *       *     *         * * *   *       * * *   * * * *  ] (15)
// 2: [* *   *       * * *   * * * *   * *   *       *     *         *] (16)
// 3: [* *   *       * * *   * * * *       *   * * *   * *   * * * *  ] (15)
// 4: [* *   * * * *   * *   *       *     *         * * *   *       *] (16)
// 5: [* *   * * * *   * *   *       * * *   * * * *       *   * * *  ] (15)
// 6: [* *   * * * *       *   * * *   * *   * * * *   * *   *       *] (16)
// 7: [* *   * * * *       *   * * *       *         *     *   * * *  ] (15)
// 8: [* * *   * *   *       * * *   *       *     *         * * *   *] (16)
// 9: [* * *   * *   *       * * *   * * * *   * *   * * * *       *  ] (15)
//10: [* * *   * *   * * * *       *   * * *   * *   *       * * *   *] (16)
//11: [* * *   * *   * * * *       *         *     *   * * *       *  ] (15)
//12: [* * *       *   * * *   * *   *       * * *   * * * *   * *   *] (16)
//13: [* * *       *   * * *   * *   * * * *       *         *     *  ] (15)
//14: [* * *       *         *     *   * * *       *   * * *   * *   *] (16)
//15: [* * *       *         *     *         * * *   *       *     *  ] (15)
//16: [*   * *   * * *   *       * * *   *     *         *       * * *] (16)
//17: [*   * *   * * *   *       * * * *   * *   * * * *   * * *      ] (15)
//18: [*   * *   * * * *   * * *       *   * *   * * *   *       * * *] (16)
//19: [*   * *   * * * *   * * *         *     *       *   * * *      ] (15)
//20: [*   * * *       *   * *   * * *   *       * * * *   * *   * * *] (16)
//21: [*   * * *       *   * *   * * * *   * * *         *     *      ] (15)
//22: [*   * * *         *     *       *   * * *       *   * *   * * *] (16)
//23: [*   * * *         *     *         *       * * *   *     *      ] (15)
//24: [*       *   * *   * * * *   * *   * * *   *       * * * *   * *] (16)
//25: [*       *   * *   * * * *   * * *       *   * * *         *    ] (15)
//26: [*       *   * * *         *     *       *   * *   * * * *   * *] (16)
//27: [*       *   * * *         *       * * *   *     *         *    ] (15)
//28: [*         *     *       *   * *   * * * *   * * *       *   * *] (16)
//29: [*         *     *       *   * * *         *       * * *   *    ] (15)
//30: [*         *       * * *   *     *         *     *       *   * *] (16)
//31: [*         *       * * *   *       * * * *   * *   * * *   *    ] (15)

// ----- SRCFILE=walsh/walshcirc.cc: -----
void walsh_circ(double *f, ulong ldn);
// transform wrt. the basis (sequency at end of lines), '*':=1, ' ':=-1:
// 0: [* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *] ( 0)
// 1: [* * * * * * * *                                 * * * * * * * *] ( 2)
// 2: [* * * *                 * * * * * * * *                 * * * *] ( 4)
// 3: [* * * *         * * * *                 * * * *         * * * *] ( 6)
// 4: [* *         * * * *         * * * *         * * * *         * *] ( 8)
// 5: [* *         * *     * * * *         * * * *     * *         * *] (10)
// 6: [* *     * *         * *     * * * *     * *         * *     * *] (12)
// 7: [* *     * *     * *     * *         * *     * *     * *     * *] (14)
// 8: [*     * *     * *     * *     * *     * *     * *     * *     *] (16)
// 9: [*     * *     *   * *     * *     * *     * *   *     * *     *] (18)
//10: [*     *   * *     * *   *     * *     *   * *     * *   *     *] (20)
//11: [*     *   * *   *     *   * *     * *   *     *   * *   *     *] (22)
//12: [*   *     *   * *   *     *   * *   *     *   * *   *     *   *] (24)
//13: [*   *     *   *   *   * *   *     *   * *   *   *   *     *   *] (26)
//14: [*   *   *   *     *   *   *   * *   *   *   *     *   *   *   *] (28)
//15: [*   *   *   *   *   *   *   *     *   *   *   *   *   *   *   *] (30)
//16: [*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *  ] (31)
//17: [*   *   *   *     *   *   *   *   *   *   *   * *   *   *   *  ] (29)
//18: [*   *     *   *   *   * *   *   *   *     *   *   *   * *   *  ] (27)
//19: [*   *     *   * *   *     *   *   *   * *   *     *   * *   *  ] (25)
//20: [*     *   * *   *     *   * *   *     *   * *   *     *   * *  ] (23)
//21: [*     *   * *     * *   *     *   * *   *     * *     *   * *  ] (21)
//22: [*     * *     *   * *     * *   *     * *     *   * *     * *  ] (19)
//23: [*     * *     * *     * *     *   * *     * *     * *     * *  ] (17)
//24: [* *     * *     * *     * *     * *     * *     * *     * *    ] (15)
//25: [* *     * *         * *     * *     * *     * * * *     * *    ] (13)
//26: [* *         * *     * * * *     * *         * *     * * * *    ] (11)
//27: [* *         * * * *         * *     * * * *         * * * *    ] ( 9)
//28: [* * * *         * * * *         * * * *         * * * *        ] ( 7)
//29: [* * * *                 * * * *         * * * * * * * *        ] ( 5)
//30: [* * * * * * * *                 * * * * * * * *                ] ( 3)
//31: [* * * * * * * * * * * * * * * *                                ] ( 1)
// self-inverse

// ----- SRCFILE=walsh/walsh0.cc: -----
void walsh_wak0(double *f, ulong ldn);
// transform wrt. to walsh-kronecker basis (wak-functions)
// version for zero padded data

void walsh_wal0(double *f, ulong ldn);
// transform wrt. to walsh-kaczmarz basis (wal-functions)
// the wal functions are sequency-ordered

void walsh_pal0(double *f, ulong ldn);
// transform wrt. to walsh-paley basis (pal-functions)
// version for zero padded data

// ----- SRCFILE=walsh/dyadiccnvl.cc: -----
void dyadic_convolution(double *f, double *g, ulong ldn);

void dyadic_auto_convolution(double *f, ulong ldn);

void dyadic_convolution0(double *f, double *g, ulong ldn);

void dyadic_auto_convolution0(double *f, ulong ldn);

// ---------- HAAR TRANSFORM ----------
// ----- SRCFILE=haar/haar.cc: -----
void haar(double *f, ulong ldn, double *ws/*=0*/);
// transform wrt. to haar base

void inverse_haar(double *f, ulong ldn, double *ws/*=0*/);
// inverse transform wrt. to haar base

// ----- SRCFILE=haar/inthaar.cc: -----
void int_haar(double *f, ulong ldn, double *ws/*=0*/);
// transform wrt. to haar base
// integer input gives integer output

void inverse_int_haar(double *f, ulong ldn, double *ws/*=0*/);
// inverse transform wrt. to haar base
// integer input gives integer output

// ----- SRCFILE=haar/haarinplace.cc: -----
void inplace_haar(double *f, ulong ldn);
// transform wrt. to haar base
// alternative ordering of basis 
// allows inplace operation

void inverse_inplace_haar(double *f, ulong ldn);
// inverse transform wrt. to haar base
// alternative ordering of basis 
// allows inplace operation

// ---------- WAVELET TRANSFORM ----------

// wavelet/waveletfilter.h and
// wavelet/waveletfilter.cc:
class wavelet_filter;

// ----- SRCFILE=wavelet/wavelet.cc: -----
void wavelet(double *f, ulong ldn, const wavelet_filter &wf);
// wavelet transform

// ----- SRCFILE=wavelet/invwavelet.cc: -----
void inverse_wavelet(double *f, ulong ldn, const wavelet_filter &wf);
// inverse wavelet transform

// ----- SRCFILE=wavelet/harmonicwavelet.cc: -----
void harmonic_wavelet(double *fr, double *fi, ulong ldn);
// harmonic wavelet transform
// cf. crandall, "topics in advanced
// scientific computation", p.177
// here real input is not assumed

void inverse_harmonic_wavelet(double *fr, double *fi, ulong ldn);

// ---------- WEIGHTED TRANSFORM ----------
// ----- SRCFILE=weighted/weightedfft.cc: -----
void weighted_fft(double *fr, double *fi, ulong ldn, int is, double w);

void weighted_inverse_fft(double *fr, double *fi, ulong ldn, int is, double w);
// inverse of above iff signs of both w _and_ is are changed

// ----- SRCFILE=weighted/weightedconv.cc: -----
void weighted_complex_auto_convolution(double *fr, double *fi, ulong ldn,
                                  double w, double v/*=0.0*/);
// w = weight:
// +0.25 for right angle convolution (-0.25 negates result in fi[])
// +0.5  for negacyclic  convolution (also -0.5)
// +1.0  for cyclic  convolution (also -1.0)
// v!=0.0 chooses alternative normalization

void negacyclic_complex_auto_convolution(double *fr, double *fi, ulong ldn,
                                    double v/*=0.0*/);
// negacyclic autoconvolution of fr[],fi[]
// v!=0.0 chooses alternative normalization

void right_angle_complex_auto_convolution(double *fr, double *fi, ulong ldn,
                                     double v/*=0.0*/);
// right angle autoconvolution of fr[],fi[]
// useful if fi[] all zero: then the result is the
//   acyclic autoconvolution of fr[]
//   result is in fr[] (index 0,1,...,n-1) and fi[] (index n,...,2*n-1)
// v!=0.0 chooses alternative normalization

void weighted_complex_auto_convolution(Complex *f, ulong ldn, double w, double v/*=0.0*/);
// w = weight:
// +0.25 for right angle convolution (-0.25 negates result in fi[])
// +0.5  for negacyclic  convolution (also -0.5)
// +1.0  for cyclic  convolution (also -1.0)
// v!=0.0 chooses alternative normalization

void negacyclic_complex_auto_convolution(Complex *f, ulong ldn, double v/*=0.0*/);
// negacyclic autoconvolution of f[]
// v!=0.0 chooses alternative normalization

void right_angle_complex_auto_convolution(Complex *f, ulong ldn, double v/*=0.0*/);
// right angle autoconvolution of f[]
// v!=0.0 chooses alternative normalization

// ------------- LEARNERS STRAIGHT & SIMPLE VERSIONS: ------------
// ----- SRCFILE=learn/recfftdit2.cc: -----
static void evenodd_permute(const Complex *a, Complex *ev, Complex *od, ulong n);

static void recursive_dit2_fft_core(const Complex *a, ulong n, Complex *x, int is);

void recursive_dit2_fft(Complex *a, ulong ldn, int is);
// very inefficient, just here to demonstrate the
// recursive fast fourier transform

// ----- SRCFILE=learn/recfftdif2.cc: -----
static void leftright(const Complex *a, Complex *le, Complex *ri, ulong n);

static void recursive_dif2_fft_core(const Complex *a, ulong n, Complex *x, int is);

void recursive_dif2_fft(Complex *a, ulong ldn, int is);
// very inefficient, just here to demonstrate the
// recursive fast fourier transform

// ----- SRCFILE=learn/fftdif2.cc: -----
void dif2_fft_localized(Complex *f, ulong ldn, int is);
// decimation in frequency radix 2 fft
// depth-first version
// compared to usual fft this one
// - does more trig computations
// - is (far) better memory local

void dif2_fft(Complex *f, ulong ldn, int is);
// decimation in frequency radix 2 fft

void dif2_fft(double *fr, double *fi, ulong ldn, int is);
// decimation in frequency radix 2 fft

// ----- SRCFILE=learn/fftdit2.cc: -----
void dit2_fft_localized(Complex *f, ulong ldn, int is);
// decimation in time radix 2 fft
// depth-first version
// compared to usual fft this one
// - does more trig computations
// - is (far) better memory local

void dit2_fft(Complex *f, ulong ldn, int is);
// decimation in time radix 2 fft

void dit2_fft(double *fr, double *fi, ulong ldn, int is);
// decimation in time radix 2 fft

// ----- SRCFILE=learn/fftdif4l.cc: -----
void dif4l_fft(Complex *f, ulong ldn, int is);
// decimation in frequency radix 4 fft
// non-optimized learners version

// ----- SRCFILE=learn/fftdit4l.cc: -----
void dit4l_fft(Complex *f, ulong ldn, int is);
// decimation in time radix 4 fft
// non-optimized learners version

// ----- SRCFILE=learn/fhtdit2.cc: -----
void dit2_fht_localized(double *f, ulong ldn);
// decimation in time radix 2 fht
// depth-first version
// compared to usual fht this one
// - does more trig computations
// - is (far) better memory local

// ----- SRCFILE=learn/fhtdif2.cc: -----
void dif2_fht_localized(double *f, ulong ldn);
// decimation in frequency radix 2 fht
// depth-first version
// compared to usual fht this one
// - does more trig computations
// - is (far) better memory local

// ----- SRCFILE=learn/recfhtdit2.cc: -----
static void evenodd_permute(const double *a, double *ev, double *od, ulong n);

static void recursive_dit2_fht_core(const double *a, ulong n, double *x);

void recursive_dit2_fht(double *a, ulong ldn);
// very inefficient, just here to demonstrate the
// recursive fast hartley transform

// ----- SRCFILE=learn/recfhtdif2.cc: -----
static void leftright(const double *a, double *le, double *ri, ulong n);

static void recursive_dif2_fht_core(const double *a, ulong n, double *x);

void recursive_dif2_fht(double *a, ulong ldn);
// very inefficient, just here to demonstrate the
// recursive fast hartley transform

// ---------- SLOW TRANSFORMS (mostly for testing) ----------
// ----- SRCFILE=slow/slowht.cc: -----
void slow_ht(double *f, ulong n);
// (slow) hartley transform

// ----- SRCFILE=slow/slowtwodimht.cc: -----
void slow_twodim_ht(double *f, ulong r, ulong c);
// slow 2dim hartley transform

// ----- SRCFILE=slow/slowrowcolht.cc: -----
void slow_row_column_ht(double *f, ulong r, ulong c);

// ----- SRCFILE=slow/slowtwodimft.cc: -----
void slow_twodim_ft(double *fr, double *fi, ulong r, ulong c, int is);

// ----- SRCFILE=slow/slowcnvl.cc: -----
void slow_convolution(const double *f, const double *g, double *r, ulong nu);
// _cyclic_ convolution
// result in r  (f and g are const inputs)

void slow_convolution0(const double *f, const double *g, double *r, ulong nu);
// linear convolution
// (zero padded data expected)
// result in r

void slow_auto_convolution(const double *f, double *r, ulong nu);
// _cyclic_ self-convolution

void slow_auto_convolution0(const double *f, double *r, ulong nu);
// linear self-convolution
// (zero padded data expected)

// ----- SRCFILE=slow/slowtwodimcnvl.cc: -----
void slow_twodim_convolution(const double *f, double *g, ulong ru, ulong cu);
// _cyclic_ convolution
// result in g

// ----- SRCFILE=slow/slowcorr.cc: -----
void slow_correlation(const double *f, double *g, ulong nu);
// _cyclic_ correlation
// (use zero padded data for usual corr.)
// result in g

void slow_correlation0(const double *f, double *g, ulong nu);
// correlation
// (expects zero padded data)
// result in g

void slow_auto_correlation(double *f, ulong nu);
// _cyclic_ self-correlation

void slow_auto_correlation0(double *f, ulong nu);
// _cyclic_ self-correlation
// (expects zero padded data)

// ----- SRCFILE=slow/slowcocorr.cc: -----
void slow_complex_correlation(const double *wr, const double *wi,
                         double *fr, double *fi, ulong nu);
// _cyclic_ correlation
// (use zero padded data for usual corr.)
// result in fr,fi

void slow_complex_auto_correlation(double *fr, double *fi, ulong nu);
// _cyclic_ self-correlation
// (use zero padded data for usual corr.)

// ----- SRCFILE=slow/slowcocnvl.cc: -----
void slow_convolution(const Complex *w , Complex *f, ulong nu);
// _cyclic_ convolution
// (use zero padded data for usual conv.)
// result in f

void slow_complex_convolution(const double *wr, const double *wi,
                         double *fr, double *fi, ulong nu);
// _cyclic_ convolution
// (use zero padded data for usual conv.)
// result in fr,fi

void slow_complex_auto_convolution(double *fr, double *fi, ulong nu);
// _cyclic_ self-convolution
// (use zero padded data for usual conv.)

void slow_auto_convolution(Complex *f, ulong nu);
// _cyclic_ self-convolution
// (use zero padded data for usual conv.)

// ----- SRCFILE=slow/slowft.cc: -----
void slow_ft(double *fr, double *fi, ulong n, int is);
// (slow) fourier transform

void slow_rotated_ft(double *fr, double *fi, ulong n, int is, int r);
// (slow) rotated fourier transform

void slow_ft(Complex *f, long n, int is);
// (slow) fourier transform

// ----- SRCFILE=slow/slowfracft.cc: -----
void slow_fract_ft(double *fr, double *fi, ulong n, double v);
// (slow) fractional fourier transform
// (for v=+-1 this is the usual fourier transform)

void slow_fract_ft(Complex *f, ulong n, double v);
// (slow) fractional fourier transform
// (for v=+-1 this is the usual fourier transform)

// ----- SRCFILE=slow/slowwalsh.cc: -----
void dit2_walsh_wak_slow(double *f, ulong ldn);
// transform wrt. to walsh-kronecker basis (wak-functions)
// same result as dit2_walsh_wak() but the loops
//  with loop variables j and r are swapped (as in ffts)
// ==> nonlocal ==> SLOW!

void dif2_walsh_wak_slow(double *f, ulong ldn);
// transform wrt. to walsh-kronecker basis (wak-functions)
// same result as dif2_walsh_wak() but the loops
//  with loop variables j and r are swapped (as in ffts)
// ==> nonlocal ==> SLOW!

// ----- SRCFILE=slow/slowzt.cc: -----
void slow_zt(double *f, ulong n, double z);
// (slow) z-transform

// ------- inlines give default implementations -------
inline void fft(double *fr, double *fi, ulong ldn, int is);

inline void fft(Complex *f, ulong ldn, int is);

inline void fht(double *f, ulong ldn);

inline void fht(Complex *f, ulong ldn);

inline void walsh_wal(double *f, ulong ldn);

inline void walsh_pal(double *f, ulong ldn);

inline void walsh_wak(double *f, ulong ldn);

