#define CHIP_6416
#include <dsk6416.h>
#include <string.h> /* memcpy() */
#include "IMG_wave_horz.h"
#include "IMG_wave_vert.h"

#include "image.h" /* image\kernel dimensions, example pixel data */
/*
 * Placement directives for wcoefs, the image / wavelet-coefficient array.
 * wcoefs is not defined in this file (presumably it comes from image.h);
 * transform_rows() reads it as the input image and transform_cols() writes
 * the final coefficients back into it.
 *
 * NOTE(review): the TI compiler documentation says DATA_ALIGN/DATA_SECTION
 * must appear before the first declaration or definition of the symbol.
 * If image.h (included just above) defines wcoefs, these two pragmas may be
 * ignored and need to move above that #include -- confirm against the
 * build log / map file.
 */
#pragma DATA_ALIGN (wcoefs, 8);
#pragma DATA_SECTION (wcoefs, "SDRAM");

/*
 * D4 wavelet filter coefficients in Q15 fixed point.
 *
 * Each table holds four non-zero taps followed by four zeros, giving eight
 * entries in total. Both tables are handed unchanged to IMG_wave_horz() and
 * IMG_wave_vert() as their two filter arguments.
 * (Presumably the zero padding is there because IMGLIB expects 8-tap
 * filters -- confirm against the IMGLIB reference.)
 */
#pragma DATA_ALIGN (d4_qmf_Q15, 8);
short d4_qmf_Q15[] = {
	-4240, 7345, 27411, 15826,
	0, 0, 0, 0
};

#pragma DATA_ALIGN (d4_mqmf_Q15, 8);
short d4_mqmf_Q15[] = {
	-15826, 27411, -7345, -4240,
	0, 0, 0, 0
};

/* BUFFERS USED DURING WAVELET TRANSFORM */

/*
 * wvlt_in_buf: eight scan-lines (8*Y_SIZE shorts) of input scratch space.
 * transform_rows() stages each group of eight image rows here;
 * transform_cols() uses it as the backing store for the eight line slots
 * referenced through pwvbufs.
 */
#pragma DATA_ALIGN (wvlt_in_buf, 8);
short wvlt_in_buf[Y_SIZE*8]; /* scratch (input) buffer */

/*
 * wvlt_out_buf: eight scan-lines of output scratch space.
 * transform_rows() fills all eight lines; transform_cols() only uses the
 * first two (line 0 = low-pass output, line 1 = high-pass output).
 */
#pragma DATA_ALIGN (wvlt_out_buf, 8);
short wvlt_out_buf[Y_SIZE*8]; /* scratch (output) buffer */

/*
 * pwvbufs: the eight scan-line pointers passed to IMG_wave_vert().
 * Initialised in transform_cols() and rotated by two entries on every call
 * to fetch_horz_wavelet_scanlines().
 */
short *pwvbufs[8]; /* IMG_wave_vert() input */

/*
 * horzcoefs: full-image (N_PIXELS shorts) result of the horizontal pass,
 * placed in the "SDRAM" section. Written by transform_rows(), read by
 * transform_cols() and fetch_horz_wavelet_scanlines().
 */
#pragma DATA_ALIGN (horzcoefs, 8);
#pragma DATA_SECTION (horzcoefs, "SDRAM"); 
short horzcoefs[N_PIXELS]; /* IMG_wave_horz() output */

/*
 * Horizontal (row-wise) wavelet pass: wcoefs -> horzcoefs.
 *
 * The image is processed in groups of eight scan-lines. Each group is
 * copied from wcoefs into wvlt_in_buf, every scan-line is run through
 * IMG_wave_horz(), and the eight transformed lines are copied from
 * wvlt_out_buf to the same row offset in horzcoefs.
 *
 * X_SIZE is the number of rows and Y_SIZE the number of shorts per row.
 * Exactly X_SIZE>>3 groups are processed, so neither wcoefs nor horzcoefs
 * is accessed past row X_SIZE-1; rows beyond the last full group of eight
 * (if X_SIZE is not a multiple of 8) are left untouched.
 */
void transform_rows()
{
  const int nBlocks = X_SIZE>>3;  /* number of complete 8-row groups */
  const int blockLen = 8*Y_SIZE;  /* shorts in one 8-row group */
  int iBlock, kk;

  /*
   * "<" (not "<="): block indices run 0..nBlocks-1. One extra iteration
   * would read and write eight rows beyond the end of both image arrays.
   */
  for (iBlock=0; iBlock<nBlocks; ++iBlock) {
    const int offset = iBlock*blockLen; /* first short of this group */
    short *pin = wvlt_in_buf,
          *pout = wvlt_out_buf;

    /* fetch the next group of 8 rows */
    memcpy(wvlt_in_buf, &wcoefs[offset], blockLen*sizeof(short));

    /* transform each of the 8 scan-lines independently */
    for (kk=0; kk<8; ++kk, pin+=Y_SIZE, pout+=Y_SIZE)
      IMG_wave_horz(pin, d4_qmf_Q15, d4_mqmf_Q15, pout, Y_SIZE);

    /* page out horizontal wavelet coeffs to ext mem storage */
    memcpy(horzcoefs+offset, wvlt_out_buf, blockLen*sizeof(short));
  }
}

/*
 * Slide the eight-line IMG_wave_vert() window down by two scan-lines
 * (see Figure 6-9).
 *
 * The pointers in pwvbufs[2..7] move to slots 0..5; the two line buffers
 * that were in slots 0 and 1 are recycled into slots 6 and 7 and refilled
 * with rows r and r+1 of horzcoefs.
 *
 * r: index of the first of the two horzcoefs rows to load; the caller must
 *    make sure rows r and r+1 exist.
 * Returns r+2, i.e. the row index to pass on the next call.
 */
inline
int fetch_horz_wavelet_scanlines(int r)
{
	short *recycled0 = pwvbufs[0];
	short *recycled1 = pwvbufs[1];
	short *src = horzcoefs + r*Y_SIZE; /* start of row r */
	int slot;

	/* shift the six newest lines towards the front of the window */
	for (slot = 0; slot < 6; ++slot)
		pwvbufs[slot] = pwvbufs[slot+2];

	/* the two oldest line buffers become the two newest slots */
	pwvbufs[6] = recycled0;
	pwvbufs[7] = recycled1;

	/* load rows r and r+1 into the recycled buffers */
	memcpy(pwvbufs[6], src, sizeof(short)*Y_SIZE);
	memcpy(pwvbufs[7], src + Y_SIZE, sizeof(short)*Y_SIZE);

	return r+2;
}

/*
 * Vertical (column-wise) wavelet pass: horzcoefs -> wcoefs.
 *
 * IMG_wave_vert() is called X_SIZE/2 times on a sliding window of eight
 * scan-lines (pwvbufs). Each call yields one low-pass line and one
 * high-pass line of Y_SIZE shorts:
 *   - low-pass lines go to rows [0, X_SIZE/2) of wcoefs, starting at row
 *     X_SIZE/2-3 and wrapping around to row 0;
 *   - high-pass lines go to rows X_SIZE/2, X_SIZE/2+1, ... in order.
 * (The -3 starting offset presumably compensates for the filter delay with
 * periodic extension -- confirm against the IMGLIB IMG_wave_vert notes.)
 *
 * The window is primed with the last six rows of horzcoefs followed by the
 * first two, and then advanced two rows per iteration.
 */
void transform_cols()
{
  const int nRowsDiv2 = X_SIZE>>1,
            circular = nRowsDiv2-1; /* last low-pass row before wrapping */
  int lpRow = nRowsDiv2-3, /* low-pass vert output */
      hpRow = nRowsDiv2,   /* high-pass vert output */
      fetchRow = 2,        /* next horzcoefs row to bring into the window */
      iRow, kk;
  short *plpvc = wvlt_out_buf, /* ptr to low-pass vert coeffs */
        *phpvc = wvlt_out_buf+Y_SIZE; /* high-pass vert coeffs */

  /*
   * setup scan-lines for DWT down the columns.
   * memcpy() takes a byte count, so the row counts must be scaled by
   * sizeof(short); otherwise only half of each region is loaded.
   */
  memcpy(wvlt_in_buf, horzcoefs+N_PIXELS-6*Y_SIZE, 6*Y_SIZE*sizeof(short));
  memcpy(wvlt_in_buf+6*Y_SIZE, horzcoefs, 2*Y_SIZE*sizeof(short));
  for (kk=0; kk<8; ++kk)
    pwvbufs[kk] = wvlt_in_buf + kk*Y_SIZE;

  for (iRow=0; iRow<nRowsDiv2; ++iRow) {
    IMG_wave_vert(pwvbufs, d4_qmf_Q15, d4_mqmf_Q15, plpvc, phpvc, Y_SIZE);
    memcpy(wcoefs+lpRow*Y_SIZE, plpvc, sizeof(short)*Y_SIZE);
    memcpy(wcoefs+hpRow*Y_SIZE, phpvc, sizeof(short)*Y_SIZE);

    /*
     * Advance the window only if another iteration follows. After the
     * final IMG_wave_vert() call fetchRow equals X_SIZE, and fetching
     * would read two rows past the end of horzcoefs.
     */
    if (iRow+1 < nRowsDiv2)
      fetchRow = fetch_horz_wavelet_scanlines(fetchRow);

    lpRow += 1; if (lpRow>circular) lpRow=0; /* lpRow = (lpRow+1) % nRowsDiv2 */
    hpRow += 1;
  }
}

/*
 * Single-level 2D wavelet decomposition, performed in place on wcoefs.
 *
 * The row pass must run first: transform_rows() turns wcoefs into
 * horzcoefs, and transform_cols() then consumes horzcoefs and writes the
 * final coefficients back into wcoefs.
 */
void dwt2d()
{
  transform_rows();
  transform_cols();
}

/*
 * Program entry point: initialise the DSK board support library, then run
 * one 2D wavelet decomposition (see dwt2d()).
 *
 * Returns 0 explicitly; falling off the end of a non-void main() leaves the
 * exit status undefined under C89.
 */
int main(void)
{
  DSK6416_init(); /* initialize the DSK board support library */
  dwt2d();
  return 0;
}
