#include <board.h> /* EVM library */
#include <stdio.h> /* printf() */
#include <string.h> /* memset() */
#include <img_corr_gen.h>
#include <time.h> /* clock() */

#include "..\image.h"
/*
 * Placement of the input image buffer: alignment 4, section "SBSRAM".
 * NOTE(review): in_img is not defined in this file; presumably it comes
 * from image.h -- confirm that these pragmas still take effect when they
 * appear after that header.
 */
#pragma DATA_ALIGN (in_img, 4);
#pragma DATA_SECTION (in_img, "SBSRAM");

/* Placement of the output image buffer: alignment 4, section "SBSRAM". */
#pragma DATA_ALIGN (out_img, 4);
#pragma DATA_SECTION (out_img, "SBSRAM"); 
unsigned char out_img[N_PIXELS]; /* the filtered image, one byte per pixel */

/*
 * Filter coefficients in Q.15 format (real value = coefficient / 32768).
 * H is an NH x NH kernel; each row of H is handed to corr_gen as the taps
 * of one 1-D correlation.
 */
#define NH 5 /* kernel is NH x NH */
#define BOUNDARY (NH/2) /* 1st and last BOUNDARY rows/cols in output set to 0 (integer division: 2 for NH = 5) */
#pragma DATA_ALIGN (H, 4)
short H[NH][NH] = {
/*	Alternative kernel, kept for reference: Gaussian, sigma = 1 */
/*
	{102, 441,  723,  441,  102},
	{441, 1959, 3226, 1959, 441},
	{723, 3226, 5316, 3226, 723},
	{441, 1959, 3226, 1959, 441},
	{102, 441,  723,  441, 102}
*/
/* Averaging filter: every tap is 1/25, i.e. 32768/25 = 1310.72 stored as 1310 */
	{1310, 1310, 1310, 1310, 1310},
	{1310, 1310, 1310, 1310, 1310},
	{1310, 1310, 1310, 1310, 1310},
	{1310, 1310, 1310, 1310, 1310},
	{1310, 1310, 1310, 1310, 1310}
};

/*
 * Temporary buffer for IMG_corr_gen / corr_gen: one row of 1-D correlation
 * output per kernel row, each Y_SIZE-2*BOUNDARY elements long.
 */
#pragma DATA_ALIGN (out_corr, 4)
short out_corr[NH][Y_SIZE-2*BOUNDARY];

/*
 * Number of ELEMENTS (shorts) in out_corr. This is not a byte count; use
 * sizeof(out_corr) wherever a size in bytes is required.
 * Parenthesized so the expansion is safe inside larger expressions.
 */
#define N_OUT_CORR (NH*(Y_SIZE-2*BOUNDARY))

/*
 * Number of elements in one row of out_corr.
 * Parenthesized so the expansion is safe inside larger expressions.
 */
#define N_PIXELS_2_FILTER (Y_SIZE-2*BOUNDARY)

/*
 * This function was adapted from the behavioral C code
 * given in img_corr_gen.h and SPRU400 (5.3.2).
 */
void corr_gen (short *in_data, short *h, short *out_data, int m, int cols)
{
  /*
   * 1-D, m-tap correlation over `cols` output positions.
   *
   *   in_data  : input samples; elements 0 .. cols+m-2 are read
   *   h        : m filter taps (Q.15)
   *   out_data : cols outputs; each element is read as the starting value
   *              of its accumulator and then overwritten with the result
   *   m        : number of taps
   *   cols     : number of output positions
   *
   * For every output position the accumulator starts from the value
   * already stored there, the m products are added, and the total is
   * shifted right by 15 bits before being stored back as a short.
   */
  int col;

  for (col = 0; col < cols; ++col) {
    const short *window = in_data + col; /* first sample under the taps */
    int acc = out_data[col];             /* 32-bit accumulator */
    int tap = 0;

    while (tap < m) {
      acc += window[tap] * h[tap];       /* Q.15 multiply-accumulate */
      ++tap;
    }

    out_data[col] = (short) (acc >> 15); /* narrow the result to 16 bits */
  }
}

/*
 * Filter in_img with the NH x NH kernel H and write the result to out_img.
 *
 * Rows are indexed 0 .. X_SIZE-1 and each row is Y_SIZE pixels long. The
 * first and last BOUNDARY rows and columns of out_img are set to zero.
 * Every other output row is built from NH 1-D correlations (one per kernel
 * row, each over a different input row) whose per-pixel results are summed.
 *
 * NOTE(review): corr_gen reads its input through a short pointer; the
 * element type of in_img is not visible in this file -- confirm it matches.
 */
void filter_image()
{
  unsigned char *p; /* ptr into output image buffer */
  int ii, irow, icol, ih, sum;

  /* set 1st BOUNDARY rows to zero */
  memset(out_img, 0, BOUNDARY*Y_SIZE*sizeof(char));

  /* filter the interior region of the image matrix */
  for (irow=BOUNDARY; irow<X_SIZE-BOUNDARY; ++irow) {
    /*
     * Zero out the whole correlation output array. memset takes a size in
     * bytes, so pass the array's byte size; passing its element count
     * would clear only part of a buffer of shorts and leave stale partial
     * sums in the rest, which corr_gen would then add into this row.
     */
    memset(out_corr, 0, sizeof(out_corr));

    /*
     * One 1-D correlation per kernel row: kernel row ih is applied to
     * input row irow-BOUNDARY+ih. The original author notes that
     * IMG_corr_gen doesn't work here, while corr_gen does.
     */
    for (ih=0; ih<NH; ++ih) {
      corr_gen(&in_img[(irow-BOUNDARY+ih)*Y_SIZE],
               H[ih],
               out_corr[ih],
               NH,
               N_PIXELS_2_FILTER);
    }

    /* 1st BOUNDARY cols are zero */
    p = out_img+irow*Y_SIZE;
    for (ii=0; ii<BOUNDARY; ++ii) *p++ = 0;

    /* sum up correlation results */
    for (icol=0; icol<N_PIXELS_2_FILTER; ++icol) {
      sum = 0;
      for (ih=0; ih<NH; ++ih)
        sum += (out_corr[ih][icol]);
      /* stored as an 8-bit pixel; no saturation is applied */
      *p++ = (unsigned char) sum;
    }

    /* last BOUNDARY cols are zero */
    for (ii=0; ii<BOUNDARY; ++ii) *p++ = 0;
  }

  /* last BOUNDARY rows are zero */
  memset(out_img+(X_SIZE-BOUNDARY)*Y_SIZE, 0, BOUNDARY*Y_SIZE*sizeof(char));
}

/*
 * Profile filter_image(): initialize the board, measure the cost of a
 * back-to-back pair of clock() calls, then time N runs of the filter,
 * printing each measurement and finally the average.
 *
 * Returns 0.
 */
int main(void)
{
  clock_t start, stop, overhead, t = 0; /* timing */
  const int N = 10; /* how many times to profile */
  int ii;

  evm_init(); /* initialize the board */

  /* calculate the overhead of calling clock() ... */
  start = clock();
  stop = clock();
  /* ... so it can be subtracted from the results */
  overhead = stop - start;

  for (ii = 0; ii<N; ++ii) {
    clock_t elapsed;

    start = clock(); /* begin "profile area" */
      filter_image();
    stop = clock(); /* end "profile area" */

    elapsed = stop-start-overhead;
    t += elapsed;

    /*
     * clock_t is not guaranteed to be int, so convert explicitly to a
     * type that has a matching printf conversion.
     */
    printf("# cycles to filter image: %ld\n", (long)elapsed);
  }

  printf("avg time is %.2f cycles.\n", (float)t/(float)N);

  return 0;
}
