#define CHIP_6416
#include <dsk6416.h>
#include <stdio.h> /* printf() */
#include <string.h> /* memset() */
#include <img_conv_3x3.h> /* IMG_conv_3x3() prototype */
#include <img_corr_3x3.h>
#include <csl_timer.h>

#include "..\image.h" /* image\kernel dimensions, example pixel data */
/*
 * Input image buffer: 8-byte aligned and placed in the "SDRAM" section.
 * NOTE(review): in_img is not defined in this file; presumably it comes
 * from image.h above. Confirm these pragmas still take effect when they
 * appear after the symbol's definition.
 */
#pragma DATA_ALIGN (in_img, 8);
#pragma DATA_SECTION (in_img, "SDRAM");

/* Output image buffer: same alignment and section as the input. */
#pragma DATA_ALIGN (out_img, 8);
#pragma DATA_SECTION (out_img, "SDRAM"); 
unsigned char out_img[N_PIXELS]; /* filtered image */

/* filter dimensions and coefficients */
#define NH 3 /* kernel is of size NHxNH (needs to be 3 for this program) */
#define BOUNDARY (NH/2) /* 1st and last BOUNDARY rows/cols in output set to 0 */

/*
 * 3x3 smoothing kernel, stored row by row. The integer weights sum to 16;
 * the result is normalized by the right-shift of SHIFT bits defined below.
 */
#pragma DATA_ALIGN (H, 8)
char H[NH*NH] = {
	1, 2, 1, /* 1/16 2/16 1/16 */
	2, 4, 2, /* 2/16 4/16 2/16 */
	1, 2, 1, /* 1/16 2/16 1/16 */
};
#define SHIFT 4 /* right-shift by 4 (div by 16) */

/*
 * Number of columns per row that hold filtered pixels (row width minus the
 * zeroed left and right borders). The expansion is parenthesized so that it
 * stays a single operand when used inside a larger expression, e.g.
 * 2*N_COLS_FILTERED or x-N_COLS_FILTERED.
 */
#define N_COLS_FILTERED (Y_SIZE-2*BOUNDARY)

/*
 * Zero a buffer one 8-byte word at a time; intended as a faster
 * replacement for memset(ptr, 0, count) on suitably aligned data.
 *
 * ptr   - start of the buffer, must be 8-byte aligned (this is promised
 *         to the optimizer through _nassert, it is not checked at run time)
 * count - number of bytes to clear; must be a multiple of 8 and greater
 *         than or equal to 32. Any remainder modulo 8 is NOT cleared.
 *
 * NOTE(review): the loop assumes each store through a `long *` covers
 * 8 bytes on this target - confirm for the compiler/ABI in use.
 */
void memclear( void * ptr, int count )
{
  long *lptr = ptr;
  _nassert((int)lptr%8==0);
  /*
   * The loop runs count/8 times, so the documented minimum of 32 bytes
   * guarantees only 4 iterations. Promising more than that to the
   * compiler would be violated by any caller passing fewer than 256 bytes.
   */
  #pragma MUST_ITERATE (4);
  for (count>>=3; count>0; count--)
    *lptr++ = 0;
}

/*
 * Filter in_img with the 3x3 kernel H and store the result in out_img.
 * The outermost BOUNDARY rows and columns of out_img are set to zero;
 * every other output row is produced by one IMG_conv_3x3 call.
 */
void filter_image()
{
  unsigned char *out_row;
  int row, col;

  /* top border rows */
  memclear(out_img, BOUNDARY*Y_SIZE);

  /* interior rows, one IMG_conv_3x3 call per output row */
  for (row = BOUNDARY; row < X_SIZE-BOUNDARY; ++row)
  {
    out_row = out_img + row*Y_SIZE;

    /* left border columns */
    for (col = 0; col < BOUNDARY; ++col)
      out_row[col] = 0;

    /*
     * The column count handed to IMG_conv_3x3 has to be a multiple
     * of 8, so the full row width Y_SIZE is used rather than
     * N_COLS_FILTERED. The surplus pixels written past the filtered
     * region are overwritten by the border clearing that follows
     * (right border here, left border of the next row on the next
     * pass, or the bottom border rows after the loop).
     */
    IMG_conv_3x3(in_img + (row-BOUNDARY)*Y_SIZE,
                 out_row + BOUNDARY,
                 Y_SIZE,
                 H,
                 SHIFT);

    /* right border columns */
    for (col = Y_SIZE-BOUNDARY; col < Y_SIZE; ++col)
      out_row[col] = 0;
  }

  /* bottom border rows */
  memclear(out_img + (X_SIZE-BOUNDARY)*Y_SIZE, BOUNDARY*Y_SIZE);
}

/*
 * Profile filter_image(): run it N times, timing each run with a
 * hardware timer, and print the per-run and average cycle counts.
 * Returns 0.
 */
int main(void)
{
  TIMER_Handle hTimer;
  unsigned int start, stop, overhead, total = 0, t; /* timing */
  const int N = 10; /* how many times to profile */
  int ii = 0;
      
  DSK6416_init(); /* initialize the DSK board support library */
  
  /* configure timer */
  hTimer = TIMER_open(TIMER_DEVANY,0); 
  TIMER_configArgs(hTimer, 0x000002C0, 0xFFFFFFFF, 0x00000000);  

  /* compute overhead of calling the timer. */  
  start    = TIMER_getCount(hTimer);  /* called twice to avoid L1D miss.  */
  start    = TIMER_getCount(hTimer); 
  stop     = TIMER_getCount(hTimer); 
  overhead = stop - start;
	
  for (; ii<N; ++ii) {
    start = TIMER_getCount(hTimer); /* begin "profile area" */
      filter_image();
    stop = TIMER_getCount(hTimer); /* end "profile area" */
    /*
     * Timer ticks are scaled by 8 to obtain cycles (presumably the
     * timer counts at 1/8 of the CPU clock - confirm for this device).
     */
    t = (stop-start-overhead) * 8;
    total += t;
    /* t is unsigned, so it must be printed with %u rather than %d */
    printf("# cycles to filter image: %u\n", t);
  }
  
  printf("avg time is %.2f cycles.\n", (float)total/(float)N);

  /* explicit status: falling off the end of main is not defined in C89 */
  return 0;
}
