#include <rtdx.h> 	/* target API */
#include <stdio.h> 
#include <stdlib.h> /* exit() */
#include <IMG_sobel.h>
#include <IMG_thr_le2min.h> 
#include <IMG_histogram.h>  
#include <fastrts62x64x.h> /* spuint() & divsp() */
#include "target.h" /* defines TARGET_INITIALIZE() */
#include "image.h"  /* dimensions */

/*
 * RTDX channels shared with the host. main() reads the edge-detection
 * flag followed by the raw image from ichan, and writes the processed
 * image to ochan.
 */
RTDX_CreateInputChannel(ichan); /* edge-detection flag & input image come down this pipe */
RTDX_CreateOutputChannel(ochan); /* processed image back through this pipe */

/*
 * Two N_PIXELS-byte image buffers, placed in the linker section named
 * "SDRAM" and aligned on 8-byte boundaries. img_buf1 receives the image
 * from the host; process_image() uses the pair as source/destination
 * for the IMGLIB kernels.
 */
#pragma DATA_SECTION(img_buf1, "SDRAM");
#pragma DATA_ALIGN (img_buf1, 8);
unsigned char img_buf1[N_PIXELS];

#pragma DATA_SECTION(img_buf2, "SDRAM");
#pragma DATA_ALIGN (img_buf2, 8);
unsigned char img_buf2[N_PIXELS];

/*
 * 256-bin histogram filled in by IMG_histogram() (one bin per 8-bit
 * pixel value). 8-byte aligned because memclear() asserts that
 * alignment on the pointer it is given.
 */
#pragma DATA_ALIGN (hist, 8);
unsigned short hist[256];

/*
 * Smoothed copy of hist written by calc_hist() and read by isodata()
 * and dotproduct().
 */
#pragma DATA_ALIGN (smoothed_hist, 8);
unsigned short smoothed_hist[256];

/* 8-byte aligned for memclear(), which zeroes it before every histogram */
#pragma DATA_ALIGN (t_hist, 8);
unsigned short t_hist[1024]; /* temp storage needed for IMG_histogram */

/* 
 * Zeroes count bytes starting at ptr by storing one long per loop
 * iteration (count>>3 iterations in total). Meant as a faster
 * replacement for memset() on the histogram buffers.
 *
 * The hints handed to the compiler below impose these requirements
 * on the caller:
 *   - ptr must be aligned on an 8-byte boundary (_nassert);
 *   - count must be a multiple of 8, since count>>3 silently drops
 *     any remainder;
 *   - the loop must execute at least 32 times (MUST_ITERATE), which
 *     means count must be greater than or equal to 256 bytes.
 *
 * NOTE(review): the count>>3 arithmetic assumes that every long
 * store covers 8 bytes of memory -- confirm for the target ABI.
 */
void memclear( void * ptr, int count )
{
  long * lptr = ptr;
  _nassert((int)lptr%8==0); /* tell the optimizer lptr is 8-byte aligned */
  #pragma MUST_ITERATE (32); /* minimum trip count promised to the optimizer */
  for (count>>=3; count>0; count--) /* bytes -> number of 8-byte stores */
    *lptr++ = 0;
}

/* 
 * Computes the histogram of the N_PIXELS-byte image at pimg into
 * hist[], writes a smoothed version of it into smoothed_hist[], and
 * returns the largest non-zero pixel value that occurs in the image
 * (0 if the image contains nothing but zeros).
 *
 * Smoothing is a centered 5-bin running average for bins 2..253;
 * the two bins at either end, which lack a full window, are copied
 * from hist[] unchanged.
 */
unsigned char calc_hist(const unsigned char * restrict pimg)
{
  short * restrict phist = (short *)hist; /* to remove warnings */
  short * restrict pthist = (short *)t_hist; /* see above */
  unsigned short *pH = hist; /* trailing edge of the 5-bin window */
  unsigned int accum;        /* sum of the bins inside the window */
  int ii;
  const unsigned int DIVISOR = 13107; /* (1/5)*2^16 */
  unsigned char max_pixel = 0;

  /* set histogram buffers to 0 */
  memclear(t_hist, 1024*sizeof(unsigned short));
  memclear(hist, 256*sizeof(unsigned short));

  IMG_histogram(pimg, N_PIXELS, 1, pthist, phist); /* computes histogram */ 

  /*
   * The sliding window below only inspects bins 4..255 when looking
   * for the brightest pixel, so bins 1..3 are checked here.
   */
  for (ii=1; ii<4; ++ii)
    if (hist[ii])
      max_pixel = ii;

  /* now smooth histogram using 5-sample running average */
  accum = pH[0]+pH[1]+pH[2]+pH[3]+pH[4];
  for (ii=2; ii<253; ++ii, ++pH) {
    smoothed_hist[ii] = (accum*DIVISOR)>>16; /* accum/5 (Q.16) */
    /* slide the window: drop bin ii-2, bring in bin ii+3 (<= 255) */
    accum -= pH[0];
    accum += pH[5];
    /* pH[4] is bin ii+2, the leading edge of the current window */
    if (pH[4])
      max_pixel = ii+2;
  }

  /*
   * Final window (bins 251..255) is handled outside of the loop so
   * that the window never slides past the end of hist[]. At this
   * point pH == &hist[251], hence pH[4] is bin 255.
   */
  smoothed_hist[253] = (accum*DIVISOR)>>16;
  if (pH[4])
    max_pixel = 255;

  /* edge bins have no full window, copy them through */
  smoothed_hist[0] = hist[0];
  smoothed_hist[1] = hist[1];
  smoothed_hist[254] = hist[254];
  smoothed_hist[255] = hist[255];
  return max_pixel;
}

/*
 * Used by isodata() to compute center-of-mass under histogram.
 *
 * Returns the sum over every bin k in [lo, hi) of
 * smoothed_hist[k] * k, i.e. the numerator of the mean pixel
 * value within that range of the smoothed histogram.
 *
 * Requires 0 <= lo <= hi <= 256; an empty range yields 0.
 */
unsigned long dotproduct(int lo, int hi)
{
  /* 0, 1, 2, ..., 255: lets the bin values be fetched with packed loads */
  static const unsigned short pixval[] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
     80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
     96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
    112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
    176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
    192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
    208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
    240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
  };
  unsigned long sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum; 
  const int N = hi-lo;   /* number of bins to accumulate */
  int ii = 0,            /* bins consumed so far */
      jj = lo;           /* index of the current bin */
  double h1_h2_h3_h4, b1_b2_b3_b4;
  unsigned int h1_h2, h3_h4, b1_b2, b3_b4;

  /*
   * Unrolled dot-product loop with non-aligned double word reads.
   * Runs only while a full group of 4 bins remains, so nothing at
   * or beyond bin hi is ever read or accumulated.
   */
  for (; ii+4<=N; ii+=4, jj+=4)
  {
    h1_h2_h3_h4 = _memd8_const(&smoothed_hist[jj]);
    h1_h2 = _lo(h1_h2_h3_h4);
    h3_h4 = _hi(h1_h2_h3_h4);

    b1_b2_b3_b4 = _memd8_const(&pixval[jj]);
    b1_b2 = _lo(b1_b2_b3_b4);
    b3_b4 = _hi(b1_b2_b3_b4);

    sum1 += _mpyu(h1_h2, b1_b2);  /* (h1)(b1) */
    sum2 += _mpyhu(h1_h2, b1_b2); /* (h2)(b2) */
    sum3 += _mpyu(h3_h4, b3_b4);  /* (h3)(b3) */
    sum4 += _mpyhu(h3_h4, b3_b4); /* (h4)(b4) */
  }
  sum = sum1 + sum2 + sum3 + sum4;

  /* 
   * Loop epilogue: picks up the 0..3 bins left over whenever the
   * range length is not a multiple of 4.
   */
  for (; ii<N; ++ii, ++jj)
    sum += smoothed_hist[jj]*pixval[jj];

  return sum;
}

/*
 * Finds a threshold for segmentation using the isodata algorithm.
 *
 * Starting from T = max_pixel/2, repeatedly splits smoothed_hist[]
 * at T, computes the mean pixel value of the bins below T and of
 * the bins at or above T, and moves T to the midpoint of the two
 * means. Stops when T no longer changes or after max_iterations
 * passes. The resulting threshold is printed and returned.
 *
 * If max_pixel is 0 or 1 the initial T is 0, no iteration takes
 * place and 0 is returned.
 */
unsigned char isodata(unsigned char max_pixel)
{
  const int max_iterations = 10;
  unsigned char T = max_pixel>>1,
                T_prev,
                kk = 0, mhi, mlo;
  unsigned long sumhi, sumlo, dotprod;
  int ii;

  if (T) { /* guard against pathological case where image all zero */
    do {

      /* mean (center-of-mass) above current T */
      sumhi = 0;
      for (ii=T; ii<256; ++ii) 
        sumhi += smoothed_hist[ii];
      if (sumhi) {
        dotprod = dotproduct(T, 256);
        mhi = spuint( divsp((float)dotprod, (float)sumhi) );
      } 
      else /* upper portion of hist all zeros: take middle of [T,256) */
        mhi = T + ((256-T)>>1); /* parens needed, + binds tighter than >> */

      /* mean (center-of-mass) below current T */
      sumlo = 0;
      for (ii=0; ii<T; ++ii)
        sumlo += smoothed_hist[ii];
      if (sumlo) {
        dotprod = dotproduct(0, T);
        mlo = spuint( divsp((float)dotprod, (float)sumlo) );
      } 
      else /* lower portion of hist all zeros: take middle of [0,T) */
        mlo = T ? (T-1)>>1 : 0; /* T may have dropped to 0, avoid -1>>1 */

      T_prev = T;
      T = (mhi+mlo)>>1;

    } while (T_prev!=T && ++kk<max_iterations);
  }

  printf("threshold = %d\n", T);
  return T;
}

/* 
 * Top-level processing step. Segments the image held in img_buf1
 * and returns a pointer to whichever buffer ends up holding the
 * result.
 *
 * run_edge_detection != 0: IMG_sobel() output goes to img_buf2, the
 *                          threshold is derived from and applied to
 *                          that edge image, result lands in img_buf1.
 * run_edge_detection == 0: the threshold is derived from and applied
 *                          to img_buf1 directly, result lands in
 *                          img_buf2.
 */
unsigned char *process_image(int run_edge_detection)
{
  unsigned char *src = img_buf1; /* image that gets thresholded */
  unsigned char *dst = img_buf2; /* where the segmented image goes */
  unsigned char peak, thresh;

  if (run_edge_detection) {
    /* edge image is produced in img_buf2, so the buffer roles swap */
    IMG_sobel(img_buf1, img_buf2, Y_SIZE, X_SIZE);
    src = img_buf2;
    dst = img_buf1;
  }

  peak = calc_hist(src);
  thresh = isodata(peak); /* calc threshold value */
  IMG_thr_le2min(src, dst, Y_SIZE, X_SIZE, thresh);

  return dst;
}

/*
 * Target entry point. Enables the RTDX channels and then loops
 * forever: reads the edge-detection flag and one image from the
 * host, processes the image, and writes the result back one row
 * at a time. Any failed RTDX transfer terminates the program.
 */
void main()
{
  int status, ii;
  int run_edge_detector;
  unsigned char *pimg = NULL;

  TARGET_INITIALIZE();
  RTDX_enableOutput(&ochan); /* enable output channel */
  RTDX_enableInput(&ichan); /* enable input channel */
  printf("Input & Output channels enabled ...\n");

  while (1) {
    /* wait for the host to send us the edge detection flag */
    if (sizeof(run_edge_detector) != (status = RTDX_read(&ichan, &run_edge_detector, sizeof(run_edge_detector)))) {
      /* without a valid flag run_edge_detector is indeterminate, so bail out */
      printf("ERROR: RTDX_read of edge detection flag failed!\n");
      exit(-1);
    }
    printf("Edge detection = %s\n", (run_edge_detector)?"yes":"no");

    /* now we're expecting X_SIZE x Y_SIZE worth of image data */
    if (N_PIXELS != (status = RTDX_read(&ichan, img_buf1, N_PIXELS))) {
      printf("ERROR: RTDX_read of image failed (%d)!\n", status);
      exit(-1);
    }    
    printf("Received %dx%d image\n", X_SIZE, Y_SIZE);

    pimg = process_image(run_edge_detector);

    /* send processed image back to host */
    printf("Sending processed image data back to host ...\n");
    for (ii=0; ii<X_SIZE; ++ii) { 
      /* write one row's worth of data (X_SIZE rows of Y_SIZE bytes) */
      if (!RTDX_write(&ochan, pimg+ii*Y_SIZE, Y_SIZE)) {
        printf("ERROR: RTDX_write of row %d failed!\n", ii);
        exit(-1);
      }
    } /* end (for each row) */
    printf("Image segmentation completed.\n");
  }
}
