#include <stdio.h>
#define CHIP_6416
#include <dsk6416.h>
#include "image.h"

#define KERNEL_SIZE 3 /* adaptive filter kernel is of size 3x3 */
/*
 * Half-width of the kernel. The processing loops in this file run from
 * MARGIN to (size - MARGIN) in each dimension, so a border of this many
 * pixels on every side of the image is left unprocessed.
 */
#define MARGIN      (KERNEL_SIZE/2) 

/* 
 * place all buffers in external RAM aligned 
 * on double-word boundaries 
 *
 * Each DATA_SECTION / DATA_ALIGN pragma is issued BEFORE the definition of
 * the symbol it names: the TI compiler requires the pragma to precede any
 * declaration, definition or reference of the symbol and may otherwise
 * ignore it with a warning.
 */

/*
 * NOTE(review): in_img is not defined in this file (presumably it comes from
 * image.h, which is included above). If that header already declares or
 * defines in_img, these two pragmas arrive too late to take effect and
 * should be moved ahead of the #include - confirm against image.h.
 */
#pragma DATA_SECTION (in_img, "SDRAM");
#pragma DATA_ALIGN (in_img, 8);

/* filtered output image */
#pragma DATA_SECTION (out_img, "SDRAM");
#pragma DATA_ALIGN (out_img, 8);
unsigned char out_img[N_PIXELS];

/* per-pixel mean of the 3x3 neighborhood (written by collect_local_pixel_stats) */
#pragma DATA_SECTION (local_mean, "SDRAM");
#pragma DATA_ALIGN (local_mean, 8);
unsigned char local_mean[N_PIXELS];

/* per-pixel variance of the 3x3 neighborhood (written by collect_local_pixel_stats) */
#pragma DATA_SECTION (local_variance, "SDRAM");
#pragma DATA_ALIGN (local_variance, 8);
unsigned short local_variance[N_PIXELS];

/*
 * Fixed-point constants. The local statistics are computed in Q12 (values
 * scaled by 2^12); the filter blend weights in mmse are in Q15 (scaled by 2^15).
 */
#define Q 12  /* right-shift used in collect_local_pixel_stats */
#define S 455 /* 1/9 * 2^12 (4096/9 = 455.1, truncated), box filter divisor in Q12 format */
#define Q15_ONE (1L<<15) /* 1.0 in Q15 format (32768), as a long */

/*
 * Compute the local mean and local variance of in_img over a sliding 3x3
 * window and store them in local_mean[] and local_variance[].
 *
 * Only interior pixels are processed: rows MARGIN..X_SIZE-MARGIN-1 and
 * columns MARGIN..Y_SIZE-MARGIN-1 (Y_SIZE is the row stride). Border entries
 * of local_mean[] and local_variance[] are not written.
 *
 * For each row the 3x3 sum and sum of squares are computed once and then
 * updated incrementally: the column leaving the window is subtracted and the
 * column entering it is added. The window accesses (offsets 0..3) are
 * hard-coded for a 3x3 kernel.
 *
 * Fixed-point math: m = accum*S is the mean in Q12, sumsq*S is E[x^2] in Q12,
 * and the variance is E[x^2] - mean^2, shifted back to an integer.
 *
 * NOTE(review): m can reach 9*255*455 = 1044225, so m*m is about 1.09e12 and
 * needs 40 bits. This relies on unsigned long being wider than 32 bits on
 * the target - confirm for the toolchain/ABI in use.
 *
 * Returns the average of all computed local variances, rounded to the
 * nearest integer (main passes it to mmse as the noise variance estimate).
 */
unsigned short collect_local_pixel_stats(void)
{
  int ir, ic;
  /* left edge of the window in its top, middle and bottom rows */
  const unsigned char *pin1 = in_img, *pin2 = in_img+Y_SIZE, *pin3 = in_img+2*Y_SIZE;
  unsigned char *plocal_mean = local_mean+Y_SIZE+MARGIN;
  unsigned short *plocal_var = local_variance+Y_SIZE+MARGIN;
  unsigned long accum, sumsq; /* sumsq = "sum of squares" */
  unsigned long sumvar = 0, /* sum of the local variances */
                m; /* temporary local mean storage (Q12) */
  float avg_variance;
  
  for (ir=MARGIN; ir<X_SIZE-MARGIN; ++ir) {
    /* prime the running sums with the first full window of this row */
    accum = pin1[0] + pin1[1] + pin1[2] +
            pin2[0] + pin2[1] + pin2[2] +
            pin3[0] + pin3[1] + pin3[2]; 
    sumsq = pin1[0]*pin1[0] + pin1[1]*pin1[1] + pin1[2]*pin1[2] +
            pin2[0]*pin2[0] + pin2[1]*pin2[1] + pin2[2]*pin2[2] +
            pin3[0]*pin3[0] + pin3[1]*pin3[1] + pin3[2]*pin3[2]; 
    for (ic=MARGIN; ic<Y_SIZE-MARGIN; ++ic, plocal_mean++, plocal_var++) {
      m = (accum * S);
      *plocal_var = ( (sumsq * S) - ((m*m)>>Q) ) >> Q;
      *plocal_mean = m>>Q;
      sumvar += *plocal_var;

      /*
       * Slide the window one column to the right, but only if another
       * column follows. Sliding after the last column would read offset 3
       * past the end of the row - and, on the final row, one byte past the
       * end of in_img - for a value that is never used.
       */
      if (ic+1 < Y_SIZE-MARGIN) {
        accum -= pin1[0] + pin2[0] + pin3[0];
        accum += pin1[3] + pin2[3] + pin3[3];
        sumsq -= pin1[0]*pin1[0] + pin2[0]*pin2[0] + pin3[0]*pin3[0];
        sumsq += pin1[3]*pin1[3] + pin2[3]*pin2[3] + pin3[3]*pin3[3];
      }
      
      pin1++; pin2++; pin3++;
    } /* end (for each column) */
    /* skip the right border of this row and the left border of the next */
    pin1 += 2*MARGIN;
    pin2 += 2*MARGIN;
    pin3 += 2*MARGIN;
    plocal_mean += 2*MARGIN;
    plocal_var += 2*MARGIN;
  } /* end (for each row) */

  avg_variance = (float)sumvar / ((X_SIZE-2*MARGIN)*(Y_SIZE-2*MARGIN));
  return (unsigned short)(avg_variance + 0.5f); /* round to nearest */
}

/*
 * Refine a Q15 reciprocal estimate with Newton-Raphson iteration.
 *
 *   x    - integer whose reciprocal is wanted
 *   seed - initial guess for 32768/x (i.e. 1/x in Q15)
 *
 * Each pass applies r <- r*(2 - x*r), where 2.0 is written as 65536 in Q15
 * and the Q30 product is shifted back down to Q15. Iteration stops as soon
 * as the estimate no longer changes, or after 6 passes.
 *
 * The seed must keep x*seed below 65536; otherwise the correction term goes
 * negative and the result is meaningless.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition under C99/C11, so a call that is not inlined would fail to link.
 */
static inline unsigned short newton_raphson_Q15(unsigned short x, unsigned short seed)
{
  unsigned short r = seed, rprev = r;
  int ii;
  
  for (ii=0; ii<6; ++ii) {
    r = (r*(65536 - x*r)) >> 15;
    if (rprev==r)
      return r; /* converged */
    rprev = r;
  }
    
  return r;
}

/* Newton-Raphson seed lookup tables */

/*
 * Seeds for x < 256, indexed by x>>4 (16 bins of 16 values each).
 * Entry i is 32768/(16*(i+1)) truncated, i.e. the reciprocal just above the
 * top of the bin, so the seed never overestimates 1/x.
 */
const unsigned short seed_1_to_256[] = {
  2048,1024,682,512,409,341,292,256,
  227,204,186,170,157,146,136,128
};

/*
 * Seeds for larger x, indexed by x>>8 (bins of 256 values).
 * Entry i is approximately 32768/(256*i + 128). The table has 37 entries,
 * so it may only be indexed for x <= 9471.
 */
const unsigned short seed_128_to_9362[] = {
  256,85,51,36,28,23,19,17,15,13,12,11,10,9,8,8,
  7,7,6,6,6,5,5,5,5,5,4,4,4,4,4,4,3,3,3,3,3
};

/*
 * Return an approximation of 1/x in Q15 format (about 32768/x).
 *
 *   x < 5           : exact table lookup (x == 0 has no reciprocal and
 *                     saturates to 32768, i.e. 1.0 in Q15)
 *   x > 9326        : the result is only 1, 2 or 3, chosen by range
 *   everything else : Newton-Raphson from a table seed
 *
 * The range tests form a descending chain with no gaps, so the boundary
 * values 21845 and 13107 are answered directly and can never reach the
 * seed table, which is too short for them.
 */
static inline unsigned short recip_Q15(unsigned short x)
{
  /* indexed directly by x: 1/0 (saturated), 1/1, 1/2, 1/3, 1/4 */
  static const unsigned short reciprocals_0_to_4[5] = {32768,32768,16384,10923,8192};

  if (x<5)
    return reciprocals_0_to_4[x];
  if (x > 21845)       /* 32768/x < 1.5 */
    return 1;
  if (x > 13107)       /* 1.5 <= 32768/x < 2.5 */
    return 2;
  if (x > 9326)
    return 3;
  if (x<256)
    return newton_raphson_Q15(x, seed_1_to_256[x>>4]);
  return newton_raphson_Q15(x, seed_128_to_9362[x>>8]); /* x <= 9326, index <= 36 */
}

/*
 * Adaptive filter: blend each interior pixel of in_img with its local mean
 * and write the result to out_img.
 *
 *   noise_var - noise variance to filter against
 *
 * For each pixel, with alpha = noise_var / local_variance (Q15):
 *   - if noise_var exceeds the local variance, the output is the local mean;
 *   - otherwise output = (1 - alpha)*pixel + alpha*local_mean.
 *
 * Reads local_mean[] and local_variance[]. Only rows MARGIN..X_SIZE-MARGIN-1
 * and columns MARGIN..Y_SIZE-MARGIN-1 are written; the border of out_img is
 * left untouched.
 */
void mmse(unsigned short noise_var)
{
  int ir, ic;
  const unsigned char *pin = in_img+Y_SIZE+MARGIN,
                      *plocal_mean = local_mean+Y_SIZE+MARGIN;
  unsigned char *pout = out_img+Y_SIZE+MARGIN;
  const unsigned short *plocal_var = local_variance+Y_SIZE+MARGIN;
  int alpha; /* (noise variance) / (local variance), Q15 */

  for (ir=MARGIN; ir<X_SIZE-MARGIN; ++ir) {
    for (ic=MARGIN; ic<Y_SIZE-MARGIN; ++ic, plocal_mean++, plocal_var++, pin++) {
      if (noise_var > *plocal_var)
        *pout++ = *plocal_mean;  
      else {
        alpha = noise_var * recip_Q15(*plocal_var);
        /*
         * The reciprocal is rounded, so the product can come out slightly
         * above 1.0 in Q15. Clamp it so both blend weights stay in
         * [0, Q15_ONE]; otherwise (Q15_ONE-alpha) goes negative and the
         * result can wrap when stored into an unsigned char.
         */
        if (alpha > Q15_ONE)
          alpha = Q15_ONE;
        *pout++ = (unsigned char)(((Q15_ONE-alpha)*(*pin) + alpha*(*plocal_mean)) >> 15);
      }
    }
    /* skip the right border of this row and the left border of the next */
    plocal_mean += 2*MARGIN;
    plocal_var += 2*MARGIN;
    pin += 2*MARGIN;
    pout += 2*MARGIN;    
  }
}

/*
 * Program entry point: initialize the board, gather the local pixel
 * statistics, and run the adaptive filter with the resulting estimate.
 */
void main()
{
  /* the DSK board support library must be initialized first */
  DSK6416_init();

  /* the value returned by the statistics pass is the filter's noise variance */
  mmse(collect_local_pixel_stats());
}
