// =========================================================
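// Floating-point micro-benchmark for the Nios II soft processor.
// For each operation under test it performs the following:
//   1. Starts the interval timer
//   2. Runs a loop with 10 ops 10K times (conversion tests: 1 op 100K times)
//   3. Reports the elapsed clock cycles, plus the cycles and time (us) per op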
// =========================================================
#define LEDR_BASE    0xFF200000
#define SW_BASE      0xFF200040
#define KEY_BASE     0xFF200050
#define TIMER_BASE   0xFF202000
#define Fcpu         100000000 
//#define LOOP_ITERATIONS 2000000
#define LOOP_ITERATIONS 200000

#include <stdio.h>      /* printf */
#include <math.h>       /* sqrt */
#include <stdlib.h>     /* abs */
//#include <unistd.h>
//#include <float.h>

// Interval-timer register indices, counted in 32-bit words from TIMER_BASE.
// Each register holds 16 significant bits.
enum {
  TIMER_CONTROL = 1,
  TIMER_PERIODL = 2,
  TIMER_PERIODH = 3,
  TIMER_SNAPL   = 4,
  TIMER_SNAPH   = 5
};

// Control-register values (CONT = 0 and ITO = 0 in both cases).
enum {
  TIMER_CTRL_START = 0x4,  // START = 1
  TIMER_CTRL_STOP  = 0x8   // STOP  = 1
};

// Start value programmed into the down-counter before every measurement.
enum { TIMER_PERIOD = 0x7FFFFFFF };

// Stops the timer, reloads the full period and starts it counting again.
static void timer_restart(volatile int *timer)
{
  timer[TIMER_CONTROL] = TIMER_CTRL_STOP;
  timer[TIMER_PERIODL] = 0xFFFF;  // low  half of TIMER_PERIOD
  timer[TIMER_PERIODH] = 0x7FFF;  // high half of TIMER_PERIOD
  timer[TIMER_CONTROL] = TIMER_CTRL_START;
}

// Returns the number of timer ticks elapsed since the last timer_restart().
// Does not stop the timer, so it can be used for polling.
static int timer_elapsed(volatile int *timer)
{
  unsigned int high_half;
  unsigned int low_half;

  // Make a counter snapshot by writing a dummy value to snapl
  timer[TIMER_SNAPL] = 0;
  // Read the 32-bit snapshot from the two 16-bit registers. The halves are
  // combined as unsigned values so the shift can never reach a sign bit.
  high_half = (unsigned int) (timer[TIMER_SNAPH] & 0xFFFF);
  low_half  = (unsigned int) (timer[TIMER_SNAPL] & 0xFFFF);
  return (int) ((unsigned int) TIMER_PERIOD - (low_half | (high_half << 16)));
}

// Stops the timer and returns the ticks elapsed since the last timer_restart().
static int timer_stop_and_read(volatile int *timer)
{
  timer[TIMER_CONTROL] = TIMER_CTRL_STOP;
  return timer_elapsed(timer);
}

// Converts a float to int without undefined behaviour: NaN becomes 0 and
// values outside the int range (including +/-infinity) saturate.
// Assumes a 32-bit int, as the rest of this file does.
static int float_to_int_saturated(float value)
{
  if (value != value) {
    return 0;                      // NaN
  }
  if (value >= 2147483648.0f) {
    return 0x7FFFFFFF;
  }
  if (value < -2147483648.0f) {
    return -0x7FFFFFFF - 1;
  }
  return (int) value;
}

// Runs the floating-point benchmark suite.
//
// Every test shows a distinct pattern on the red LEDs, prints a sanity-check
// value, times 100000 operations with the interval timer and prints the raw
// cycle count, the cycles per operation and the time per operation in us.
// The suite finishes with a five-second "seconds counter" that checks the
// timer against Fcpu.  Always returns 0.
int main(void)
{
  volatile int *red_LED_ptr = (int *) LEDR_BASE;          // Red LED address
  volatile int *interval_timer_ptr = (int *) TIMER_BASE;  // Timer address
  int elapsed_cycles;   // timer ticks consumed by the last measurement
  int check_value;      // sanity value printed before each test
  int seconds = 0;
  int i;
  int k = 0;
  volatile int vi;      // volatile prevents compiler optimization of the loop
  float j, result;
  float Tscale, Cscale;

  printf("Hello from Nios II\n\r");

  // Every test executes 100000 operations in total, so:
  //   Tscale = 1e6 us/s / (100000 ops * Fcpu cycles/s)  -> us per operation
  //   Cscale = 1 / 100000 ops                           -> cycles per operation
  Tscale = 10.0/Fcpu;
  Cscale = 1.0/100000.0;
  printf("************ 100 MHz Nios II/e with no FP HW support Timer=100 MHz*************\r\n");

  /////////////////// FP add test ////////////////////////
  j = 1.3;
  *(red_LED_ptr) = 1;
  result = j+j;
  check_value = result*100;
  printf("add 1.3+1.3 times 100 = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  for (i = 0; i < 10000; i++) {
    // 10 additions per iteration
    result = result + j; result = result + j; result = result + j;
    result = result + j; result = result + j; result = result + j;
    result = result + j; result = result + j; result = result + j;
    result = result + j;
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP Add cycles: %d\r\n", (int) (elapsed_cycles*Cscale));
  printf("FP Add   time: %d us\r\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K adds result: %d\r\n\r\n", (int) result);

  /////////////////// FP sub test ////////////////////////
  *(red_LED_ptr) = 2;
  j = 1.3;
  result = j*2-j;
  check_value = result*100;
  printf("sub 1.3*2 - 1.3 times 100 = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  for (i = 0; i < 10000; i++) {
    // 10 subtractions per iteration
    result = result - j; result = result - j; result = result - j;
    result = result - j; result = result - j; result = result - j;
    result = result - j; result = result - j; result = result - j;
    result = result - j;
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP sub cycles: %d\n", (int) (elapsed_cycles*Cscale));
  printf("FP sub   time: %d us\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K sub result: %d\r\n\r\n", (int) result);

  /////////////////// FP Mult test ////////////////////////
  j = 1.3;
  *(red_LED_ptr) = 4;
  result = j*j;
  check_value = result*100;
  printf("Multiplication 1.3*1.3 times 100 = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  // NOTE(review): assuming IEEE-754 single precision, result overflows to
  // +infinity after roughly 340 multiplications by 1.3, so most of the timed
  // iterations multiply infinity rather than a finite value.
  for (i = 0; i < 10000; i++) {
    // 10 multiplications per iteration
    result = result * j; result = result * j; result = result * j;
    result = result * j; result = result * j; result = result * j;
    result = result * j; result = result * j; result = result * j;
    result = result * j;
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP Mul cycles: %d\n", (int) (elapsed_cycles*Cscale));
  printf("FP Mul   time: %d us\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler.
  // The product is out of int range here, and a plain cast of such a value
  // is undefined behaviour, hence the saturating conversion.
  printf("FP 10K Multiplication result: %d\r\n\r\n", float_to_int_saturated(result));

  /////////////////// FP Div test ////////////////////////
  j = 1.3;
  *(red_LED_ptr) = 8;
  result = j * 2.0 / j;
  check_value = result*100;
  printf("Div 2*1.3/1.3 times 100 = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  // NOTE(review): the quotient shrinks by 1.3 on every operation and is
  // expected to underflow to zero early in the run - confirm that timing
  // mostly zero dividends is acceptable.
  for (i = 0; i < 10000; i++) {
    // 10 divisions per iteration
    result = result / j; result = result / j; result = result / j;
    result = result / j; result = result / j; result = result / j;
    result = result / j; result = result / j; result = result / j;
    result = result / j;
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP Div cycles: %d\n", (int) (elapsed_cycles*Cscale));
  printf("FP Div   time: %d us\r\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K Div result: %d\n\n", (int) result);

  /////////////////// FP SQRT test ////////////////////////
  j = 4.0;
  *(red_LED_ptr) = 16;
  result = j * j;
  check_value = sqrt(result);
  printf("Sqrt( 4.0*4.0 )= %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  for (i = 0; i < 10000; i++) {
    // 10 double-precision square roots per iteration (float argument and
    // result are converted to and from double around each call)
    result = sqrt(result); result = sqrt(result); result = sqrt(result);
    result = sqrt(result); result = sqrt(result); result = sqrt(result);
    result = sqrt(result); result = sqrt(result); result = sqrt(result);
    result = sqrt(result);
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP sqrt cycles: %d\n", (int) (elapsed_cycles*Cscale));
  printf("FP sqrt   time: %d us\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K sqrt result: %d\n\n", (int) result);

  /////////////////// FP sqrtf test ////////////////////////
  j = 4.0;
  *(red_LED_ptr) = 32;
  result = sqrtf(j*j);
  check_value = result;
  printf("Sqrtf 16  = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  for (i = 0; i < 10000; i++) {
    // 10 single-precision square roots per iteration
    result = sqrtf(result); result = sqrtf(result); result = sqrtf(result);
    result = sqrtf(result); result = sqrtf(result); result = sqrtf(result);
    result = sqrtf(result); result = sqrtf(result); result = sqrtf(result);
    result = sqrtf(result);
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP sqrtf cycles: %d\n", (int) (elapsed_cycles*Cscale));
  printf("FP sqrtf   time: %d us\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K SQRTF result: %d\r\n\r\n", (int) result);

  /////////////////// INT -> FP conversion test ////////////////////////
  i = 10;
  *(red_LED_ptr) = 64;
  result = (float) i;
  // The cast has to cover the whole expression, not just the literal 20.
  check_value = (int) (20*result - 19*result);
  printf("ten 10.0  = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  // NOTE(review): only the loop counter is volatile; the conversion itself
  // is loop-invariant, so an optimizing build may not repeat it every pass.
  for (vi = 0; vi < 100000; vi++) {
    // 1 int -> float conversion per iteration
    result = (float) i;
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("INT->FP cycles: %d\r\n", (int) (elapsed_cycles*Cscale));
  printf("INT->FP   time: %d us\r\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K INT->FP result: %d\n\n", (int) result);

  /////////////////// FP -> INT conversion test ////////////////////////
  i = 10;
  // NOTE(review): same LED pattern as the previous test; every other test
  // uses its own bit, so 128 may have been intended - confirm.
  *(red_LED_ptr) = 64;
  result = (float) i;
  check_value = (int) (20*result - 19*result);
  printf("ten 10.0  = %d\r\n", check_value);
  timer_restart(interval_timer_ptr);
  for (vi = 0; vi < 100000; vi++) {
    // 1 float -> int conversion per iteration
    k = (int) result;
  }
  elapsed_cycles = timer_stop_and_read(interval_timer_ptr);
  printf("Clock cycles  = %d\n", elapsed_cycles);
  printf("FP->INT  cycles: %d\r\n", (int) (elapsed_cycles*Cscale));
  printf("FP->INT    time: %d us\r\n", (int) (elapsed_cycles*Tscale));
  // Note: this output is required otherwise loop is optimized by compiler
  printf("FP 10K FP->INT result: %d\n\n", k);

  /////////////////// Clock test ////////////////////////
  // Polls the running timer and prints a line each time more than Fcpu
  // ticks (one second of timer clock) have elapsed, five times in total.
  printf("Second counter starts\n\r");
  timer_restart(interval_timer_ptr);
  while (seconds < 5) {
    elapsed_cycles = timer_elapsed(interval_timer_ptr);
    if (elapsed_cycles > Fcpu) {
      printf("sec=%d \n", seconds);
      seconds++;
      timer_restart(interval_timer_ptr);
    }
  }

  printf("\r\nSuccessfully ran FP measurements on Nios II\r\n");
  *(red_LED_ptr) = 255;

  return 0;
}
