#include <stdio.h>
#include <fcntl.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>   /* close() */
#include <mpi.h>
#include "defines.h"  /* constants etc. */
#include "vars.h"
#include "routines.h" /* All our function definitions go here, om3.h absorbed in here */

/* NOTE(review): presumably a default I/O directory; it is not referenced
   anywhere in the visible part of this file - confirm it is still needed. */
#define OM3_IODIR "."

/* Rank of this process in MPI_COMM_WORLD.  Stays 0 until om3_init() fills it
   in with MPI_Comm_rank(); rank 0 is the one that prints and opens files. */
int my_address = 0;

void dump_test()
{
	int iy;
	if (my_address == 0)
	{
		for (iy = 0; iy < My; iy++)
		{
			printf("u: %f\n",u[Mz-1][iy][Mx-5]);
		}
	}
}
			
/*
 * Discard the rest of the current line on fp, up to and including the next
 * '\n'.
 *
 * Also stops when fgetc() returns EOF (end of file or read error); without
 * that check a final line lacking a trailing newline would spin forever,
 * because EOF never compares equal to '\n'.
 */
void lflush(FILE *fp)
{
	int ch;

	do
	{
		ch = fgetc(fp);
	} while (ch != '\n' && ch != EOF);
}

/*
 * Local helpers for read_input_data(): read one value with fscanf, report
 * and bail out to the cleanup label if the conversion fails, otherwise
 * discard the remainder of that input line with lflush().
 */
#define OM3_READ_SCALAR(fmt, var) \
	do { \
		if (fscanf(fp_input_dat, fmt, &(var)) != 1) \
		{ \
			printf("Cannot read %s from input file %s, bailing\n", #var, filename); \
			goto cleanup; \
		} \
		lflush(fp_input_dat); \
	} while (0)

#define OM3_READ_WORD(var) \
	do { \
		if (fscanf(fp_input_dat, "%s", var) != 1) \
		{ \
			printf("Cannot read %s from input file %s, bailing\n", #var, filename); \
			goto cleanup; \
		} \
		lflush(fp_input_dat); \
	} while (0)

/*
 * Read the model parameter file and fill in the global run parameters.
 *
 * The file holds one value per line, in a fixed order; anything after the
 * value on a line is skipped.  After the raw values are read, the derived
 * globals Np, Restore, Mx, My and Mz are computed.
 *
 * filename: path of the parameter file to read.
 *
 * Returns 1 on success.  Returns 0 (after printing a message) if a value
 * cannot be parsed or if Nrows/Ncols is not positive, which would otherwise
 * divide by zero when computing Mx/My.  If the file cannot be opened the
 * process prints a message and exits with a failure status.
 *
 * NOTE(review): the four "%s" reads are unbounded; the capacity of
 * hist_base, GeometryFileName, RestartRunName and RestartSaveName is not
 * visible from here, so a field width could not be added safely - confirm
 * against their declarations.
 */
int read_input_data(char *filename)
{
	FILE *fp_input_dat = NULL;
	int status = 0;

	if ((fp_input_dat = fopen(filename,"r")) == NULL)
	{
		printf("Cannot open input file %s, bailing\n",filename);
		exit(EXIT_FAILURE);
	}

	/* global grid size */
	OM3_READ_SCALAR("%i", Nx);
	OM3_READ_SCALAR("%i", Ny);
	OM3_READ_SCALAR("%i", Nz);

	/* processor layout */
	OM3_READ_SCALAR("%i", Nrows);
	OM3_READ_SCALAR("%i", Ncols);

	OM3_READ_SCALAR("%f", NorthLat);
	OM3_READ_SCALAR("%f", SouthLat);

	OM3_READ_SCALAR("%f", Delx);
	OM3_READ_SCALAR("%f", Dely);

	OM3_READ_SCALAR("%f", Diff_Mks);
	OM3_READ_SCALAR("%f", Diff_Ratio);
	OM3_READ_SCALAR("%f", G);

	OM3_READ_SCALAR("%i", N_Split);
	OM3_READ_SCALAR("%i", Sub_Split);
	OM3_READ_SCALAR("%f", Delt);

	OM3_READ_SCALAR("%i", N_Steps);
	OM3_READ_SCALAR("%i", N_Blocks);

	OM3_READ_WORD(hist_base);
	OM3_READ_WORD(GeometryFileName);
	OM3_READ_WORD(RestartRunName);

	/* Last field: nothing is read after it, so the rest of the line need
	   not be skipped. */
	if (fscanf(fp_input_dat,"%s",RestartSaveName) != 1)
	{
		printf("Cannot read %s from input file %s, bailing\n", "RestartSaveName", filename);
		goto cleanup;
	}

	/* Nx/Ncols and Ny/Nrows below would divide by zero otherwise. */
	if (Nrows <= 0 || Ncols <= 0)
	{
		printf("Nrows and Ncols must be positive (got %i, %i) in input file %s, bailing\n",
			Nrows, Ncols, filename);
		goto cleanup;
	}

	Np = Nrows * Ncols;
	Restore = 1.0/(30.0*24.0*3600.0);
	Mx = (Nx/Ncols)+2;
	My = (Ny/Nrows)+2;
	Mz = Nz;
	status = 1;

cleanup:
	fclose(fp_input_dat);
	return(status);
}

#undef OM3_READ_SCALAR
#undef OM3_READ_WORD


/*
 * om3_init - bring up MPI and the complete model state for this process.
 *
 * In order, this routine:
 *   - calls MPI_Init unless MPI is already initialized;
 *   - reads "<data_dir>/model.dat" through read_input_data();
 *   - attaches an MPI send buffer and builds/commits mpi_edge_type;
 *   - queries rank (my_address) and size (num_nodes), and on rank 0 prints
 *     every input parameter;
 *   - records the data and output directories and, on rank 0, opens the
 *     ".cmpics" picture file;
 *   - exits with status 89 if Np (Nrows * Ncols) differs from num_nodes;
 *   - derives this rank's row/col and its four neighbour ranks, wrapping
 *     around at the edges of the Nrows x Ncols processor grid;
 *   - allocates all model arrays, reads geometry and forcing, writes a few
 *     pictures, initializes the state (optionally from a restart file when
 *     RestartRun is defined) and reports initial maxima on rank 0.
 *
 * data_dir:   directory containing model.dat; also handed to
 *             om3_set_data_directory().
 * output_dir: handed to om3_set_output_directory(); also used to format a
 *             stdout file name on rank 0.
 * argc, argv: forwarded to MPI_Init when this routine has to initialize MPI.
 */
void om3_init( const char* data_dir, const char* output_dir, int argc, char *argv[] ) {

	int ellen, elcnt, stride;
	float max_an_t, max_an_s, max_sx, max_sy;
	float max_u, max_v, max_t, max_p;
	int flag;
	int it;
	
	extern MPI_Datatype mpi_edge_type;
	/* Initialize MPI only if nobody did so before us. */
	/* NOTE(review): err is assigned but never read in this function. */
	int err = MPI_Initialized(&flag);
	if( flag == 0 ) { 
	  MPI_Init(&argc,&argv);
	}
	
	/* Read the run parameters; read_input_data() returns 1 on success. */
	sprintf (buff, "%s/model.dat", data_dir );
	if ((read_input_data(buff)) != 1) {
		printf("Ugh, this should not have happened\n"); exit(0);
	}

	/* Buffer handed to MPI_Buffer_attach below.
	   NOTE(review): the 8 * 10 sizing factor is unexplained - confirm. */
	srbuff = memalloc1dchar("srbuff",srbuff,Mx * My * 8 * 10);
	MPI_Buffer_attach (srbuff, Mx * My * 8 * 10);

	/* Build the MPI type for an e-w edge: My blocks of sizeof(float) chars,
	   consecutive blocks Mx * sizeof(float) chars apart (presumably one
	   float out of every row of an Mx-wide float array - confirm). */
	ellen = sizeof (float);
	elcnt = My;
	stride = Mx * ellen;
	current_data_array = NULL;
	MPI_Type_vector (elcnt, ellen, stride, MPI_CHAR, &mpi_edge_type);
	MPI_Type_commit (&mpi_edge_type);

	/* Get self address and number of nodes. */

	MPI_Comm_rank (MPI_COMM_WORLD, &my_address);
	MPI_Comm_size (MPI_COMM_WORLD, &num_nodes);
/* open stdout file on sphome for root node, flush on other nodes */
/* NOTE(review): only a file name is formatted below; nothing is opened, and
   buff is not used again in this function before it is overwritten with the
   picture file name. */

	printf ("info: OM3 address = %d of %d\n", my_address, num_nodes);

	if (my_address <= 0) /* eh? How can this happen? orf */
	{
		sprintf (buff, "%s/stdout_%d_%d.%d", output_dir, Nrows, Ncols, my_address);
	}

	/* Rank 0 echoes every parameter read from model.dat plus the derived ones. */
	if (my_address == 0)
	{
		printf("info:****************** Model input parameters *****************\n");
		printf("info: Nx: %i\n",Nx);
		printf("info: Ny: %i\n",Ny);
		printf("info: Nz: %i\n",Nz);
		printf("info: Nrows: %i\n",Nrows);
		printf("info: Ncols: %i\n",Ncols);
		printf("info: Np: %i\n",Np);
		printf("info: Mx: %i\n",Mx);
		printf("info: My: %i\n",My);
		printf("info: Mz: %i\n",Mz);

		printf("info: NorthLat: %f\n",NorthLat);
		printf("info: SouthLat: %f\n",SouthLat);

		printf("info: Delx: %f\n",Delx);
		printf("info: Dely: %f\n",Dely);

		printf("info: Diff_Mks: %f\n",Diff_Mks);
		printf("info: Diff_Ratio: %f\n",Diff_Ratio);
		printf("info: Restore: %e\n",Restore);
		printf("info: G: %f\n",G);
		printf("info: N_Split: %i\n",N_Split);
		printf("info: Sub_Split: %i\n",Sub_Split);
		printf("info: Delt: %f\n",Delt);
		printf("info: N_Steps: %i\n",N_Steps);
		printf("info: N_Blocks: %i\n",N_Blocks);
		printf("info: hist_base: %s\n",hist_base);
		printf("info: GeometryFileName: %s\n",GeometryFileName);
		printf("info: RestartRunName: %s\n",RestartRunName);
		printf("info: RestartSaveName: %s\n",RestartSaveName);
		printf("info:****************** End Model input parameters *************\n");
		fflush(stdout);
	}
	MPI_Barrier (MPI_COMM_WORLD); /* all together now, should make above output contiguous */

	om3_set_data_directory(data_dir);
	om3_set_output_directory(output_dir);

	/* open picture file */
	/* (rank 0 only; the path is built from the global output_directory,
	   presumably set by om3_set_output_directory above - confirm) */

	if (my_address == 0)
	{
		sprintf (buff, "%s/om3_%d_%d.cmpics", output_directory, Nrows, Ncols);
		ofile = global_open (buff, O_CREAT | O_WRONLY, 0666);
	}
	/* validate machine geometry */
	/* The processor grid from model.dat must match the number of MPI ranks. */

	if (Np != num_nodes)
	{
		printf ("error: OM3 Node number mismatch - needed %d got %d ...exiting\n", Np, num_nodes);
		exit (89);
	}
	else printf ("info: OM3 Started on %d nodes.\n", num_nodes);
	fflush (of);


	/* Calculate node geometry and neighbor node numbers */

	if (my_address == 0)
	printf ("info: OM3 Ncols,Nrows %d %d Mx,My: %d %d\n", Ncols, Nrows, Mx, My);

	/* Ranks are laid out row-major on the Nrows x Ncols processor grid. */
	row = my_address / Ncols;
	col = my_address % Ncols;

	/* West/east neighbours wrap around within the same row. */
	if (col == 0)
	west = my_address + (Ncols - 1);	/* 2-D torus */
	else
	west = my_address - 1;

	if (col == Ncols - 1)
	east = my_address - (Ncols - 1);
	else
	east = my_address + 1;

	/* South/north neighbours wrap around within the same column. */
	if (row == 0)
	south = my_address + (Nrows - 1) * Ncols;
	else
	south = my_address - Ncols;

	if (row == Nrows - 1)
	north = my_address - (Nrows - 1) * Ncols;
	else
	north = my_address + Ncols;

	/* Initialization processing */
	/* Timer 0 measures allocation plus the geometry/forcing reads below. */

	node_timer_clear (0);
	node_timer_start (0);

	/* Arrays requested with dimensions (Mz, My, Mx).  The memalloc* helpers
	   are defined elsewhere; presumably they allocate and return the array
	   and use the name string for diagnostics - confirm. */
	dtu = memalloc3dfloat ("dtu",dtu,Mz,My,Mx);
	dtv = memalloc3dfloat ("dtv",dtv,Mz,My,Mx);
	dtt = memalloc3dfloat ("dtt",dtt,Mz,My,Mx);
	dts = memalloc3dfloat ("dts",dts,Mz,My,Mx);
	dta = memalloc3dfloat ("dta",dta,Mz,My,Mx);

	u = memalloc3dfloat ("u",u,Mz,My,Mx);
	v = memalloc3dfloat ("v",v,Mz,My,Mx);
	t = memalloc3dfloat ("t",t,Mz,My,Mx);
	p = memalloc3dfloat ("p",p,Mz,My,Mx);
	s = memalloc3dfloat ("s",s,Mz,My,Mx);
	a = memalloc3dfloat ("a",a,Mz,My,Mx);

	ul = memalloc3dfloat ("ul",ul,Mz,My,Mx);
	vl = memalloc3dfloat ("vl",vl,Mz,My,Mx);
	tl = memalloc3dfloat ("tl",tl,Mz,My,Mx);
	sl = memalloc3dfloat ("sl",sl,Mz,My,Mx);
	al = memalloc3dfloat ("al",al,Mz,My,Mx);
	
	mix_u = memalloc3dfloat ("mix_u",mix_u,Mz,My,Mx);
	mix_v = memalloc3dfloat ("mix_v",mix_v,Mz,My,Mx);
	mix_t = memalloc3dfloat ("mix_t",mix_t,Mz,My,Mx);
	mix_s = memalloc3dfloat ("mix_s",mix_s,Mz,My,Mx);
	mix_a = memalloc3dfloat ("mix_a",mix_a,Mz,My,Mx);
	
	risq = memalloc3dfloat ("risq",risq,Mz,My,Mx);
	nsq = memalloc3dfloat ("nsq",nsq,Mz,My,Mx);
	kuv = memalloc3dfloat ("kuv",kuv,Mz,My,Mx);
	kts = memalloc3dfloat ("kts",kts,Mz,My,Mx);
	
	dzt = memalloc3dfloat ("dzt",dzt,Mz,My,Mx);
	dzs = memalloc3dfloat ("dzs",dzs,Mz,My,Mx);
	dza = memalloc3dfloat ("dza",dza,Mz,My,Mx);
	w = memalloc3dfloat ("w",w,Mz,My,Mx);
	dzu = memalloc3dfloat ("dzu",dzu,Mz,My,Mx);
	dzv = memalloc3dfloat ("dzv",dzv,Mz,My,Mx);

	/* tracers */
	/* One set of (Mz, My, Mx) arrays per tracer; every tracer passes the
	   same name string to the allocator. */

	for (it = 0; it < NumTracers; it++)
	{
		tr[it] = memalloc3dfloat("tr_x",tr[it],Mz,My,Mx);
		trl[it] = memalloc3dfloat("trl_x",trl[it],Mz,My,Mx);
		trx[it] = memalloc3dfloat("trx_x",trx[it],Mz,My,Mx);
		dttr[it] = memalloc3dfloat("dttr_x",dttr[it],Mz,My,Mx);
		dztr[it] = memalloc3dfloat("dztr",dztr[it],Mz,My,Mx);
		mix_tr[it] = memalloc3dfloat ("mix_tr",mix_tr[it],Mz,My,Mx);
		/*
		tracer_name[it] = NULL;
		*/
	}

	/* Arrays requested with dimensions (My, Mx), and per-level vectors of
	   length Mz. */
	freshwater = memalloc2dint ("freshwater",freshwater,My,Mx);
	
	uvts_levels = memalloc1dfloat ("uvts_levels",uvts_levels,Mz);
	w_levels = memalloc1dfloat ("w_levels",w_levels,Mz);
	
	strx = memalloc2dfloat ("strx",strx,My,Mx);
	stry = memalloc2dfloat ("stry",stry,My,Mx);

	taux = memalloc2dfloat ("taux",taux,My,Mx);
	tauy = memalloc2dfloat ("tauy",tauy,My,Mx);

	ptop = memalloc2dfloat ("ptop",ptop,My,Mx);
	ptopl = memalloc2dfloat ("ptopl",ptopl,My,Mx);
	ptopx = memalloc2dfloat ("ptopx",ptopx,My,Mx);

	dtubaro = memalloc2dfloat ("dtubaro",dtubaro,My,Mx);
	dtvbaro = memalloc2dfloat ("dtvbaro",dtvbaro,My,Mx);
	
	/* Picture buffer is two cells smaller in each direction than the other
	   arrays (presumably the interior without a one-cell border - confirm). */
	picbuf = memalloc2duschar("picbuf",picbuf,My-2,Mx-2);
	
	/* Vectors of length My, and base profiles of length Mz. */
	f = memalloc1dfloat("f",f,My);
	geo1 = memalloc1dfloat("geo1",geo1,My);
	geo2 = memalloc1dfloat("geo2",geo2,My);
	geo3 = memalloc1dfloat("geo3",geo3,My);
	base_t = memalloc1dfloat("base_t",base_t,Mz);
	base_s = memalloc1dfloat("base_s",base_s,Mz);

	/* new */

	ux = memalloc3dfloat ("ux",ux,Mz,My,Mx);
	vx = memalloc3dfloat ("vx",vx,Mz,My,Mx);
	tx = memalloc3dfloat ("tx",tx,Mz,My,Mx);
	sx = memalloc3dfloat ("sx",sx,Mz,My,Mx);
	ax = memalloc3dfloat ("ax",ax,Mz,My,Mx);
	
	h = memalloc3dfloat ("h",h,Mz,My,Mx);
	uvh = memalloc3dfloat ("uvh",uvh,Mz,My,Mx);
	annual_salt = memalloc3dfloat ("annual_salt",annual_salt,Mz,My,Mx);
	annual_temp = memalloc3dfloat ("annual_temp",annual_temp,Mz,My,Mx);
	
	ubstar = memalloc2dfloat ("ubstar",ubstar,My,Mx);
	vbstar = memalloc2dfloat ("vbstar",vbstar,My,Mx);
	ptstar = memalloc2dfloat ("ptstar",ptstar,My,Mx);
	ubaro = memalloc2dfloat ("ubaro",ubaro,My,Mx);
	vbaro = memalloc2dfloat ("vbaro",vbaro,My,Mx);

	depth = memalloc2dfloat ("depth",depth,My,Mx);
	uvdepth = memalloc2dfloat ("uvdepth",uvdepth,My,Mx);
	rdepth = memalloc2dfloat ("rdepth",rdepth,My,Mx);
	ruvdepth = memalloc2dfloat ("ruvdepth",ruvdepth,My,Mx);

	t_forcing = memalloc2dfloat ("t_forcing",t_forcing,My,Mx);
	s_forcing = memalloc2dfloat ("s_forcing",s_forcing,My,Mx);
	
	/* in_ and out_ staging buffers in several shapes (presumably used for
	   exchanging edges with neighbour ranks - confirm). */
	in_xz = memalloc2dfloat ("in_xz",in_xz,Mz,Mx);
	out_xz = memalloc2dfloat ("out_xz",out_xz,Mz,Mx);
	
	in_yz = memalloc2dfloat ("in_yz",in_yz,Mz,My);
	out_yz = memalloc2dfloat ("out_yz",out_yz,Mz,My);
	
	in_3x = memalloc2dfloat ("in_3x",in_3x,3,Mx);
	out_3x = memalloc2dfloat ("out_3x",out_3x,3,Mx);

	in_3y = memalloc2dfloat ("in_3y",in_3y,3,My);
	out_3y = memalloc2dfloat ("out_3y",out_3y,3,My);


	/* Read geometry and forcing; the commented-out calls are alternative
	   readers that are currently disabled. */
	read_geometry ();
/*
	read_stress();
	read_annual_temp();
	read_annual_salt_simple();
*/
	/* orf */
	read_annual_temp_simple();
	read_annual_salt();
	read_freshwater();

	node_timer_stop (0);

	if (my_address == 0)
	printf ("info: OM3 Forcing and geometry read time=%f [secs]\n", node_timer_busy (0));


	/* Write pictures of the stress fields and of level Nz-1 of the annual
	   temperature and salt fields.  The second argument is defined by
	   write_picture_d (presumably a scale - confirm). */
	write_picture_d (strx, 0.1);
	write_picture_d (stry, 0.1);
	write_picture_d (&(annual_temp[Nz - 1][0]), 10.0);
	write_picture_d (&(annual_salt[Nz - 1][0]), 2.0);

	/* f_max_d() performs MPI_Allreduce, so every rank must make these calls. */
	max_sx = f_max_d (strx);
	max_sy = f_max_d (stry);
	max_an_t = f_max_d (annual_temp[Nz - 1]);
	max_an_s = f_max_d (annual_salt[Nz - 1]);

	if (my_address == 0)
	{
		printf ("info: OM3 Max sx: %f, sy: %f, ann. temp, %f, ann. salt %f\n",
			max_sx, max_sy, max_an_t, max_an_s);
	}
	fflush (of);

	/* Time the state initialization separately. */
	node_timer_clear (0);
	node_timer_start (0);

	init_state ();

	node_timer_stop (0);

	if (my_address == 0)
	printf ("info: OM3 State init time=%f [secs] ,\n", node_timer_busy (0));
	fflush (of);

	/* Compile-time option: load the state from a restart file. */
#ifdef RestartRun
	load_restart_wall ();
	if (my_address == 0)
	{
		printf ("info: OM3 Model initialized from restart file: %s\n", RestartRunName);
	}
	fflush (of);
#endif

	/* Report the initial maxima at level Nz-1 and of ptop. */
	max_u = f_max_d (u[Nz - 1]);
	max_v = f_max_d (v[Nz - 1]);
	max_t = f_max_d (t[Nz - 1]);
	max_p = f_max_d (ptop);

	if (my_address == 0)
	{
		printf ("info: OM3 istep: 0, Max u: %f, v: %f, t: %f, p: %f\n", max_u, max_v, max_t, max_p);
		printf ("info: OM3 Delt = %f seconds, Block time = %f days.\n", Delt, Delt * N_Steps / (3600.0 * 24.0));
	}
	fflush (of);
}

/*
 * Start output block number iblock.
 *
 * On rank 0, opens the history file
 * "<output_directory>/<hist_base><iblock>.cmhist" for binary writing and
 * writes the line "1" to it.  If the file cannot be opened, reports the
 * error and exits with status 1 (same policy as global_open()) instead of
 * passing a NULL stream to fprintf.
 *
 * Every rank then prints the block banner, flushes `of`, and restarts
 * timer 0.
 *
 * iblock: zero-based block index; the banner shows iblock + 1.
 */
void om3_open_block( int iblock ) {
	if (my_address == 0)
	{
		sprintf (buff, "%s/%s%i.cmhist", output_directory, hist_base,iblock );
		hfile = fopen (buff, "wb");
		if (hfile == NULL)
		{
			printf ("error: OM3 File open error: %s\n", buff);
			exit (1);
		}
		fprintf (hfile, "%d\n", 1);
	}

	printf ("info: OM3 ****** START BLOCK %d of %d *********************\n",
		iblock + 1, N_Blocks);
	fflush (of);

	node_timer_clear (0);
	node_timer_start (0);
}

/*
 * Advance the model by one time step.
 *
 * Rank 0 prints the step number and current run_time, every rank calls
 * time_step(), then the global maxima of u, v, t, s, a (at level Nz-1) and
 * of ptop are gathered with f_max_d() and printed on rank 0.  Finally `of`
 * is flushed and run_time is advanced by Delt.
 *
 * istep: step counter, used only in the log line.
 */
void om3_step( int istep ) {

	float peak_u, peak_v, peak_t, peak_s, peak_a, peak_p;

	if (my_address == 0)
	{
		printf ("\ninfo: OM3 Timestep %d: time = %f\n", istep, run_time);
	}

	time_step ();

	/* f_max_d() is collective: every rank makes the same calls in the same order. */
	peak_u = f_max_d (u[Nz - 1]);
	peak_v = f_max_d (v[Nz - 1]);
	peak_t = f_max_d (t[Nz - 1]);
	peak_s = f_max_d (s[Nz - 1]);
	peak_a = f_max_d (a[Nz - 1]);
	peak_p = f_max_d (ptop);

	if (my_address == 0)
	{
		printf ("info: OM3 Max u: %f, v: %f, t: %f, s: %f, p: %f, a: %f\n", peak_u, peak_v, peak_t, peak_s, peak_p, peak_a);
	}
	fflush (of);

	/* run_time is kept in seconds */
	run_time = run_time + Delt;
}

/*
 * Finish output block iblock.
 *
 * Stops timer 0 and reports the elapsed calculation time on rank 0, prints
 * the global maxima of u, v, t, s, a (level Nz-1), ptop and of the
 * accumulated t/s forcing divided by run_time, then, for iblock >= 0,
 * writes pictures of the forcing fields and (when WRITE_HISTORY is defined)
 * the history records.  Finally rank 0 closes the history file.
 *
 * The history stream is closed only if it is open, and hfile is reset to
 * NULL afterwards so that a later close does not operate on a stale
 * FILE pointer.
 *
 * iblock: block index; output is written only when it is non-negative.
 */
void om3_close_block( int iblock ) {

	float max_tf, max_sf, max_s;
	float max_u, max_v, max_t, max_p, max_a;

	node_timer_stop (0);
	if (my_address == 0)
	{
		printf ("info: OM3 Timestep calculation time=%f [secs], run_time: %e\n",
			node_timer_busy (0), run_time);
	}
	node_timer_clear (0);
	fflush (of);

	/* f_max_d() is collective: all ranks must make these calls. */
	max_u = f_max_d (&(u[Nz - 1][0]));
	max_v = f_max_d (&(v[Nz - 1][0]));
	max_t = f_max_d (&(t[Nz - 1][0]));
	max_s = f_max_d (&(s[Nz - 1][0]));
	max_a = f_max_d (&(a[Nz - 1][0]));
	max_p = f_max_d (&(ptop[0]));

	if (my_address == 0)
	{
		printf ("info: OM3 Max u: %f, v: %f, t: %f, s: %f, p: %f, a: %f\n", max_u, max_v, max_t, max_s, max_p, max_a);
	}

	max_tf = f_max_d (t_forcing);
	max_sf = f_max_d (s_forcing);

	if (my_address == 0)
	{
		printf ("info: OM3 Max tf: %e, sf: %e\n", max_tf / run_time, max_sf / run_time);
	}

	fflush (of);

	if (iblock >= 0)
	{
		write_picture_d (t_forcing, fabs (max_tf) / 3.0);
		write_picture_d (s_forcing, fabs (max_sf) / 3.0);

#ifdef WRITE_HISTORY
		{
			int it;

			write_history_d (u);
			write_history_d (v);
			write_history_d (w);
			write_history_d (t);
			write_history_d (s);
			write_history_d (p);
			write_history_d (a);
			for (it=0; it < NumTracers; it++)
			{
				write_history_d (tr[it]);
			}
		}
#endif /* WRITE_HISTORY */
		/* write_history(risq); */
	}

	if (my_address == 0 && hfile != NULL)
	{
		fclose (hfile);
		hfile = NULL;
	}
}

/*
 * Shut the model down.
 *
 * Rank 0 closes the picture file descriptor (ofile) and, if one is still
 * open, the history stream (hfile).  hfile is reset to NULL afterwards;
 * calling fclose() on a NULL stream is undefined, and that is what would
 * happen here if no block had ever been opened.  When RestartSave is
 * defined the state is written with save_restart().  MPI_Finalize() is
 * called last, on every rank.
 */
void om3_finalize() {
	if (my_address == 0)
	{
		close (ofile);
		if (hfile != NULL)
		{
			fclose (hfile);
			hfile = NULL;
		}
	}
#ifdef RestartSave
	save_restart ();
	if (my_address == 0)
	{
		printf ("info: OM3 Model state saved to restart file: %s\n", RestartSaveName);
	}
	fflush (of);
#endif

	MPI_Finalize ();		/* clean up world */

}

/*
 * Step the model until run_time reaches stop_time (both in seconds).
 *
 * The block and step counters are static, so they carry over between
 * calls.  Whenever the step counter equals N_Steps-1, the current block is
 * closed (skipped while the block counter is still 0), the next block is
 * opened, and the step counter restarts at 0.  One om3_step() is taken per
 * loop iteration.
 *
 * NOTE(review): with the counters starting at 0, no block is opened until
 * istep first reaches N_Steps-1, and om3_close_block() receives the index
 * of the block about to be opened - confirm this is intended.
 */
void om3_run( double stop_time ) {   /* stop_time in seconds */
  static int iblock=0, istep=0;

  for ( ; run_time < stop_time; ) {
	  int this_step;

	  if( istep == (N_Steps-1)) {
		int this_block;

		if( iblock != 0 ) {
		  om3_close_block(iblock);
		}
		this_block = iblock;
		iblock = iblock + 1;
		om3_open_block(this_block);
		istep = 0;
	  }

	  this_step = istep;
	  istep = istep + 1;
	  om3_step( this_step );
  }
}

/*
 * Return the value of largest magnitude, with its sign, found in the 2-D
 * field `a` across all ranks.
 *
 * Each rank scans rows 1..My-2 and columns 1..Mx-2, considering only cells
 * for which InWater(h[Nz - 1][iy][ix + 1]) holds, and keeps the value with
 * the largest absolute value.  The per-rank values are then combined with
 * MPI_MAX and MPI_MIN over MPI_COMM_WORLD, and whichever of the two has the
 * larger magnitude is returned.  This is a collective call: every rank must
 * call it.
 *
 * The local value starts at 0.0 so that a rank with no qualifying cell
 * contributes a defined, neutral value to the reductions instead of an
 * uninitialized one (0 cannot win a largest-magnitude comparison).
 *
 * NOTE(review): the water test reads column ix + 1 while the value is taken
 * from column ix - confirm the offset is intended.
 */
float 
f_max_d (float **a)
{
	int ix, iy;
	float max = 0.0;
	float gmax, gmin, maxamp;

	maxamp = -1.0;

	for (iy = 1; iy < My - 1; iy++)
	{
		for (ix = 1; ix < Mx - 1; ix++)
		{
			if ((fabs (a[iy][ix]) > maxamp) && InWater(h[Nz - 1][iy][ix + 1]))
			{
				max = a[iy][ix];
				maxamp = fabs (a[iy][ix]);
			}
		}
	}

	MPI_Allreduce (&max, &gmax, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	MPI_Allreduce (&max, &gmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);

	/* Pick whichever extreme has the larger magnitude. */
	if (gmax > -gmin)
	return gmax;
	else
	return gmin;
}

#ifdef OLD

/*
 * Older variant of f_max_d() taking a true My x Mx array; compiled only
 * when OLD is defined.
 *
 * Scans rows 1..My-2 and columns 1..Mx-2, considering only cells for which
 * InWater(h[Nz - 1][iy][ix + 1]) holds, keeps the value of largest
 * magnitude, then reduces with MPI_MAX and MPI_MIN over MPI_COMM_WORLD and
 * returns whichever extreme has the larger magnitude.  Collective call.
 *
 * The local value starts at 0.0 so that a rank with no qualifying cell
 * contributes a defined, neutral value to the reductions instead of an
 * uninitialized one.
 */
float 
f_max (float a[My][Mx])
{
	int ix, iy;
	float max = 0.0;
	float gmax, gmin, maxamp;

	maxamp = -1.0;

	for (iy = 1; iy < My - 1; iy++)
	{
		for (ix = 1; ix < Mx - 1; ix++)
		{
			if ((fabs (a[iy][ix]) > maxamp) && InWater(h[Nz - 1][iy][ix + 1]))
			{
				max = a[iy][ix];
				maxamp = fabs (a[iy][ix]);
			}
		}
	}

	MPI_Allreduce (&max, &gmax, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	MPI_Allreduce (&max, &gmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);

	/* Pick whichever extreme has the larger magnitude. */
	if (gmax > -gmin)
	return gmax;
	else
	return gmin;
}

/*
 * Sum the field `a` over rows 1..My-2 and columns 1..Mx-2, counting only
 * cells for which InWater(h[Nz - 1][iy][ix + 1]) holds, then add the
 * per-rank sums together with MPI_SUM over MPI_COMM_WORLD and return the
 * global total.  Collective call; compiled only when OLD is defined.
 */
float 
f_total (a)
float a[My][Mx];
{
	float local_sum = 0.0;
	float global_sum;
	int iy, ix;

	for (iy = 1; iy < My - 1; iy++)
	{
		for (ix = 1; ix < Mx - 1; ix++)
		{
			if (!(InWater(h[Nz - 1][iy][ix + 1])))
			{
				continue;
			}
			local_sum += a[iy][ix];
		}
	}

	MPI_Allreduce (&local_sum, &global_sum, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);

	return global_sum;
}

#endif

void
tidal_boundary (float **pt, float time)
{
	int iy;
	float amp, pi=3.141592654;
	
	amp = 200.0 * sin (2.0 * pi * time / ((float)(60*60*12)));
	if (col == Ncols-1)
	{
		for (iy = 0; iy < My; iy++)
		{
			pt[iy][Mx - 1] = amp;
		}
	}
}


/*
 * Open `file` with open(2) on rank 0 only.
 *
 * file, flags, mode: passed straight through to open().
 *
 * Returns the file descriptor on rank 0 and -1 on every other rank.  If the
 * open fails on rank 0, an error is printed and the process exits with
 * status 1.
 */
int 
global_open (char *file, int flags, int mode)
{
	int descriptor;

	/* Non-root ranks never touch the file. */
	if (my_address != 0)
	{
		return (-1);
	}

	descriptor = open (file, flags, mode);
	if (descriptor < 0)
	{
		printf ("error: OM3 File open error: %s\n", file);
		exit (1);
	}

	printf ("info: OM3 File open: %s\n", file);
	return (descriptor);
}
