/*
 *	File: lnreYuSi.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *		Fiona J. Tweedie
 *
 *      History:
 *
 *      - jul 1997, version 1.0 (rhb)
 *	- dec 1998, version 1.1 (rhb, fjt)
 *      - april 1999, version 1.2 (rhb)
 *            -e option added (mse in simplex minimization as score function)
 *
 *      Description:
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <malloc.h>
#include <math.h>
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

/* functions for numerical procedures */

/* expV, expVm, funcV, funcVm and getmse are defined further down in this
   file; getZ2, qromb and amoeba are not defined in this file.  All
   declarations are old-style (no parameter lists), so the compiler does
   not check the arguments at the call sites. */
extern double	expV ();
extern double	expVm ();
extern double	funcV ();
extern double	funcVm ();
extern double	getZ2 ();
extern double	qromb ();
extern double   getmse ();
extern void	amoeba ();

/* matrix/vector allocation helpers (not defined in this file); main()
   calls them with a lower index of 1 */
extern double	**matrix ();
extern double	*vector ();
extern void	free_vector ();
extern void	free_matrix ();
/* debugging aid and the four simplex score functions, all defined below */
extern void	print_sim_matrix ();
extern double	sim_functie ();
extern double	sim_functie2 ();
extern double	sim_functie_mse ();
extern double	sim_functie2_mse ();


/* argument reading, file manipulation, and help function */

/* leesgetal and change_extension are not defined in this file;
   help is defined below and terminates the program */
extern int	leesgetal ();
extern void	change_extension ();
extern void	help ();


/* GLOBAL VARIABLES */

/* All program state lives in file-scope globals; the numerical routines
   (expV, expVm, funcV, funcVm, the sim_functie* family and getmse)
   communicate through them rather than through parameters. */
double   N, V, n1, n2, n3,          /* number of tokens (N) and types (V);
                                       n1, n2, n3: number of types with
                                       frequency 1, 2 and 3 */
         E,                         /* extrapolation sample size */
         pstar,                     /* maximum relative frequency: mmax / N */
         mmax,                      /* frequency in the last spectrum row read */
         mrank,                     /* frequency m, set by expVm() and read
                                       by funcVm() */
         vm,                        /* variable for arbitrary Vm */
         SPECTRUM[MAXM3][3],        /* frequency spectrum m Vm EVm; rows are
                                       filled from index 1 */
         Z, VZ,                     /* Zipf size, and V(Z) */
         VZparam,                   /* VZ as a third parameter of the model */
         beta,                      /* second parameter of the model */
         Nzero, Vzero,              /* original sample size N0 and number of
                                       types V0 */
         eV, eV1, eV2, eV2N, S,     /* E[V(N)], E[V(1,N)], E[V(2,N)],
                                       E[V(2N)], S */
         eV3,                       /* E[V(3,N)] */
         CHUNKS[MAXCHUNKS3],        /* cumulative chunk sizes, from index 1 */
         chunksize, remainDer,      /* chunk variables */
         x, x1, y, y2, y3, y4, y5,  /* scratch values for growth-curve output */
	 u_bound,		    /* upper integration bound passed to qromb */
         logt, t,                   /* t = N/Z, logt = log(t) */
         **sim_mat,                 /* for simplex minimization: vertices */
         *sim_vec,                  /* score of each simplex vertex */
         *sim_yy,                   /* one vertex, passed to a score function */
         miny,                      /* smallest score found among vertices */
         tolerance;                 /* tolerance passed to amoeba */

FILE     *fpspectrum,               /* fpspectrum: datafile: m, Vm */
         *fpexpspect,               /* expected spectrum */
         *fpexpspect2N,             /* spectrum at 2N */
         *fpVN,                     /* file with E[V(N)] and E[V(2N)] */
         *fpsum,                    /* file with summary of model */
         *fpint,                    /* interpolation results */
         *fpext,                    /* extrapolation results */
         *fpE,                      /* spectrum at sample size E */
         *fullspc,		    /* full spectrum .fsp for m=1..skip */
         *fpKvalues;                /* .iex file written for the -F option */

int      nranks,                    /* number of different ranks in spectrum */
         maxrank,                   /* largest rank for fit (DEF_MAXRANK
                                       unless -m is given) */
         i, j,                      /* counter */
         header,                    /* boolean for presence of header */
         k,                         /* variable for chunks */
         nchunks,                   /* number of chunks for interpolation */
         enchunks,                  /* number of chunks for extrapolation */
         token, type,               /* auxiliary variables for scanf */
         infinite,                  /* boolean on value of S */
         again,                     /* boolean for manual search for params */
         ndimensions,               /* for amoeba: simplex minimization */
         ndimensions1,              /* for amoeba: simplex minimization */
         niterations,               /* for amoeba: simplex minimization */
         simplex_flunked,           /* set when the simplex step is not run,
                                       so parameters are entered manually */
         zipfstart,                 /* boolean for start with Zipf model */
         skip,			    /* print spectrum only, for m=1..skip */
	 Skip,                      /* -S: if > 0, print the Zipf starting
                                       values and exit */
         kfile,                     /* -F: if > 0, sample size used for the
                                       .iex file */
         forceN,                    /* -N: if > 0, overrides N and Nzero */
         forceV,                    /* -V: if > 0, overrides V and Vzero */
         freemem,                   /* free allocated memory */
         pstarmethod,               /* use pstar estimation method for VZ */
         msemethod,                     /* use mse over msenranks ranks for
                                           parameter estimation */
         msenranks,                     /* number of ranks for mse param.
                                           estimation */
         aantal;                    /* for command line options: count
                                       filled in by leesgetal */

char     woord[MAXWORDLENGTH],       /* variable for skipping header in fscanf */
         new_name[MAXWORDLENGTH],    /* variables for extension handling    */
         base_name[MAXWORDLENGTH],
         c,                          /* for input during manual search */
         *fs;                        /* variable for scanning options */


/* MAIN () */

int main (argc, argv)
int     argc;
char    *argv[];

{ 
   /* DEFAULTS */

   maxrank = DEF_MAXRANK;
   nchunks = DEF_CHUNKS;
   enchunks = DEF_CHUNKS;
   header = 1;
   zipfstart = 0;
   E = NULL_F;
   simplex_flunked = 0;
   infinite = 0;  /* default: S is not infinite */
   skip = 0;
   Skip = 0;
   kfile = 0;
   forceN = 0;
   forceV = 0;
   pstarmethod = 0;
   freemem = 0;
   u_bound = U_BOUND;
   msenranks = maxrank;
   msemethod = 0;


   /* COMMAND LINE OPTIONS */

   while ((--argc > 0) && ((*++argv)[0] == '-')) {
        for (fs = argv[0] + 1; *fs != '\0'; fs++) {
            switch (*fs) {
            case 'h':
                help();
                break;
            case 'E':
                i =  leesgetal (fs, &aantal);
                E = (double) i;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'k':
                nchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'K':
                enchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'e':
                msenranks = leesgetal (fs, &aantal);
                msemethod = 1;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'i':
                u_bound = (double) leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'm':
                maxrank = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'F':
                kfile = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                if (kfile == 0) {
                  fprintf(stderr, "lnreYuSi: cannot int/ext with zero N\n");
                  exit(1);
                }
                break;
            case 'V':
                forceV = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'N':
                forceN = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'n':
                zipfstart = 1;
                break;
            case 'P':
                pstarmethod = 1;
                break;
            case 's':      /* don't interpolate or extrapolate */
                           /* show m and Vm and EVm for m=1..s */
                skip = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                if (skip == 0) {
                  fprintf(stderr, "lnreYuSi: cannot skip with zero rank\n");
                  exit(1);
                }
                break;
            case 'S':
                Skip = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
		break;
            case 'H':      /* input files without headers! */
                header = 0;
                break;
            default:
                fprintf(stderr, "lnreYuSi: illegal option %c\n", *fs);
                exit(1);
                break;
            }
        }
   } /* of while */

   /* FILE HANDLING */

   if (argc == 0) {
     help ();
   }

   /* load input spectrum, should have .spc extension */

   if ((fpspectrum = fopen(*argv, "r")) == NULL) {
       fprintf(stderr, "lnreYuSi: can't open %s\n", *argv);
       exit(1);
   }

   if (Skip > 0) {
       nranks = 0; n1 = 0; n2 = 0;
       if (header){
          fscanf(fpspectrum, "%s ", woord);  /* m */
          fscanf(fpspectrum, "%s ", woord);  /* Vm */
       }

       fprintf(stdout,"read header\n");
       fflush(stdout);

       while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
            nranks++;
            SPECTRUM[nranks][0] = (double) token;
            SPECTRUM[nranks][1] = (double) type;
            if (token == 1) n1 = (double) type;
            if (token == 2) n2 = (double) type;
            if (token == 3) n3 = (double) type;
            N+= (double) token * (double) type;
            V+= (double) type;
       }
       fprintf(stdout,"read spectrum\n");
       fflush(stdout);
       mmax = SPECTRUM[nranks][0];
       pstar = mmax / N;
       Nzero = N; Vzero = V;

       Z = getZ2 (pstar, Nzero, Vzero, N); 
       VZ = Z / log (pstar * Z); 
       beta = 1.0;
       eV = expV(N);
       eV1 = expVm(1.0, N);
       eV2 = expVm(2.0, N);
       fprintf(stdout, "Initial values for minimization (simple Zipf):\n");
       fprintf(stdout, "   Z  = %10.4f  beta  = %10.2f VZ    = %12.2f  \n", \
                  Z, beta, VZ);
       fprintf(stdout, "   V  = %10.0f  E[V]  = %15.2f\n", V, eV);
       fprintf(stdout, "   V1 = %10.0f  E[V1] = %15.2f\n", n1, eV1);
       fprintf(stdout, "   V2 = %10.0f  E[V2] = %15.2f\n", n2, eV2);

       exit(1);
   }

   /* file name handling output files */

   strncpy(base_name, *argv, strlen(*argv) - 4);

  if ((skip == 0) && (kfile == 0)) {
   change_extension (base_name, new_name, "_Y.spc");
   if ((fpexpspect = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_Y.sp2");
   if ((fpexpspect2N = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_Y.ev2");
   if ((fpVN = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_Y.sum");
   if ((fpsum = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_Y.int");
   if ((fpint = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_Y.ext");
   if ((fpext = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
      exit(1);
   }
   if (E > NULL_F){
     change_extension (base_name, new_name, "_Y.sp3");
     if ((fpE = fopen(new_name, "w")) == NULL){
        fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
        exit(1);
     }
   }
  } else {
     if (skip > 0) {
        change_extension (base_name, new_name, "_Y.fsp");
        if ((fullspc = fopen(new_name, "w")) == NULL){
           fprintf(stderr, "lnreYuSi: can't open output file %s\n", new_name);
           exit(1);
        }
     } else {
           change_extension (base_name, new_name, "_Y.iex");
           if ((fpKvalues = fopen(new_name, "w")) == NULL){
              fprintf(stderr,"lnreYuSi: can't open output file %s\n", new_name);
              exit(1);
           }
     }
  }

   /* LOAD SPECTRUM FILE */

   nranks = 0; n1 = 0; n2 = 0;
   if (header){
      fscanf(fpspectrum, "%s ", woord);  /* m */
      fscanf(fpspectrum, "%s ", woord);  /* Vm */
   }

   fprintf(stdout,"read header\n");
   fflush(stdout);

   while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
        nranks++;
        SPECTRUM[nranks][0] = (double) token;
        SPECTRUM[nranks][1] = (double) type;
        if (token == 1) n1 = (double) type;
        if (token == 2) n2 = (double) type;
        if (token == 3) n3 = (double) type;
        N+= (double) token * (double) type;
        V+= (double) type;
   }
   fprintf(stdout,"read spectrum\n");
   fflush(stdout);
   mmax = SPECTRUM[nranks][0];
   pstar = mmax/N;
   Nzero = N; Vzero = V;

   if ((forceV > 0) && (pstarmethod==1)) {
       fprintf(stdout, "forcing V and N, treating VZ as third parameter\n");
       pstarmethod = 0;
   }
   if (forceN > 0) {
       Nzero = (double) forceN;
       N = (double) forceN;
   }
   if (forceV > 0){
       Vzero = (double) forceV;
       V = (double) forceV;
   }

   /* DETERMINE THE PARAMETERS Z AND BETA */

   /* as a starting point: take the Zipfian model (beta=1): */

   if (zipfstart) { 
    if (forceV == 0) {
     Z = getZ2 (pstar, Nzero, Vzero, N); 
     VZ = Z / log (pstar * Z); 
     beta = 1;
     eV = expV(N);
     eV1 = expVm(1.0, N);
     eV2 = expVm(2.0, N);
    } else {
     fprintf(stdout, \
       "cannot use Zipfian starting point when V and N are forced\n");
     zipfstart = 0;
     fflush(stdout);
    }
   }

      /* tolerance for simplex minimization */
   tolerance = 0.0001;

   fprintf(stdout, "run downhill simplex minimization? (y/n) \n");
   fflush(stdout);
   scanf("%1s", &c);
   if (c=='y'){
    if (zipfstart){
     fprintf(stdout, "Initial values for minimization (Zipf):\n");
     fprintf(stdout, "   Z  = %10.4f  beta  = %10.2f VZ    = %12.2f  \n", \
                  Z, beta, VZ);
     fprintf(stdout, "   V  = %10.0f  E[V]  = %15.2f\n", V, eV);
     fprintf(stdout, "   V1 = %10.0f  E[V1] = %15.2f\n", n1, eV1);
     fprintf(stdout, "   V2 = %10.0f  E[V2] = %15.2f\n", n2, eV2);
     fprintf(stdout, "proceed (y), specify own starting point (o), or quit (q) ");
     scanf("%1s", &c);
     fflush(stdout);
     if (c=='q') exit(1);
    }
    else{
     /* fprintf(stderr, "specify starting point  "); */
     c = 'o';
    }
    if (c=='o'){
     if (pstarmethod) {
        fprintf(stdout, "specify Z and beta:  ");
        scanf("%lf %lf", &Z, &beta);
     } else {
        fprintf(stdout, "specify Z, beta, VZ:  ");
        scanf("%lf %lf %lf", &Z, &beta, &VZ);
     }
    }
    fprintf(stdout, "Change tolerance (%10.8f)? (y/n) ", tolerance);
    scanf("%1s", &c);
    if (c=='y'){
        fprintf(stdout, "specify tolerance: ");
        scanf("%lf", &tolerance);
    }

    fprintf(stdout, "Z = %f  beta = %f VZ = %f tolerance = %f\n",
              Z, beta, VZ, tolerance);
    fprintf(stdout, "N = %f V = %f\n", N, V);
    fflush(stdout);
    if (pstarmethod) {
        ndimensions = 2;
    } else {
        ndimensions = 3;
    }
    ndimensions1 = ndimensions + 1;

    /* use downhill simplex method, NumRecC, p. 305 */

    niterations = 0;
    freemem = 1;
    sim_mat = matrix (1, ndimensions + 1, 1, ndimensions);

    sim_mat[1][1] = Z; sim_mat[1][2] = beta; 
    sim_mat[2][1] = 0.7*Z; sim_mat[2][2] = beta-0.2; 
    sim_mat[3][1] = 1.7*Z; sim_mat[3][2] = beta+0.2; 
    if (pstarmethod==0) {
        sim_mat[1][3] = VZ;
        sim_mat[2][3] = 0.7*VZ;
        sim_mat[3][3] = 1.7*VZ;
        sim_mat[4][1] = 1.3*Z; 
        sim_mat[4][2] = beta+0.1; 
        sim_mat[4][3] = 1.3*VZ;
    }

    sim_vec = vector (1, ndimensions + 1);
    sim_yy = vector (1, ndimensions);

    if (pstarmethod==1) {
       fprintf(stdout, "preparing simplex optimization: ");
       fflush(stdout);
       for (i = 1; i <= ndimensions+1; i++){
         fprintf(stdout, ".");
         sim_yy[1] = sim_mat[i][1];
         sim_yy[2] = sim_mat[i][2];
         /* sim_vec[i] = sim_functie (sim_yy); */
         if (msemethod == 0) {
            sim_vec[i] = sim_functie(sim_yy);
         } else {
            sim_vec[i] = sim_functie_mse(sim_yy);
         }
       }
       fprintf(stdout, "\n");
    } else {
       fprintf(stdout, "preparing simplex optimization: ");
       fflush(stdout);
       for (i = 1; i <= ndimensions+1; i++){
         fprintf(stdout, ".");
         sim_yy[1] = sim_mat[i][1];
         sim_yy[2] = sim_mat[i][2];
         sim_yy[3] = sim_mat[i][3];
         /* sim_vec[i] = sim_functie2 (sim_yy); */
         if (msemethod == 0) {
            sim_vec[i] = sim_functie2(sim_yy);
         } else {
            sim_vec[i] = sim_functie2_mse(sim_yy);
         }
       }
       fprintf(stdout, "\n");
    }

    /* print_sim_matrix(sim_mat,ndimensions); */

    fprintf(stdout, "\nStarting simplex method for parameter estimation\n");
    fflush(stdout);
    if (pstarmethod == 1) {
       /*
       amoeba(sim_mat,sim_vec,ndimensions,tolerance,\
              sim_functie,&niterations,pstarmethod);
       */
      if (msemethod == 0) {
         amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie, \
               &niterations,pstarmethod);
      } else {
         amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie_mse, \
               &niterations,pstarmethod);
      }
    } else {
      /*
      amoeba(sim_mat,sim_vec,ndimensions,tolerance,\
             sim_functie2,&niterations,pstarmethod);
      */
      if (msemethod == 0) {
         amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie2, \
               &niterations,pstarmethod);
      } else {
         amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie2_mse, \
               &niterations,pstarmethod);
      }
    }
    fprintf(stdout, "\n");  /* ends report of simplex procedure */
    fflush(stdout);

    /* print_sim_matrix(sim_mat,ndimensions); */

    /* find minimum for which values are less than tolerance */
    miny = MAXX;
    for (i = 1; i <= ndimensions+1; i++){
            if (sim_vec[i] < miny){
                  j = i;
                  miny = sim_vec[i];
            }
    }
    if (pstarmethod) {
       Z = sim_mat[j][1]; beta = sim_mat[j][2]; 
       VZ = Z/(beta *log(pstar*Z));
    } else {
       Z = sim_mat[j][1]; beta = sim_mat[j][2]; VZ = sim_mat[j][3];
    }
   }
   else{
    simplex_flunked = 1;
   }

   if (simplex_flunked){   /* TRY MANUALLY */
     if (forceV == 0) {
        Z = getZ2 (pstar, Nzero, Vzero, N);
        VZ = Z / log (pstar * Z); 
        beta = 1;
        fprintf(stdout, "Zipf: Z = %10.4f, beta = %10.2f, VZ = %10.2f \n", \
            Z, beta, VZ);
        fflush(stdout);
     }

     again = 1;
     while (again){
       if (pstarmethod) {
          fprintf(stdout, "specify Z and beta\n ");
          scanf("%lf %lf ", &Z, &beta);
          VZ = Z/(beta *log(pstar*Z)); 
          eV = expV (N);
          eV1 = expVm (EINS_F, N);
          fprintf(stdout, \
             "V       =    %10.2f   V1      =   %10.2f\n", V, n1);
          fprintf(stdout, \
             "E[V]    =    %10.2f   E[V1]   =   %10.2f\n", eV, eV1);
       } else {
          fprintf(stdout, "specify Z, beta, and VZ\n ");
          scanf("%lf %lf %lf", &Z, &beta, &VZ);
          /* VZ = Z/(beta *log(pstar*Z));  */
          eV = expV(N);
          eV1 = expVm(1.0, N);
          eV2 = expVm(2.0, N);
          fprintf(stdout, \
    "V       =    %10.2f   V1      =   %10.2f V2    = %10.2f\n", V, n1, n2);
          fprintf(stdout, \
    "E[V]    =    %10.2f   E[V1]   =   %10.2f E[V2] = %10.2f\n", eV, eV1, eV2);

       }
       fprintf(stdout, "reestimate (r), continue (c), or quit (q)? ");
       scanf("%s", &c);
       if (c=='q') exit(1);
       if (c!='r') again=0;
     }
   }
   fflush(stdout);

   /* AND CALCULATE E[V(N)] AND E[V(m,N)] ;  NOTE: S IS INFINITE */

   fprintf(stdout, "estimated parameters: Z = %f  beta = %f  VZ = %f\n", Z, beta, VZ);

   eV = expV(Nzero);

   if (skip>0) {
    fprintf(stdout, "\ncomputing EVm for m=1..%d\n", skip);
    fflush(stdout);
    fprintf(fullspc, "m EVm\n");
    for (i = 1; i <= skip; i++) {
      vm = expVm((double) i, Nzero );
      if (i == 1) eV1 = vm;
      if (i == 2) eV2 = vm;
      if (i == 3) eV3 = vm;
      fprintf(fullspc, "%10d %15.4f\n", i, vm);
      fprintf(stdout, "[%d]\n", i); fflush(stdout);
    }
    fprintf(stdout, "  Z  = %10.4f beta  = %15.4f  VZ = %10.4f\n", Z, beta, VZ);
    fprintf(stdout, "  V  = %15.2f E[V]  = %15.2f\n", V, eV);
    fprintf(stdout, "  V1 = %15.2f E[V1] = %15.2f\n", n1, eV1);
    fprintf(stdout, "  V2 = %15.2f E[V2] = %15.2f\n", n2, eV2);
    fprintf(stdout, "  V3 = %15.2f E[V3] = %15.2f\n", n3, eV3);
    fflush(stdout);
    fclose(fullspc);
    fprintf(stdout, "MSE(%d+2) = %10.4f\n", msenranks, getmse());
    exit(1);
   }
   /* fprintf(stdout, "MSE(%d+2) = %10.4f\n", msenranks, getmse());*/

   if (kfile > 0) {
     Nzero = (double) kfile;
     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     fprintf(stdout, "computing interpolation+extrapolation statistics\n");
     fflush(stdout);
     fprintf(fpKvalues, "       N       EV      EV1      EV2      EV3      EV4      EV5       GV\n");
     for (k = 1; k <= nchunks; k++){
        fprintf(stdout, "[%d]\n", k);
        fflush(stdout);
        x = expV(CHUNKS[k]);
        x1 = expV(CHUNKS[k]+1.0);
        y = expVm(1.0, CHUNKS[k]);
        y2 = expVm(2.0, CHUNKS[k]);
        y3 = expVm(3.0, CHUNKS[k]);
        y4 = expVm(4.0, CHUNKS[k]);
        y5 = expVm(5.0, CHUNKS[k]);
        fprintf(fpKvalues, 
           "%15.2f %15.2f %15.2f %15.2f %15.2f %15.2f %15.2f %15.4f\n", 
           CHUNKS[k], x, y, y2, y3, y4, y5, x1-x);
     }
     fprintf(stdout, "\n");
     fflush(stdout);
     fclose(fpKvalues);

     exit(1);
   }

   fprintf(stdout, "\ncomputing expected spectrum at N\n");
   fflush(stdout);

   S = NULL_F;
   for (i = 1; i <= maxrank; i++) {
     fprintf(stdout, "[%d]\n", i);
     fflush(stdout);
     SPECTRUM[i][2] = expVm((double) i, Nzero);
   }
   fprintf(stdout, "\n");
   fflush(stdout);

   /* PRINT SUMMARY */

   fprintf(fpsum, "Yule-Simon Zipfian model for %s", *argv);
   if (msemethod == 0) {
     fprintf(fpsum, " (parameter estimation using E[V(N)] and E[V(1,N)])\n");
   } else {
     fprintf(fpsum, " (parameter estimation using mse method with %d ranks)\n",\
                msenranks);
   }
   fprintf(fpsum, "N         = %12d\n", (int) N);
   fprintf(fpsum, "V(N)      = %12d\n", (int) V);
   fprintf(fpsum, "E[V(N)]   = %12.4f\n", eV);
   fprintf(fpsum, "V(1,N)    = %12d\n", (int) n1);
   fprintf(fpsum, "E[V(1,N)] = %12.4f\n", SPECTRUM[1][2]);
   fprintf(fpsum, "V(2,N)    = %12d\n", (int) SPECTRUM[2][1]);
   fprintf(fpsum, "E[V(2,N)] = %12.4f\n", SPECTRUM[2][2]);
   if (beta < 1){
       fprintf(fpsum, "S         = infinite\n");
   }
   else{
       S = VZ * beta/(beta-1.0);
       fprintf(fpsum, "S         = %12.5f\n", S);
   }
   fprintf(fpsum, "Z         = %15.8f\n",   Z);
   fprintf(fpsum, "beta      = %15.10f\n",  beta);
   fprintf(fpsum, "VZ        = %15.2f\n",   VZ);
   fclose(fpsum);
   fprintf(stdout, "MSE(%d+2) = %10.4f\n", msenranks, getmse());

   /* WARNING: any changes in this summary file should be updated in
      ad2YuSi, which depends on exactly this number of lines and header */

   /* PRINT SPECTRUM */

   fprintf(fpexpspect, "         m         Vm        EVm     alphaM    EalphaM\n");
   for (i = 1; i <= maxrank; i++) {
    fprintf(fpexpspect, "%10d %10d ",(int) SPECTRUM[i][0],(int) SPECTRUM[i][1]);
    fprintf(fpexpspect, "%15.4f %15.4f %15.4f\n", SPECTRUM[i][2],
           SPECTRUM[i][1]/Vzero, SPECTRUM[i][2]/eV);
   }
   fclose(fpexpspect);

   /* PRINT SPECTRUM AT 2N */

   fprintf(stdout, "computing expected spectrum at 2N\n");
   fflush(stdout);
   N = 2 * Nzero;
   eV2N = expV(N);
   fprintf(fpexpspect2N, "         m      EVm2N\n");
   for (i = 1; i <= 2 * maxrank; i++){
     fprintf(stdout, "[%d]\n", i);
     fflush(stdout);
     fprintf(fpexpspect2N, "%10d %15.2f\n", i, expVm((double) i, N));
   }
   fprintf(stdout, "\n");
   fflush(stdout);
   fclose(fpexpspect2N);

   /* PRINT VOCABULARY SIZES */

   fprintf(fpVN, "       V       EV     EV2N\n");
   fprintf(fpVN, "%15.2f %15.2f %15.2f\n", V, eV, eV2N);
   fclose(fpVN);

   /* INTERPOLATION */

   if (nchunks > 0){

     /* CALCULATE THE TEXT CHUNKS */

     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     /* AND PRINT THE CORRESPONDING STATISTICS */

     fprintf(stdout, "computing interpolation statistics\n");
     fflush(stdout);
     fprintf(fpint, "       N       EV      EV1      EV2      EV3      EV4      EV5       GV\n");
     for (k = 1; k <= nchunks; k++){
        fprintf(stdout, "[%d]\n", k);
        fflush(stdout);
        x = expV(CHUNKS[k]);
        x1 = expV(CHUNKS[k]+1.0);
        y = expVm(1.0, CHUNKS[k]);
        y2 = expVm(2.0, CHUNKS[k]);
        y3 = expVm(3.0, CHUNKS[k]);
        y4 = expVm(4.0, CHUNKS[k]);
        y5 = expVm(5.0, CHUNKS[k]);
        fprintf(fpint, 
           "%15.2f %15.2f %15.2f %15.2f %15.2f %15.2f %15.2f %15.4f\n", 
           CHUNKS[k], x, y, y2, y3, y4, y5, x1-x);
     }
     fprintf(stdout, "\n");
     fflush(stdout);

   }

   /* EXTRAPOLATION */
  
   if (E == NULL_F) {  /* extrapolate to 2N */
     fprintf(stdout, "computing extrapolation statistics to 2N\n");
     fflush(stdout);
     fprintf(fpext, "         N         EV        EV1      EV2      EV3      EV4      EV5\n");

     for (k = 1; k <= nchunks; k++){
        fprintf(stdout, "[%d]\n", k);
        fflush(stdout);
        x = expV(Nzero+CHUNKS[k]);
        y = expVm(1.0, Nzero+CHUNKS[k]);
        y2 = expVm(2.0, Nzero+CHUNKS[k]);
        y3 = expVm(3.0, Nzero+CHUNKS[k]);
        y4 = expVm(4.0, Nzero+CHUNKS[k]);
        y5 = expVm(5.0, Nzero+CHUNKS[k]);
        fprintf(fpext, "%15.2f %15.2f %15.4f ", Nzero+CHUNKS[k],  x, y);
        fprintf(fpext, "%15.2f %15.2f %15.2f %15.2f\n", y2, y3, y4, y5);
     }
     fprintf(stdout, "\n");
     fflush(stdout);
   }
   else{

     /* FIND NEW CHUNKSIZES */

     fprintf(stdout, "computing extrapolation statistics to E\n");
     chunksize = floor((E-Nzero)/(enchunks*1.0));
     remainDer = (E-Nzero) - ((enchunks*1.0) * chunksize);
     for (k = 1; k <= enchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++)  CHUNKS[k]++;
     for (k = 2; k <= enchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     /* PRINT THE GROWTH CURVE */

	 fprintf(stdout, "computing extrapolation statistics to 2N\n");
     fflush(stdout);
     fprintf(fpext, "         N         EV        EV1      EV2      EV3      EV4      EV5\n");

     for (k = 1; k <= enchunks; k++){
        fprintf(stdout, "[%d]\n", k);
        fflush(stdout);
        x = expV(Nzero+CHUNKS[k]);
        y = expVm(1.0, Nzero+CHUNKS[k]);
        y2 = expVm(2.0, Nzero+CHUNKS[k]);
        y3 = expVm(3.0, Nzero+CHUNKS[k]);
        y4 = expVm(4.0, Nzero+CHUNKS[k]);
        y5 = expVm(5.0, Nzero+CHUNKS[k]);
        fprintf(fpext, "%15.2f %15.2f %15.4f ", Nzero+CHUNKS[k],  x, y);
        fprintf(fpext, "%15.2f %15.2f %15.2f %15.2f\n", y2, y3, y4, y5);
     }
     fprintf(stdout, "\n");
     fflush(stdout);


     /* AND SHOW THE SPECTRUM AT E */

     eV2N = expV(E);
     fprintf(fpE, "         m      EVmXN\n");
     for (i = 1; i <= maxrank; i++){
       fprintf(fpE, "%10d %15.2f\n", i, expVm((double) i, E));
     }
   }

   if (freemem == 1) {
      free_matrix (sim_mat, 1, ndimensions + 1, 1, ndimensions);
      free_vector (sim_vec, 1, ndimensions + 1);
      free_vector (sim_yy, 1, ndimensions);
   }

   return (0);
} /* end of main */



/*
 * expV: expected number of types at sample size n.
 *
 * Stores n in the global N and n / Z in the global t (funcV reads t),
 * integrates funcV from L_BOUND to u_bound with qromb and scales the
 * integral by VZ and beta.
 */
double expV (n)
double n;
{
  double integral;

  N = n;
  t = n / Z;
  integral = qromb (funcV, L_BOUND, u_bound);
  return (integral * VZ * beta); 
}

/*
 * expVm: expected number of types with frequency m at sample size n.
 *
 * Publishes its arguments through the globals mrank, N, t (= n / Z) and
 * logt (= log(t)), which funcVm reads, then integrates funcVm from
 * L_BOUND to u_bound with qromb and scales the integral by VZ and beta.
 */
double expVm (m, n)
double m, n;  
{
  double integral;

  mrank = m;
  N = n;
  t = n / Z;
  logt = log (t);
  integral = qromb (funcVm, L_BOUND, u_bound);
  return (integral * VZ * beta); 
}


/*
 * funcVm: integrand used by expVm().
 *
 * Evaluates  t^m * x / ((t+x)^(m+1) * (1+x)^beta)  with m = mrank, using
 * the globals mrank, t, logt and beta set up by expVm().
 *
 * The powers are combined in log space:
 *    x/(t+x) * exp( m*(log t - log(t+x)) - beta*log(1+x) ).
 * Computing t^m and (t+x)^(m+1) separately overflows to infinity for
 * large m and t, which turned the quotient into NaN; the exponent used
 * here is never positive for x >= 0, so it cannot overflow.
 */
double funcVm(x)
double x;
{
  double shifted = t + x;
  double log_ratio = logt - log(shifted);     /* log(t/(t+x)) */

  return( (x / shifted) * exp(mrank*log_ratio - beta*log(1+x)) );
}

/*
 * funcV: integrand used by expV().
 *
 * Returns  t / ((t+x) * (1+x)^beta),  reading the globals t and beta;
 * the power is evaluated as exp(beta*log(1+x)).
 */
double funcV(x)
double x;
{
  double shifted = t + x;
  double damping = exp(beta*log(1+x));

  return( t / (shifted * damping));
}


/*
 * help: print the usage summary on stderr and terminate with status 1.
 *
 * Lists every option accepted by the option parser in main(), including
 * -n, -i, -F and -S, and the output files produced by each mode.
 */
void help ()
{
  fprintf (stderr,"lnreYuSi text.spc\n");
  fprintf (stderr,"OPTIONS:\n");
  fprintf (stderr,"     -h: display help\n");
  fprintf (stderr,"     -m: number of ranks in fit (default: 15)\n");
  fprintf (stderr,"     -k: number of chunks for interpolation (default: 20)\n");
  fprintf (stderr,"     -K: number of chunks for extrapolation (default: 20)\n");
  fprintf (stderr,"     -E: extrapolation sample size (default: 2N)\n");
  fprintf (stderr,"     -H: input files lack header (default: with header)\n");
  fprintf (stderr,"     -P: force the use of p* to estimate VZ\n");
  fprintf (stderr,"     -N: force N to specified value\n");
  fprintf (stderr,"     -V: force V to specified value\n");
  fprintf (stderr,"     -n: offer the simple Zipf model as starting point\n");
  fprintf (stderr,"     -i: upper bound of the integration interval\n");
  fprintf (stderr,"     -F: compute growth curve up to sample size -F and quit\n");
  fprintf (stderr,"     -s: compute first s spectrum elements and quit\n");
  fprintf (stderr,"     -S: show simple Zipf starting values and quit (nonzero value)\n");
  fprintf (stderr,"     -e: use mse on first -e ranks in simplex cost function\n");
  fprintf (stderr,"INPUT:\n");
  fprintf (stderr,"     text.spc:  m Vm\n");
  fprintf (stderr,"OUTPUT:\n");
  fprintf (stderr,"     text_Y.spc:  expected spectrum\n");
  fprintf (stderr,"     text_Y.fsp:  expected spectrum (-s option only)\n");
  fprintf (stderr,"     text_Y.sp2:  expected spectrum at 2N\n");
  fprintf (stderr,"     text_Y.sp3:  expected spectrum at E (-E option only)\n");
  fprintf (stderr,"     text_Y.ev2:  E[V(N)] and E[V(2N)]\n");
  fprintf (stderr,"     text_Y.sum:  summary, fitted parameters\n");
  fprintf (stderr,"     text_Y.int:  interpolation results\n");
  fprintf (stderr,"     text_Y.ext:  extrapolation results\n");
  fprintf (stderr,"     text_Y.iex:  growth curve (-F option only)\n");
  exit (1);
}


/*
 * print_sim_matrix: debugging dump of the simplex on stderr.
 *
 * For each of the ndim+1 vertices (rows of m, indexed from 1) prints the
 * coordinates, the stored score sim_vec[row], and E[V] / E[V1] computed
 * for that vertex.  NOTE: this overwrites the globals Z, beta, VZ, eV
 * and eV1, and VZ is always derived from pstar, also when ndim is 3.
 */
void print_sim_matrix(m, ndim)
double **m;
int ndim;
{
 int row, col;

 row = 1;
 while (row <= ndim+1) {
   col = 1;
   while (col <= ndim) {
       fprintf(stderr,"%15.2f ", m[row][col]);
       col++;
   }
   fprintf(stderr, "%4.2f ", sim_vec[row]);

   /* evaluate the model at this vertex */
   Z = m[row][1];
   beta = m[row][2];
   VZ = Z/(beta *log(pstar*Z));
   eV = expV(N);
   eV1 = expVm(1.0, N);
   fprintf(stderr, "eV = %6.2f (V = %6.0f)  eV1 = %6.2f (V1 = %6.0f)\n", eV, V, eV1, n1);
   row++;
 }
}

/*
 * sim_functie: simplex score for the two-parameter (p*) fit.
 *
 * x[1] is Z and x[2] is beta; values below NULL_F are replaced by 5 and
 * 0.01 respectively.  VZ is derived from Z, beta and pstar, and the
 * globals Z, beta, VZ, eV and eV1 are updated as a side effect.
 * Returns |V - E[V]| + |n1 - E[V1]|.
 */
double sim_functie(x)
double *x;
{
   Z = (x[1] < NULL_F) ? 5 : x[1];
   beta = (x[2] < NULL_F) ? 0.01 : x[2];
   VZ = Z/(beta *log(pstar*Z));

   eV = expV(N);
   eV1 = expVm(1.0, N);
   return(fabs(V-eV)+fabs(n1-eV1));
}

/*
 * sim_functie_mse: simplex score for the two-parameter (p*) fit using
 * the mean squared error returned by getmse().
 *
 * x[1] is Z and x[2] is beta; values below NULL_F are replaced by 5 and
 * 0.01 respectively, and VZ is derived from Z, beta and pstar.  The
 * globals Z, beta and VZ are updated as a side effect.
 */
double sim_functie_mse(x)
double *x;
{
   extern double getmse();

   Z = (x[1] < NULL_F) ? 5 : x[1];
   beta = (x[2] < NULL_F) ? 0.01 : x[2];
   VZ = Z/(beta *log(pstar*Z));

   /* MSE over the first msenranks ranks for these parameter values */
   return(getmse());
}

/*
 * sim_functie2: simplex score for the three-parameter fit.
 *
 * x[1] is Z, x[2] is beta and x[3] is VZ; Z and beta below NULL_F are
 * replaced by 5 and 0.01, VZ below 1 by 1.0.  The globals Z, beta, VZ,
 * eV, eV1 and eV2 are updated as a side effect.
 * Returns |V - E[V]| + |n1 - E[V1]| + |n2 - E[V2]|.
 */
double sim_functie2(x)
double *x;
{
   Z = (x[1] < NULL_F) ? 5 : x[1];
   beta = (x[2] < NULL_F) ? 0.01 : x[2];
   VZ = (x[3] < 1) ? 1.0 : x[3];

   eV = expV(N);
   eV1 = expVm(1.0, N);
   eV2 = expVm(2.0, N);

   return(fabs(V-eV)+fabs(n1-eV1)+fabs(n2-eV2));
}

/*
 * sim_functie2_mse: simplex score for the three-parameter fit using the
 * mean squared error returned by getmse().
 *
 * x[1] is Z, x[2] is beta and x[3] is VZ; Z and beta below NULL_F are
 * replaced by 5 and 0.01, VZ below 1 by 1.0.  The globals Z, beta and
 * VZ are updated as a side effect.
 */
double sim_functie2_mse(x)
double *x;
{
   extern double getmse();

   Z = (x[1] < NULL_F) ? 5 : x[1];
   beta = (x[2] < NULL_F) ? 0.01 : x[2];
   VZ = (x[3] < 1) ? 1.0 : x[3];

   /* MSE over the first msenranks ranks for these parameter values */
   return(getmse());
}

/*
 * getVn: observed number of types with frequency r.
 *
 * Searches SPECTRUM (rows filled from index 1, frequencies assumed to be
 * in ascending order) for a row whose frequency equals r and returns its
 * type count, or 0.0 when the spectrum has no such row.
 *
 * The row for frequency r can never sit above index r, so the search
 * starts there and walks downwards.  Rows that were never filled hold a
 * frequency of 0 and are skipped: previously the search stopped on such
 * an empty row whenever r exceeded the number of stored rows, and then
 * reported 0 even though frequency r was present further down (spectra
 * with gaps).  The start index is clamped to the table, and negative r
 * yields 0.0, so no element outside SPECTRUM is read.
 */
double getVn (r) 
int r;
{
   int last = (int) (sizeof SPECTRUM / sizeof SPECTRUM[0]) - 1;
   int i;

   if (r < 0) return(0.0);

   i = (r > last) ? last : r;
   while (i > 0 &&
          ((int) SPECTRUM[i][0] > r || (int) SPECTRUM[i][0] == 0)) {
      i--;
   }
   if ((int) SPECTRUM[i][0] == r) return(SPECTRUM[i][1]);
   else return(0.0);
}

/*
 * getmse: mean squared error between the observed and the expected
 * spectrum for the current global parameters.
 *
 * Sums the squared differences E[V(m,N)] - V(m,N) for m = 1..msenranks,
 * adds the squared difference for the remaining types (everything above
 * msenranks, expected versus observed) and the squared difference
 * E[V(N)] - V, and divides by msenranks + 2.
 */
double getmse (){
   extern double getVn ();

   double sq_err = 0.0;             /* running sum of squared errors */
   double seen_types = 0.0;         /* observed types in ranks 1..msenranks */
   double expected_types = 0.0;     /* expected types in ranks 1..msenranks */
   double expected, observed;
   double total_expected, rest_expected, rest_observed;
   int rank;

   rank = 1;
   while (rank <= msenranks) {
        expected = expVm ((double) rank, N);
        observed = getVn (rank);
        seen_types += observed;
        expected_types += expected;
        sq_err += (expected-observed)*(expected-observed);
        rank++;
   }

   /* types with a frequency above msenranks, taken together */
   total_expected = expV(N);
   rest_expected = total_expected - expected_types;
   rest_observed = V - seen_types;
   sq_err += (rest_expected-rest_observed)*(rest_expected-rest_observed);

   /* and the vocabulary size itself */
   sq_err += (total_expected-V)*(total_expected-V);

   return(sq_err/((double)(msenranks+2)));
}
