/*
 *	File: ad2Sgam.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *
 *      History:
 *
 *      - jul 1997, version 1.0
 *
 *      Description:
 *
 *      Sichel's three-parameter model
 *	with option of Labbe-Hubert adjustment
 *
 *  LAST MODIFICATION NOV 24, 1999
 */

#include <stdio.h>
#include <stdlib.h>        
#include <string.h>
#include <errno.h>          
#include <malloc.h>
#include <math.h>          
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

/* basic functions for the inverse Gauss-Poisson model */

extern double	alpha1 ();
extern double	alpha2 ();
extern double	expV ();
extern double	getS ();
extern double	alphaRecur ();
extern double	bessel2 ();
extern void	triplet ();
extern double	gammln ();

/* functions for numerical procedures */

extern double	adjustV ();
extern double	adjustVm ();
extern double	getlexprop ();

extern double	sim_functie ();
extern void	amoeba ();
extern double	**matrix ();
extern double	*vector ();
extern void	free_vector ();
extern void	free_matrix ();

/* argument reading, file manipulation, and help function */

extern int	leesgetal ();
extern void	change_extension ();
extern void	getGaussParam ();
extern void	help ();


/* GLOBAL VARIABLES */

double    N, V, n1, n2, n3, n4, n5,      /* number of tokens, types, hapaxes, disleg */
         E,                             /* extrapolation sample size */
         SPECTRUM[MAXM3][2],            /* frequency spectrum m Vm */
         alphamin1, alphamin2,
         alpha, Gamma, theta, b, c, Z,  /* parameters of the model */
         ALPHA1N[MAXM3],                /* array for relative spectrum V(m,N)/V(N) */
         ALPHA2N[MAXM3],                /* array for relative spectrum at 2N */
         Nzero, Vzero,                  /* original sample size N0, idem voc. */
         V1zero,                        /* exp. V1 at Nzero */
         eV, eV2N, S, eV1, eV2,         /* E[V(N)], E[V(2N)], S */
         eV3, eV4, eV5, eVorig,
         x, x1, y, y2, y3, y4, y5,
         lexprop,                       /* Labbe-Hubert lex.spec parameter */
         mse,                           /* MSE for specialization fit */
         errorV,                        /* E[V(N)] - V(N)  when w1 == 0 */
         eV2Nadj,                       /* eV2N adjusted for errorV at 2N */
         eVadj,                         /* eV adjusted for error at Nk */
         eVadj1,                        /* eV adjusted for error at Nk+1 */
         w1,w2,w3,w4,w5,w6,             /* similarity weights */
         OBSERVED[MAXCHUNKS5+1][6],     /* V and V1..5 */
         CHUNKS[MAXCHUNKS5],            /* the chunk sizes */
         chunksize, remainDer,          /* chunk variables */
         **sim_mat,                     /* for simplex minimization */
         *sim_vec,
         *sim_yy,
         miny,
	 tolerance;

FILE     *fpspectrum,                   /* fpspectrum: datafile: m, Vm */
         *fpexpspect,                   /* expected spectrum */
         *fpexpspect2N,                 /* spectrum at 2N */
         *fpVN,                         /* file with E[V(N)] and E[V(2N)] */
         *fpsum,                        /* file with summary of model */
         *fpint,                        /* interpolation results */
         *fpext,                        /* extrapolation results */
         *fpE,                          /* spectrum at sample size E */
         *fpprofile,                    /* for reading text.obs */
         *fullspc,		        /* spectrum with m EVM for m=1..skip */
         *fpKvalues;                    /* list N_k for k = 1..K, K+1,..,2K */

int      nranks,                        /* number of different ranks in spectrum */
         maxrank,                       /* largest rank for fit, default 15 */
         i, j,            
         header,                        /* boolean for presence of header */
         k,                             /* variable for chunks */
         nchunks,                       /* number of chunks for interpolation */
         enchunks,                      /* number of chunks for extrapolation */
         token, type,                   /* auxiliary variables for scanf */
         ndimensions,                   /* for simplex downhill method */
         ndimensions1,                  /* for simplex downhill method */
         niterations,                   /* for simplex downhill method */
         simplex_flunked,               /* idem */
         dolexprop,                     /* carry out LH adjustment */
         lexpropviaV,                   /* estimate lexprop via V */
         again,                         /* for manual parameter estimation */
         fiddleWithWeights,             /* fiddleWithWeights */
	     skip,			        /* only print spectrum for m=1..skip */
		 withSimplex,                   /* boolean, 1 if simplex method used */
         aantal;                        /* for command line options */

char     woord[MAXWORDLENGTH],          /* variable for skipping header in fscanf */
         new_name[MAXWORDLENGTH],       /* variables for extension handling    */
         base_name[MAXWORDLENGTH],
         cc,                            /* for scanf () */
         *fs;                            /* variable for scanning options */


/* MAIN () */

int main (argc, argv)
int     argc;
char    *argv[];

{
   /* DEFAULTS */

   maxrank = DEF_MAXRANK;
   nchunks = DEF_CHUNKS;
   enchunks = DEF_CHUNKS;
   header = 1;
   dolexprop = 1;  /* carry out LH adjustment */
   lexprop = 0;
   lexpropviaV = 1;
   skip = 0;
   fiddleWithWeights = 0;  
   E = NULL_F;
   withSimplex = 0;
   w1 = 1.0; w2 = 1.0; w3 = 1.0; w4 = NULL_F; w5 = NULL_F; w6 = NULL_F;
   
   /* COMMAND LINE OPTIONS */

   while ((--argc > 0) && ((*++argv)[0] == '-')) {
        for (fs = argv[0] + 1; *fs != '\0'; fs++) {
            switch (*fs) {
            case 'h':
                help();
                break;
            case 'E':
                i =  leesgetal (fs, &aantal);
                E = (double) i;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'k':
                nchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'K':
                enchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'm':
                maxrank = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'L':  
                dolexprop = 0;  /* no Labbe-Hubert adjustment */
                break;
            case 'f':  
                fiddleWithWeights = 1;  
                break;
            case 'v':  
                lexpropviaV = 0;  /* use hapaxes to estimate lexprop */
                break;
            case 's':      /* don't interpolate or extrapolate */
                           /* show m and Vm and EVm for m=1..s */
                skip = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                if (skip == 0) {
                   fprintf(stderr, "ad2Sgam: cannot skip with zero rank\n");
                   exit(1);
                }
                break;
            case 'H':      /* input files without headers! */
                header = 0;
                break;
            default:
                fprintf(stderr, "ad2Sgam: illegal option %c\n", *fs);
                exit(1);
                break;
            }
        }
   } /* of while */

   /* FILE HANDLING */

   if (argc == 0) help();

   /* load input spectrum, should have .spect extension */

   if ((fpspectrum = fopen(*argv, "r")) == NULL) {
       fprintf(stderr, "ad2Sgam: can't open %s\n", *argv);
       exit(1);
   }

   /* file name handling output files */
 
   strncpy(base_name, *argv, strlen(*argv) - 4);

   if (dolexprop == 0){
       change_extension (base_name, new_name, "_G.spc");
   }
   else{
       change_extension (base_name, new_name, "_bG.spc");
   }
   if ((fpexpspect = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
      exit(1);
   }

   if (dolexprop == 0){
       change_extension (base_name, new_name, "_G.sp2");
   }
   else{
       change_extension (base_name, new_name, "_bG.sp2");
   }
   if ((fpexpspect2N = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
      exit(1);
   }

   if (dolexprop == 0){
       change_extension (base_name, new_name, "_G.ev2");
   }
   else{
       change_extension (base_name, new_name, "_bG.ev2");
   }
   if ((fpVN = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
      exit(1);
   }

   if (dolexprop == 0){
       change_extension (base_name, new_name, "_G.sum");
   }
   else{
       change_extension (base_name, new_name, "_bG.sum");
   }
   if ((fpsum = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
      exit(1);
   }

   if (dolexprop == 0){
       change_extension (base_name, new_name, "_G.int");
   }
   else{
       change_extension (base_name, new_name, "_bG.int");
   }
   if ((fpint = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
      exit(1);
   }

   if (dolexprop == 0){
       change_extension (base_name, new_name, "_G.ext");
   }
   else{
       change_extension (base_name, new_name, "_bG.ext");
   }
   if ((fpext = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
      exit(1);
   }

   if (E > NULL_F){
     if (dolexprop == 0){
         change_extension (base_name, new_name, "_G.sp3");
     }
     else{
         change_extension (base_name, new_name, "_bG.sp3");
     }
     if ((fpE = fopen(new_name, "w")) == NULL){
        fprintf(stderr, "ad2Sgam: can't open output file %s\n", new_name);
        exit(1);
     }
   } else {
    if (skip > 0 ) {
        change_extension (base_name, new_name, "_G.fsp");
        if ((fullspc = fopen(new_name, "w")) == NULL){
           fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
           exit(1);
        }
	/* CHANGE HERE - THIS BIT ADDED */

       change_extension (base_name, new_name, "_G.sum");
       if ((fpsum = fopen(new_name, "w")) == NULL){
          fprintf(stderr, "lnreSgam: can't open output file %s\n", new_name);
          exit(1);
       }
    } else {
        change_extension (base_name, new_name, "_G.iex");
        if ((fpKvalues = fopen(new_name, "w")) == NULL){
              fprintf(stderr,"lnreSgam: can't open output file %s\n", new_name);
              exit(1);
        }
    }
   }

   /* LOAD SPECTRUM FILE */

   fprintf(stdout, "loading spectrum\n");

   nranks = 0; n1 = 0; n2 = 0;
   if (header){
      fscanf(fpspectrum, "%s ", woord);  /* m */
      fscanf(fpspectrum, "%s ", woord);  /* Vm */
   }
   while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
        nranks++;
        SPECTRUM[nranks][0] = (double) token;
        SPECTRUM[nranks][1] = (double) type;
        if (token == 1) n1 = type;
        if (token == 2) n2 = type;
        if (token == 3) n3 = type;
        if (token == 4) n4 = type;
        if (token == 5) n5 = type;
        N+= (double) token * (double) type;
        V+= (double) type;
   }

   /* DETERMINE THE PARAMETERS OF THE MODEL */

   getGaussParam (N, V, n1, &alpha, &theta, &b, &c); 
   if ((c == 0)&&(b==0)){
      fprintf(stdout, "ad2Sgam: no solution for Gamma=-0.5\n");
      Z = 0;
   }
   else{
      Z = 1.0/c;
      Gamma = -0.5;
   }
   Nzero = N; Vzero = V;

   triplet ();
   
   /* PARAMETER ESTIMATION */

   /* use downhill simplex method, NumRecC, p. 305 */

   ndimensions = 3;  /* specific for full Sichel model */
   ndimensions1 = ndimensions+1;
   tolerance = 0.0001;
   niterations = 0;


   fprintf(stdout, "downhill simplex minimization? (y/n) \n");
   scanf ("%1s", &cc);
   if (cc != 'y'){
      simplex_flunked = 1;
   } else {
	withSimplex = 1;
    sim_mat = matrix (1, ndimensions + 1, 1, ndimensions);

    fprintf(stdout, "Initial values for minimization:\n");
    fprintf(stdout, "   Z  = %10.4f   b     = %14.12f   gamma     = %10.4f\n", \
            Z, b, Gamma);
    fprintf(stdout, "   V  = %10.0f   E[V]  = %15.2f\n", V, eV);
    fprintf(stdout, "   V1 = %10.0f   E[V1] = %15.2f\n", n1, eV1);
    fprintf(stdout, "   V2 = %10.0f   E[V2] = %15.2f\n", n2, eV2);
    fprintf(stdout, "   V3 = %10.0f   E[V3] = %15.2f\n", n3, eV3);
    fflush(stdout);
    fprintf(stdout,"proceed (y), specify own starting point (o), or quit (q) ");
    scanf("%1s", &cc);
    if (cc == 'q') exit(1);
    if (cc=='o'){
     fprintf(stdout, "specify Z, b, and gamma: ");
     scanf("%lf %lf %lf", &Z, &b, &Gamma);
    }
    fprintf(stdout, "tolerance = %f, change? (y/n) ", tolerance);
    scanf("%1s", &cc);
    if (cc=='y') {
        fprintf (stdout, "specify tolerance: ");
        scanf ("%lf", &tolerance);
    }
    fflush(stdout);

    sim_mat[1][1] = Z;    sim_mat[1][2] = b;         sim_mat[1][3] = Gamma;
    sim_mat[2][1] = 0.7*Z;sim_mat[2][2] = b-(0.2*b); sim_mat[2][3] = Gamma-0.1;
    sim_mat[3][1] = 1.7*Z;sim_mat[3][2] = b+(0.2*b); sim_mat[3][3] = Gamma+0.1;
    sim_mat[4][1] = 1.3*Z;sim_mat[4][2] = b+(0.1*b); sim_mat[4][3] = Gamma+0.02;

    sim_vec = vector (1, ndimensions + 1);
    sim_yy = vector (1, ndimensions);

    for (i = 1; i <= ndimensions+1; i++){
      sim_yy[1] = sim_mat[i][1];
      sim_yy[2] = sim_mat[i][2];
      sim_yy[3] = sim_mat[i][3];
      sim_vec[i] = sim_functie (sim_yy);
    }

    fprintf (stdout, "\nStarting simplex method for parameter estimation\n");
    fflush (stdout);
    amoeba (sim_mat, sim_vec, ndimensions, tolerance, sim_functie, &niterations, 0);

    /* find minimum for which values are less than tolerance */

    if (simplex_flunked != 2) {
      miny = 100000000.0;
      for (i = 1; i <= ndimensions+1; i++){
            if (sim_vec[i] < miny){
                  j = i;
                  miny = sim_vec[i];
            }
      }
      Z = sim_mat[j][1]; b = sim_mat[j][2]; Gamma = sim_mat[j][3];
	}

   if (fiddleWithWeights == 1) {
    fprintf(stdout, "change standard weights? (y/n) ");
    scanf("%1s", &cc);
    if (cc=='y') {
        fprintf(stdout, "specify w1 w2 w3 w4 w5 w6  ");
        scanf("%lf %lf %lf %lf %lf %lf", &w1, &w2, &w3, &w4, &w5, &w6);
        if (w1!= 1.0) lexpropviaV = 0;
    }
   }

   c = 1.0/Z;
   triplet ();
   fprintf(stdout, "\n   Z  = %10.4f  b     = %14.12f  gamma     = %10.4f\n", \
           Z, b, Gamma);
   fprintf(stdout, "   V  = %10.0f  E[V]  = %15.2f\n", V, eV);
   fprintf(stdout, "   V1 = %10.0f  E[V1] = %15.2f\n", n1, eV1);
   fprintf(stdout, "   V2 = %10.0f  E[V2] = %15.2f\n", n2, eV2);
   fprintf(stdout, "   V3 = %10.0f  E[V3] = %15.2f\n", n3, eV3);
   fprintf(stdout, "   V4 = %10.0f  E[V4] = %15.2f\n", n4, eV4);
   fprintf(stdout, "   V5 = %10.0f  E[V5] = %15.2f\n", n5, eV5);
 } 

 if (simplex_flunked){   /* TRY MANUALLY */
     getGaussParam (N, V, n1, &alpha, &theta, &b, &c);
     if ((c == 0)&&(b==0)){
         fprintf(stdout, "ad2Sgam: no solution for Gamma=-0.5\n");
         fflush(stdout);
         Z = 0;
     }
     else{
         Z = 1.0/c;
         Gamma = -0.5;
         triplet ();
         fprintf(stdout, \
                 "   Z  = %10.4f  b     = %14.12f  gamma     = %10.2f\n", \
                 Z, b, -0.5);
         fprintf(stdout, "V    = %15.2f  V1    = %15.2f  V2    = %15.2f\n", \
                         V, n1, n2);
         fprintf(stdout, "E[V] = %15.2f  E[V1] = %15.2f  E[V2] = %15.2f\n", \
                         eV, eV1, eV2);
         fflush(stdout);
     }

     again = 1;
     if (simplex_flunked==2) {
       again = 0;
     }
     while (again){
        fprintf(stdout, "specify Z, b, and gamma\n ");
        scanf("%lf %lf %lf", &Z, &b, &Gamma);
        triplet ();
        fprintf(stdout, "Summary of main accuracy statistics:\n");
        fprintf(stdout, "V    = %15.2f  V1    = %15.2f  V2    = %15.2f\n", \
                         V, n1, n2);
        fprintf(stdout, "E[V] = %15.2f  E[V1] = %15.2f  E[V2] = %15.2f\n", \
                         eV, eV1, eV2);
        fprintf(stdout, \
                 "   Z  = %10.4f  b     = %14.12f  gamma     = %10.8f\n", \
                 Z, b, Gamma);
        fflush(stdout);
        fprintf(stdout, "reestimate (r), continue (c), or quit (q)? ");
        scanf("%s", &cc);
        if (cc=='q') exit(1);
        if (cc!='r') again=0;
     }
   }
 
   /* CONSTRUCT RELATIVE FREQUENCY SPECTRUM FOR THE FIRST maxrank RANKS */
 
   triplet ();
   errorV = eV - V;

   ALPHA1N[1] = alpha1 ();
   ALPHA1N[2] = alpha2 (ALPHA1N[1]);
   for (i = 3; i <= maxrank; i++){
      ALPHA1N[i] = alphaRecur ( (double) i, ALPHA1N[i-1], ALPHA1N[i-2]);
   }

   /* AND CALCULATE E[V(N)] and S */

   Nzero = N;    /* take present sample size as Nzero */

   if (dolexprop==1){ /* read observed profile */

     change_extension (base_name, new_name, ".obs");

     if ((fpprofile = fopen(new_name, "r")) == NULL) {
       fprintf(stderr, "ad2Sgam: can't open file %s\n", new_name);
       exit(1);
     }

     fprintf(stdout, "reading %s.obs\n", base_name);
     if (header){
         for (i = 1; i <= NMEASURES+1; i++){
            if (fscanf(fpprofile, "%s ", woord) == EOF){
              fprintf(stderr, "incomplete header for %s.obs\n", base_name);
            }
         }
     }
     for (i = 1; i <= nchunks; i++){
       if (fscanf(fpprofile, "%d ", &type) != EOF){
           ;
       }
       else{
           fprintf(stderr, 
           "%s.obs is incompatible with required number of chunks\n", base_name);
           exit(1);
       }
       for (j = 2; j <= NMEASURES+1; j++){
           if (fscanf(fpprofile, "%lf ", &x) != EOF){
               switch (j){
                 case 4:
                   OBSERVED[i][0] = x;
                   break;
                 case 5:
                   OBSERVED[i][1] = x;
                   break;
                 case 6:
                   OBSERVED[i][2] = x;
                   break;
                 case 7:
                   OBSERVED[i][3] = x;
                   break;
                 case 8:
                   OBSERVED[i][4] = x;
                   break;
                 case 9:
                   OBSERVED[i][5] = x;
                   break;
                 default:
                   break;
              }
           }
           else{
             fprintf (stderr, "%s.obs is incompatible with required number of chunks\n",
                              base_name);
             exit (1);
           }
       }
    }

    /* determine optimal specialization proportion */
    fprintf (stdout, "starting estimation of p\n");
    lexprop = getlexprop ();
   }

   /* PRINT SUMMARY */

   fprintf(fpsum, "Full Inverse Gauss-Poisson model for %s\n", *argv);
   fprintf(fpsum, "N         = %12d\n", (int) N);
   fprintf(fpsum, "V(N)      = %12d\n", (int) V);
   fprintf(fpsum, "E[V(N)]   = %12.2f\n", eV - errorV);
   eVorig = eV - errorV;
   fprintf(fpsum, "V-error   = %12.2f\n", errorV);
   fprintf(fpsum, "V(1,N)    = %12d\n", (int) n1);
   fprintf(fpsum, "E[V(1,N)] = %12.2f\n", eV1);
   fprintf(fpsum, "V(2,N)    = %12d\n", (int) n2);
   fprintf(fpsum, "E[V(2,N)] = %12.2f\n", eV2);
   fprintf(fpsum, "V(3,N)    = %12d\n", (int) n3);
   fprintf(fpsum, "E[V(3,N)] = %12.2f\n", eV3);
   fprintf(fpsum, "S         = %12.2f (upper bound)\n", S);
   fprintf(fpsum, "b         = %12.10f\n",  b);
   fprintf(fpsum, "c         = %12.10f\n",  c);
   fprintf(fpsum, "Z         = %12.10f\n",  Z);
   fprintf(fpsum, "gamma     = %12.10f\n\n",  Gamma);
   if (fiddleWithWeights == 1) {
     fprintf(fpsum, "w1        = %12.10f\n",  w1);
     fprintf(fpsum, "w2        = %12.10f\n",  w2);
     fprintf(fpsum, "w3        = %12.10f\n",  w3);
     fprintf(fpsum, "w4        = %12.10f\n",  w4);
     fprintf(fpsum, "w5        = %12.10f\n",  w5);
     fprintf(fpsum, "w6        = %12.10f\n\n",  w6);
   }
   fprintf(fpsum, "lexprop   = %12.4f\n", lexprop);
   fprintf(fpsum, "MSE       = %12.8f\n", mse);
   fclose(fpsum);

   if (skip > 0) {
    fprintf(stdout, "estimating EVm for m=1..%d\n", skip);
    fflush(stdout);
    fprintf(fullspc, "m EVm\n");
    fprintf(fullspc, "%10d %10.4f\n", 1, eV1);
    fprintf(fullspc, "%10d %10.4f\n", 2, eV2);
    fflush(fullspc);
    alphamin1 = eV2/eV;
    alphamin2 = eV1/eV;
    for (i = 3; i <= skip; i++) {
      x = alphaRecur( (double) i, alphamin1, alphamin2);
      fprintf(fullspc, "%10d %10.4f\n", i, eV*x);
      alphamin2 = alphamin1; alphamin1 = x;
    }
    fprintf(stdout, "V = %10.2f   S         = %12.5f\n", eV, S);
    fflush(stdout);
    exit(1);
   }

   /* PRINT SPECTRUM */

   fprintf(fpexpspect, "         m         Vm        EVm     alphaM    EalphaM\n");
   for (i = 1; i <= maxrank; i++) {
     fprintf(fpexpspect, "%10d %10d ",  (int) SPECTRUM[i][0], \
           (int) SPECTRUM[i][1]);
     fprintf(fpexpspect, "%10.1f %10.4f %10.4f\n", \
           ALPHA1N[i]*eV,  SPECTRUM[i][1]/V, ALPHA1N[i]);
     /*     VMZERO[i] = ALPHA1N[i]*eV; */
   }
   fclose(fpexpspect);

   /* PRINT SPECTRUM AT 2N */

   N = 2 * Nzero;
   eV2N = expV();
   eV2Nadj = eV2N - (2*errorV);

   eV = eV2N;
   ALPHA2N[1] = alpha1();
   ALPHA2N[2] = alpha2(ALPHA2N[1]);
   for (i = 3; i <= 2*maxrank; i++){
      ALPHA2N[i] = alphaRecur( (double) i, ALPHA2N[i-1], ALPHA2N[i-2]);
   }

   fprintf (fpexpspect2N, "         m      EVm2N\n");
   for (i = 1; i <= 2 * maxrank; i++) {
     fprintf (fpexpspect2N, "%10d %10.2f\n", i, adjustVm (ALPHA2N[i] * eV2N, N, lexprop, Nzero, ALPHA1N[i] * eV));
     /* fprintf(stderr, "%10d %10.2f\n", i, adjustVm (ALPHA2N[i] * eV2N, N, lexprop, Nzero, ALPHA1N[i] * eV)); */
   }
   fclose (fpexpspect2N);

   /* PRINT VOCABULARY SIZES */

   fprintf (fpVN, "       V       EV     EV2N  EV2Norig\n");
   if (w1==1.0){
       fprintf (fpVN, "%12.2f %12.2f %12.2f %12.2f\n", V, eVorig, \
           adjustV (eV2Nadj, N, lexprop, Nzero, Vzero), eV2N);
   }
   else{
       fprintf (fpVN, "%12.2f %12.2f %12.2f %12.2f\n", V, V, \
           adjustV (eV2Nadj, N, lexprop, Nzero, Vzero), eV2N);
   }
   fclose (fpVN);

   /* INTERPOLATION */

   if (nchunks > 0){ 

     /* CALCULATE THE TEXT CHUNKS */

     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];
    
     /* AND PRINT THE CORRESPONDING STATISTICS */

     fprintf (fpint, "       N         EV     Alpha1        EV1       EV2        EV3        EV4        EV5         GV       EVorig\n");

     for (k = 1; k <= nchunks; k++) {
        N = CHUNKS[k]+1; 
        x1 = expV ();
        eVadj1 = x1 - ((N / Nzero)*errorV);

        N = CHUNKS[k];
        x = expV ();
        eV = x;
        eVadj = x - ((N / Nzero)*errorV);

        y = alpha1 (); 
        fprintf (fpint, "%8.1f   %8.1f   %8.4f   %8.1f", N,  adjustV (eVadj, N, lexprop, Nzero, Vzero), y,\
                 adjustVm (x * y, N, lexprop, Nzero, ALPHA1N[1] * eV));
        y2 = alpha2 (y);
        y3 = alphaRecur (3.0, y2, y);
        y4 = alphaRecur (4.0, y3, y2);
        y5 = alphaRecur (5.0, y4, y3);
        fprintf (fpint, "  %8.1f   %8.1f   %8.1f   %8.1f   %8.5f   %8.1f\n", \
                        adjustVm (y2 * x, N, lexprop, Nzero, ALPHA1N[2] * eV),
                        adjustVm (y3 * x, N, lexprop, Nzero, ALPHA1N[3] * eV),
                        adjustVm (y4 * x, N, lexprop, Nzero, ALPHA1N[4] * eV),
                        adjustVm (y5 * x, N, lexprop, Nzero, ALPHA1N[5] * eV),
                        eVadj1 - eVadj, x);
     }
     fclose (fpint);
   }

   /* EXTRAPOLATION */
   
   if (E == NULL_F) {  /* extrapolate to 2N */
     fprintf(fpext, "         N           EV          EV1        EV2        EV3        EV4        EV5      EVorig\n");

     for (k = 1; k <= nchunks; k++) {
        N = Nzero + CHUNKS[k];
        x = expV ();
        eVadj = x - ((N / Nzero) * errorV);
        eV = x;
        y = alpha1 ();
        fprintf (fpext, "%10.1f   %10.1f   %10.1f", \
                 N, adjustV (eVadj, N, lexprop, Nzero, Vzero), \
                 adjustVm (y * x, N, lexprop, Nzero, ALPHA1N[1] * eV));
        y2 = alpha2 (y);
        y3 = alphaRecur (3.0, y2, y);
        y4 = alphaRecur (4.0, y3, y2);
        y5 = alphaRecur (5.0, y4, y3);
        fprintf (fpext, "  %8.1f   %8.1f   %8.1f   %8.1f   %8.1f\n", \
                        adjustVm (y2 * x, N, lexprop, Nzero, ALPHA1N[2] * eV),
                        adjustVm (y3 * x, N, lexprop, Nzero, ALPHA1N[3] * eV), 
                        adjustVm (y4 * x, N, lexprop, Nzero, ALPHA1N[4] * eV), 
                        adjustVm (y5 * x, N, lexprop, Nzero, ALPHA1N[5] * eV), 
                        adjustV (x, N, lexprop, Nzero, Vzero));
     }
   }
   else{ 
     
     /* FIND NEW CHUNKSIZES */

     chunksize = floor((E-Nzero)/(enchunks*1.0));
     remainDer = (E-Nzero) - ((enchunks*1.0) * chunksize);
     for (k = 1; k <= enchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++)  CHUNKS[k]++;
     for (k = 2; k <= enchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     /* PRINT THE GROWTH CURVE */

     fprintf(fpext, \
	 "         N           EV          EV1        EV2        EV3        EV4        EV5      EVorig\n");
     for (k = 1; k <= enchunks; k++) {
        N = Nzero + CHUNKS[k];
        x = expV ();
        eVadj = x - ((N / Nzero) * errorV);
        eV = x;
        y = alpha1 ();
        fprintf (fpext, "%10.1f   %10.1f   %10.1f", \
                 N, adjustV (eVadj, N, lexprop, Nzero, Vzero), \
                 adjustVm (y * x, N, lexprop, Nzero, ALPHA1N[1] * eV));
        y2 = alpha2 (y);
        y3 = alphaRecur (3.0, y2, y);
        y4 = alphaRecur (4.0, y3, y2);
        y5 = alphaRecur (5.0, y4, y3);
        fprintf (fpext, "  %8.1f   %8.1f   %8.1f   %8.1f   %8.1f\n", \
                        adjustVm (y2 * x, N, lexprop, Nzero, ALPHA1N[2] * eV),
                        adjustVm (y3 * x, N, lexprop, Nzero, ALPHA1N[3] * eV), 
                        adjustVm (y4 * x, N, lexprop, Nzero, ALPHA1N[4] * eV), 
                        adjustVm (y5 * x, N, lexprop, Nzero, ALPHA1N[5] * eV), 
                        adjustV (x, N, lexprop, Nzero, Vzero));
     }


     /* AND SHOW THE SPECTRUM AT E */

     N = E;
     eV2N = expV();
     eV = eV2N;
     ALPHA2N[1] = alpha1();
     ALPHA2N[2] = alpha2(ALPHA2N[1]);
     for (i = 3; i <= maxrank; i++){
         ALPHA2N[i] = alphaRecur( (double) i, ALPHA2N[i-1], ALPHA2N[i-2]);
     }
     fprintf(fpE, "         m      EVmXN\n");
     for (i = 1; i <= maxrank; i++){
       fprintf(fpE, "%10d %10.2f\n", i, ALPHA2N[i]*eV2N);
     }
   }

   if (withSimplex == 1) {
      free_matrix (sim_mat, 1, ndimensions + 1, 1, ndimensions);
      free_vector (sim_vec, 1, ndimensions + 1);
      free_vector (sim_yy, 1, ndimensions);
   }

   return (0);
}


double getS ()
{
  return (((2.0 * Z) / b) * (bessel2 (Gamma, b) / bessel2 (Gamma + 1.0, b)));
}


double expV ()
{
  S = getS ();
  return (S * (1.0 - (bessel2 (Gamma, b * sqrt (1.0 + (N / Z))) /
                      (exp ((Gamma / 2.0) * log (1.0 + (N / Z))) * bessel2 (Gamma, b)))));
}


double alpha1 ()
{
 return (((N / exp ((Gamma + 1.0) * log (sqrt (1.0 + (N / Z))))) *
          (bessel2 (Gamma + 1.0, b * sqrt (1.0 + (N / Z))) / bessel2 (Gamma + 1.0, b)) /
          eV));
}


double alpha2 (a1)   /* a1 = alpha1() */
double	a1;

{
  return (a1 * 0.5 * (1.0 / ((Z / N) + 1.0)) *
          (((b * sqrt (1 + (N / Z)) * bessel2 (Gamma, b * sqrt (1 + (N / Z)))) /
            (2.0  * bessel2 (Gamma + 1.0, b * sqrt (1.0 + (N / Z))))) +
           Gamma + 1));
}


double alphaRecur(m, amin1, amin2)
double	m, amin1, amin2;

{
  double x;

  x = (b * N) / (2 * Z * sqrt (1 + (N / Z)));
  return ((((Gamma + m - 1.0) / m) * (1.0 / ((Z / N) + 1.0)) * amin1) +
          ((x * x) / (m * (m - 1.0)) * amin2));
}


void help ()
{
  fprintf (stderr, "ad2Sgam text.spc\n");
  fprintf (stderr, "OPTIONS:\n");
  fprintf (stderr, "     -h: display help\n");
  fprintf (stderr, "     -m: number of ranks in fit (default: 15)\n");
  fprintf (stderr, "     -k: number of chunks for interpolation (default: 20)\n");
  fprintf (stderr, "     -K: number of chunks for extrapolation (default: 20)\n");
  fprintf (stderr, "     -E: extrapolation sample size (default: 2N)\n");
  fprintf (stderr, "     -H: input files lack header (default: with header)\n");
  fprintf (stderr, "     -L: do not carry out partition-based adjustment\n");
  fprintf (stderr, "     -v: partition-based adjustment by means of hapaxes\n");
  fprintf (stderr, "     -f: fiddle with the weights\n");
  fprintf (stderr, "INPUT:\n");
  fprintf (stderr, "     text.spc:  m Vm\n");
  fprintf (stderr, "OUTPUT:\n");
  fprintf (stderr, "     text_bG.spc:  expected spectrum\n");
  fprintf (stderr, "     text_bG.sp2:  expected spectrum at 2N\n");
  fprintf (stderr, "     text_bG.ev2:  E[V(N)] and E[V(2N)]\n");
  fprintf (stderr, "     text_bG.sum:  summary, fitted parameters\n");
  fprintf (stderr, "     text_bG.int:  interpolated E[V(N)] and spectrum\n");
  fprintf (stderr, "     text_bG.ext:  extrapolated E[V(N)] and spectrum\n");
  exit (1);
}


double IF (nu, z)
double	nu, z;

{
  double teller, noemer;
  double som, term, delta, lastterm;
  double n;
  double OPSLAG[1000];
  int i, j;

  term = 1.0;
  delta = 1.0;
  n = NULL_F;
  som = NULL_F;
  i = 0;
  lastterm = NULL_F;

  while (delta > EPSILON) {
    teller = exp( (nu + (2.0*n)) * log( z/2.0 ) );
    if (n > NULL_F) {
        noemer = exp (gammln(n+1.0))* exp(gammln(nu+n+1.0));
    }
    else{
        delta = 1.0;
        noemer = exp (gammln(nu+n+1.0));
    }
    term = teller/noemer;
    if (i > 999) {
        fprintf(stderr, "ad2Sgam: no convergence within 1000 steps for Bessel function\n");
        fflush(stdout);
        fclose(stdout);
        exit(1);
    }
    OPSLAG[i] = term;
    if (i > 0) {
        delta = term - lastterm;
        if (delta < NULL_F) {
            delta *= -1.0;
        }
    }

     n += 1.0;
    i++;
    lastterm = term;
 }
 for (j = i-1; j >= 0; j--) {
    som += OPSLAG[j];
 }
 return(som);
}


double bessel2 (nu, z)
double nu, z;

{
 double x, y, q;

 x = PI_2;
 y = sin (nu * PI);
 q = IF((-1.0*nu),z) - IF(nu,z);
 return((x*q)/y);
}


double sim_functie (x)
double *x;

{
   Z = x[1]; b = x[2]; Gamma = x[3];
   if (Z <= 0) Z=0.1;
   if (b <= 0) b = 0.01;
   if (Gamma <= -1) Gamma = -0.99;
   if (Gamma >= 0) Gamma = -0.01;
   triplet ();
   return((w1*fabs(V-eV))+(w2*fabs(n1-eV1))+(w3*fabs(n2-eV2))+(w4*fabs(n3-eV3))
         +(w5*fabs(n4-eV4))+(w6*fabs(n5-eV5)));
}


void triplet ()
{
   eV = expV ();
   ALPHA1N[1] = alpha1 ();
   ALPHA1N[2] = alpha2 (ALPHA1N[1]);
   ALPHA1N[3] = alphaRecur (3.0, ALPHA1N[2], ALPHA1N[1]);
   ALPHA1N[4] = alphaRecur (4.0, ALPHA1N[3], ALPHA1N[2]);
   ALPHA1N[5] = alphaRecur (5.0, ALPHA1N[4], ALPHA1N[3]);
   eV = expV ();
   eV1 = ALPHA1N[1]*eV;
   eV2 = ALPHA1N[2]*eV;
   eV3 = ALPHA1N[3]*eV;
   eV4 = ALPHA1N[4]*eV;
   eV5 = ALPHA1N[5]*eV;
}


double getlexprop ()
{
     double expvoc, adjvoc, optp, p, t, diff, expV1N, adjexpV1N;
     int i, k;
     /* this version works by minimizing for V(1,N) instead of for V(N) */
     /* when w1!=1.0 */

     /* determine chunksizes */

     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];


     N = Nzero;
     V1zero = alpha1 () * expV ();

     /* use vocabulary growth curve for fit */


     mse = MAXX;
     optp = NULL_F;
     for (p = 0.01; p <= 1.0; p += 0.01) {
       x = NULL_F;
       for (i = 1; i <= nchunks; i++) {
          N = CHUNKS[i]; t = N/CHUNKS[nchunks];
          if (lexpropviaV==1.0){
             expvoc = expV();  /* with w1==1 note that Vzero == EV: */
             adjvoc = (p * t * Vzero) + (1.0-p)*expvoc;
             diff = (adjvoc-OBSERVED[i][0]);
             x += (diff*diff);
          }
          else{
             expvoc = expV();
             expV1N = alpha1()*expvoc;
             adjexpV1N = (p * t * V1zero) + (1.0-p)*expV1N;
             diff = (adjexpV1N-OBSERVED[i][1]);
             x += (diff*diff);
             /*
             printf("V1 eV1 aeV1 diff V1zero   p; %f %f %f %f %f  %f\n", 
              OBSERVED[i][1], expV1N, adjexpV1N, diff, V1zero, p);
             */
          }
       }
       x = x / nchunks;
       if (x < mse) {
         mse = x;
         optp = p;
       }
     }
     return (optp);
}
