/*
 *	File: lnreSich.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *
 *      History:
 *
 *      - feb 1997, version 1.0
 *
 *      Description:
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>        
#include <string.h>          
#include <errno.h>
#include <math.h>          
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

/* functions for numerical procedures */

extern double	alpha1 ();
extern double	alpha2 ();
extern double	alphaRecur();
extern double	expV ();
extern double	getS ();

/* argument reading, file manipulation, and help function */

extern int	leesgetal ();
extern void	change_extension ();
extern void	getGaussParam ();
extern void	help ();


/* GLOBAL VARIABLES */

double   N, V, n1, n2,                  /* number of tokens, types, hapaxes, disleg */
         E,                             /* extrapolation sample size */
         SPECTRUM[MAXM3][2],            /* frequency spectrum m Vm */
         alpha, Gamma, theta, b, c, Z,  /* parameters of the model */
         ARS[MAXM3],                    /* array for relative spectrum V(m,N)/V(N) */
         ARS2N[MAXM3],                  /* array for relative spectrum at 2N */
         Nzero,                         /* original sample size N0 */
         eV, eV2N, S,                   /* E[V(N)], E[V(2N)], S */
         x, x1, y, y2, y3, y4, y5,
         CHUNKS[MAXCHUNKS3],            /* the chunk sizes */
         chunksize, remainDer;          /* chunk variables */

FILE     *fpspectrum,                   /* fpspectrum: datafile: m, Vm */
         *fpexpspect,                   /* expected spectrum */
         *fpexpspect2N,                 /* spectrum at 2N */
         *fpVN,                         /* file with E[V(N)] and E[V(2N)] */
         *fpsum,                        /* file with summary of model */
         *fpint,                        /* interpolation results */
         *fpext,                        /* extrapolation results */
         *fpE;                          /* spectrum at sample size E */

int      nranks,                        /* number of different ranks in spectrum */
         maxrank,                       /* largest rank for fit, default 15 */
         i,                             /* counter */
         header,                        /* boolean for presence of header */
         k,                             /* variable for chunks */
         nchunks,                       /* number of chunks for interpolation */
         enchunks,                      /* number of chunks for extrapolation */
         token, type,                   /* auxiliary variables for scanf */
         aantal;                        /* for command line options */

char     woord[MAXWORDLENGTH],          /* variable for skipping header in fscanf */
         new_name[MAXWORDLENGTH],       /* variables for extension handling */
         base_name[MAXWORDLENGTH],
         *fs;                            /* variable for scanning options */


/* MAIN () */

int main (argc, argv)
int     argc;
char    *argv[];

{ 
   /* DEFAULTS */

   maxrank = DEF_MAXRANK;
   nchunks = DEF_CHUNKS;
   enchunks = DEF_CHUNKS;
   header = 1;
   E = NULL_F;
   

   /* COMMAND LINE OPTIONS */

   while ((--argc > 0) && ((*++argv)[0] == '-')) {
        for (fs = argv[0] + 1; *fs != '\0'; fs++) {
            switch (*fs) {
            case 'h':
                help();
                break;
            case 'E':
                i =  leesgetal (fs, &aantal);
                E = (double) i;
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'k':
                nchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'K':
                enchunks = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'm':
                maxrank = leesgetal (fs, &aantal);
                for (; aantal > 0; aantal--){
                   fs++;
                }
                break;
            case 'H':      /* input files without headers! */
                header = 0;
                break;
            default:
                fprintf(stderr, "lnreSich: illegal option %c\n", *fs);
                exit(1);
                break;
            }
        }
   } /* of while */

   /* FILE HANDLING */

   if (argc == 0) {
     help ();
   }

   /* load input spectrum, should have .spect extension */

   if ((fpspectrum = fopen(*argv, "r")) == NULL) {
       fprintf(stderr, "lnreSich: can't open %s\n", *argv);
       exit(1);
   }

   /*
         fpexpspect     text.S.espect
         fpexpspect2N   text.S.espect2
         fpVN           text.S.ev2
   */

   /* file name handling output files */
 
   strncpy(base_name, *argv, strlen(*argv) - 4);

   change_extension (base_name, new_name, "_S.spc");
   if ((fpexpspect = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_S.sp2");
   if ((fpexpspect2N = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_S.ev2");
   if ((fpVN = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_S.sum");
   if ((fpsum = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_S.int");
   if ((fpint = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
      exit(1);
   }
   change_extension (base_name, new_name, "_S.ext");
   if ((fpext = fopen(new_name, "w")) == NULL){
      fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
      exit(1);
   }
   if (E > NULL_F){
     change_extension (base_name, new_name, "_S.sp3");
     if ((fpE = fopen(new_name, "w")) == NULL){
        fprintf(stderr, "lnreSich: can't open output file %s\n", new_name);
        exit(1);
     }
   }

   /* LOAD SPECTRUM FILE */

   nranks = 0; n1 = 0; n2 = 0;
   if (header){
      fscanf(fpspectrum, "%s ", woord);  /* m */
      fscanf(fpspectrum, "%s ", woord);  /* Vm */
   }
   while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
        nranks++;
        SPECTRUM[nranks][0] = (double) token;
        SPECTRUM[nranks][1] = (double) type;
        if (token == 1) n1 = type;
        if (token == 2) n2 = type;
        N+= (double) token * (double) type;
        V+= (double) type;
   }

   /* DETERMINE THE PARAMETERS OF THE MODEL */

   getGaussParam (N, V, n1, &alpha, &theta, &b, &c); 

   if ((c == 0) && (b == 0)){
      fprintf (stdout, "lnreSich: no solution after getGaussParam()\n");
      Z = 0;
   }
   else{
      Z = 1.0 / c;
   }

   /* CONSTRUCT RELATIVE FREQUENCY SPECTRUM FOR THE FIRST maxrank RANKS */
 
   ARS[1] = alpha1 ();
   ARS[2] = alpha2 (ARS[1]);
   for (i = 3; i <= maxrank; i++){
      ARS[i] = alphaRecur ((double) i, ARS[i-1], ARS[i-2]);
   }

   /* AND CALCULATE E[V(N)] and S */

   eV = expV ();
   S = getS ();
   Nzero = N;    /* take present sample size as Nzero */

   /* PRINT SUMMARY */

   fprintf(fpsum, "Inverse Gauss-Poisson model for %s\n", *argv);
   fprintf(fpsum, "N         = %12d\n", (int) N);
   fprintf(fpsum, "V(N)      = %12d\n", (int) V);
   fprintf(fpsum, "E[V(N)]   = %12.4f\n", eV);
   fprintf(fpsum, "V(1,N)    = %12d\n", (int) n1);
   fprintf(fpsum, "E[V(1,N)] = %12.4f\n", eV*ARS[1]);
   fprintf(fpsum, "S         = %12.4f\n\n", S);
   fprintf(fpsum, "b         = %12.10f\n",  b);
   fprintf(fpsum, "c         = %12.10f\n",  c);
   fprintf(fpsum, "Z         = %12.5f\n",  Z);
   fprintf(fpsum, "gamma     = %12.8f\n",  -0.5);
   fclose(fpsum);

   /* PRINT SPECTRUM */

   fprintf(fpexpspect, "         m         Vm        EVm     alphaM    EalphaM\n");
   for (i = 1; i <= maxrank; i++) {
     fprintf(fpexpspect, "%10d %10d ",  (int) SPECTRUM[i][0], 
           (int) SPECTRUM[i][1]);
     fprintf(fpexpspect, "%15.4f %15.4f %15.4f\n", 
           ARS[i]*eV,  SPECTRUM[i][1]/V, ARS[i]);
   }
   fclose(fpexpspect);

   /* PRINT SPECTRUM AT 2N */

   N = 2 * Nzero;
   eV2N = expV();
   ARS2N[1] = alpha1();
   ARS2N[2] = alpha2(ARS2N[1]);
   for (i = 3; i <= 2*maxrank; i++){
      ARS2N[i] = alphaRecur( (double) i, ARS2N[i-1], ARS2N[i-2]);
   }

   fprintf(fpexpspect2N, "         m      EVm2N\n");
   for (i = 1; i <= 2 * maxrank; i++){
     fprintf(fpexpspect2N, "%10d %15.2f\n", i, ARS2N[i]*eV2N);
   }
   fclose(fpexpspect2N);

   /* PRINT VOCABULARY SIZES */

   fprintf(fpVN, "       V       EV     EV2N\n");
   fprintf(fpVN, "%15.2f %15.2f %15.2f\n", V, eV, eV2N);
   fclose(fpVN);

   /* INTERPOLATION */

   if (nchunks > 0){ 

     /* CALCULATE THE TEXT CHUNKS */

     chunksize = floor(Nzero/(nchunks*1.0));
     remainDer = Nzero - ((nchunks*1.0) * chunksize);
     for (k = 1; k <= nchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++) CHUNKS[k]++;
     for (k = 2; k <= nchunks; k++)   CHUNKS[k] += CHUNKS[k-1];
    
     /* AND PRINT THE CORRESPONDING STATISTICS */

     fprintf(fpint, "       N       EV   Alpha1      EV1     EV2      EV3      EV4      EV5       GV\n");

     for (k = 1; k <= nchunks; k++){
        N = CHUNKS[k]+1; x1 = expV();
        N = CHUNKS[k];
        x = expV();
        y = alpha1();
        fprintf(fpint, "%15.2f %15.2f %15.4f %15.2f", N,  x, y, x*y);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpint, "%15.2f %15.2f %15.2f %15.2f %15.4f\n", 
                y2*x, y3*x, y4*x, y5*x, x1-x);
     }
     fclose(fpint);
   }

   /* EXTRAPOLATION */
   
   if (E == NULL_F) {  /* extrapolate to 2N */
     fprintf(fpext, "         N         EV        EV1      EV2      EV3      EV4      EV5\n");

     for (k = 1; k <= nchunks; k++){
        N = Nzero + CHUNKS[k];
        x = expV();
        y = alpha1();
        fprintf(fpext, "%15.2f %15.2f %15.4f ", N,  x, y*x);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpext, "%15.2f %15.2f %15.2f %15.2f\n", y2*x, y3*x, y4*x, y5*x);
     }
   }
   else { 
     
     /* FIND NEW CHUNKSIZES */

     chunksize = floor((E-Nzero)/(enchunks*1.0));
     remainDer = (E-Nzero) - ((enchunks*1.0) * chunksize);
     for (k = 1; k <= enchunks; k++)   CHUNKS[k] = chunksize;
     for (k = 1; k <= remainDer; k++)  CHUNKS[k]++;
     for (k = 2; k <= enchunks; k++)   CHUNKS[k] += CHUNKS[k-1];

     /* PRINT THE GROWTH CURVE */
 
     fprintf(fpext, "         N         EV        EV1      EV2      EV3      EV4      EV5\n");

     for (k = 1; k <= enchunks; k++){
        N = Nzero + CHUNKS[k];
        x = expV();
        y = alpha1();
        fprintf(fpext, "%15.2f %15.2f %15.4f ", N,  x, y*x);
        y2 = alpha2(y);
        y3 = alphaRecur((double) 3, y2, y);
        y4 = alphaRecur((double) 4, y3, y2);
        y5 = alphaRecur((double) 5, y4, y3);
        fprintf(fpext, "%15.2f %15.2f %15.2f %15.2f\n", y2*x, y3*x, y4*x, y5*x);
     }


     /* AND SHOW THE SPECTRUM AT E */

     N = E;
     eV2N = expV();
     ARS2N[1] = alpha1();
     ARS2N[2] = alpha2(ARS2N[1]);
     for (i = 3; i <= maxrank; i++){
         ARS2N[i] = alphaRecur( (double) i, ARS2N[i-1], ARS2N[i-2]);
     }
     fprintf(fpE, "         m      EVmXN\n");
     for (i = 1; i <= maxrank; i++){
       fprintf(fpE, "%10d %15.2f\n", i, ARS2N[i]*eV2N);
     }
   }

   return (0);
}


double expV()
{
 return(
   ((2*Z)/b) *
      ( 1.0 -
         exp(
           b*(1.0 - sqrt(1.0+ (N/Z)))
         )
      )
 );
}


double alpha1()
{
 return(
   (b*N)/( (2.0*Z)*sqrt(1.0 + (N/Z))) *
     (1.0/
        (
           exp(
             b * (sqrt(1+ (N/Z)) - 1.0)
           )
           - 1.0
        )
     )
 ); 
}


double alpha2(a1)   /* a1 = alpha1() */
double a1;
{
 return(
   (a1 / ( 4.0 * ((Z/N) + 1.0)) ) *
   ( (b* sqrt(1.0+(N/Z))) + 1.0)
 );
}


double alphaRecur(m, amin1, amin2)
double m, amin1, amin2;
{
  double x;
  x = (b*N)/(2*Z*sqrt(1+(N/Z)));
  return(
    (((m-1.0-0.5)/m)*(1.0/( (Z/N)+1.0)) * amin1) +
    ((x * x)/(m*(m-1.0))*amin2)
  );
}


double getS()
{
 return(2.0 * Z / b);
}


void help ()
{
  fprintf (stderr,"lnreSich text.spc\n");
  fprintf (stderr,"OPTIONS:\n");
  fprintf (stderr,"     -h: display help\n");
  fprintf (stderr,"     -m: number of ranks in fit (default: 15)\n");
  fprintf (stderr,"     -k: number of chunks for interpolation (default: 20)\n");
  fprintf (stderr,"     -K: number of chunks for extrapolation (default: 20)\n");
  fprintf (stderr,"     -E: extrapolation sample size (default: 2N)\n");
  fprintf (stderr,"     -H: input files lack header (default: with header)\n");
  fprintf (stderr,"INPUT:\n");
  fprintf (stderr,"     text.spc:  m Vm\n");
  fprintf (stderr,"OUTPUT:\n");
  fprintf (stderr,"     text_S.spc: expected spectrum\n");
  fprintf (stderr,"     text_S.sp2: expected spectrum at 2N\n");
  fprintf (stderr,"     text_S.ev2: E[V(N)] and E[V(2N)]\n");
  fprintf (stderr,"     text_S.sum: summary, fitted parameters\n");
  exit (1);
}
