/*
 *	File: labhub.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *
 *      History:
 *
 *      - feb 1997, version 1.0
 *
 *      Description:
 *
 *      
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

/* functions for numerical procedures */

extern double	gammln ();
extern double	nchoosek ();
extern int	find ();
extern double	findoptimalp ();
extern double	getmse ();
extern double	expV ();
extern double	expVm ();

/* argument reading, file manipulation, and help function */

extern int	leesgetal ();
extern double	leesdouble ();
extern void	change_extension ();
extern void	help ();


/* GLOBAL VARIABLES */

double    N,                         /* tokens */
          V,                         /* types */
          m[MAXM1+2],                /* ranks m */
          Vm[MAXM1+2],               /* V(m,N) */
          CHUNKS[MAXCHUNKS3],        /* the chunk sizes */
          chunksize, remainDer,      /* chunk variables */
          x,
          mse,                       /* mean squared error */
          PROFILE[MAXCHUNKS3][6],    /* V, Vm, m = 1..5 */
          specfac;                   /* specialization factors */

int       token, type,               /* dummy variables for scanf */
          lines,                     /* index for number of lines in spectrum file */
          verbose,                   /* Boolean for extra print statements */
          mmax,                      /* number of ranks m = 1, 2, ..., mmax required */
          indx,                      /* index for the arrays m[] and Vm[] */
          i, j, k,                   /* index variable */
          intmax,                    /* number of intervals to calculate */
          nchunks,                   /* number of chunks */
          header,                    /* Boolean for presence of header */
          r,                         /* index for ranks m */
          precisie,                  /* number of intervals for finding p */
          aantal;

char      new_name[MAXWORDLENGTH],   /* variables for extension handling    */
          base_name[MAXWORDLENGTH],
          woord[MAXWORDLENGTH],
          *s;


FILE      *fpspectrum,               /* fpspectrum: the input file */
          *fpbinom,
          *fpobs;


/* MAIN () */

int main (argc, argv)
int	argc;
char	*argv[];

{
    /* DEFAULTS */

    verbose = 0;               /* default: no extra print statements */
    mmax = 5;                  /* show m=1..3 */
    nchunks = DEF_CHUNKS;      /* use DEF_CHUNKS chunks */
    header = 1;                /* spectrum file has header */
    precisie = 100;
    specfac = NULL_F;
    

    while ((--argc > 0) && ((*++argv)[0] == '-')) {
       for ( s = argv[0]+1; *s != '\0'; s++) {
        switch (*s) {
           case 'h':
            help();
            break;
           case 'H':      /* input files without headers! */
            header = 0;
            break;
           case 'k':
            nchunks = leesgetal (s, &aantal);
            for (; aantal > 0; aantal--){
                  s++;
            }
            break;
           case 'P':
            specfac = leesdouble (s, &aantal);
            for (; aantal > 0; aantal--){
                  s++;
            }
            break;
           case 'p':
            precisie = leesgetal (s, &aantal);
            for (; aantal > 0; aantal--){
                  s++;
            }
            break;
           case 'm':
            mmax = leesgetal (s, &aantal);
            for (; aantal > 0; aantal--){
                  s++;
            }
            break;
           case 'v':
            verbose = 1;
            break;
           default:
            fprintf(stderr, "labhub: illegal option %c\n", *s);
            argc = 0;
            exit(1);
            break;
        }
       }
    }

    /* FILE HANDLING */
 
    if (argc == 0) help();

    /* open input spectrum, should have .spect extension */

    if ((fpspectrum = fopen(*argv, "r")) == NULL) {
        fprintf(stderr, "labhub: can't open spectrum file %s\n", *argv);
        exit(1);
    }
  
    /* file name handling output files */

    strncpy(base_name, *argv, strlen(*argv) - 4);

    change_extension (base_name, new_name, ".obs");
    if ((fpobs = fopen (new_name, "r")) == NULL) {
        fprintf(stderr, "labhub: can't open spectrum file %s\n", *argv);
        exit(1);
    }

    change_extension (base_name, new_name, ".lhu");
    if ((fpbinom = fopen (new_name, "w")) == NULL){
       fprintf(stderr, "labhub: can't open output file %s\n", new_name);
       exit(1);
    }

    /* load spectrum file */

    if (header){
      fscanf(fpspectrum, "%s ", woord);  /* m */
      fscanf(fpspectrum, "%s ", woord);  /* Vm */
    }

    lines = 0;
    while (fscanf(fpspectrum, "%d %d", &token, &type) != EOF)  {
        lines++;
        m[lines] = (double) token;           /* m */
        Vm[lines] = (double) type;           /* V(m,N) */
        N = N + (double) (type * token);     /* N */
        V = V + (double) type;               /* V */
    }

    fprintf(stderr, "labhub: loaded spectrum, N = %d, V = %d\n", 
            (int) N, (int) V);
    if (lines > MAXM1) {
        fprintf (stderr, 
          "labhub: number of ranks in input exceeds array bounds\n");
        exit(1);
    }

    /* load developmental profile */


    if (header){
       for (i = 1; i <= NMEASURES+1; i++){
          if (fscanf(fpobs, "%s ", woord) == EOF){
            fprintf(stderr, "incomplete header for %s.obs\n", base_name);
          }
       }
    }

    /* load the table with observed developmental profiles */

    for (i = 1; i <= nchunks; i++){
       if (fscanf(fpobs, "%d ", &type) != EOF){
           CHUNKS[i] = (double) type;
       }
       else{
           fprintf(stderr, 
             "%s.obs is incompatible with required number of chunks\n", 
             base_name);
           exit(1);
       }
       for (j = 2; j <= NMEASURES+1; j++){
           if (fscanf(fpobs, "%lf ", &x) != EOF){
               switch (j){
                 case 4:
                   PROFILE[i][0] = x;  /* V */
                   break;
                 case 5:
                   PROFILE[i][1] = x;  /* V1 */
                   break;
                 case 6:
                   PROFILE[i][2] = x;  /* V2 */
                   break;
                 case 7:
                   PROFILE[i][2] = x;  /* V3 */
                   break;
                 case 8:
                   PROFILE[i][3] = x;  /* V4 */
                   break;
                 case 9:
                   PROFILE[i][5] = x;  /* V5 */
                   break;
                 default:
                   break;
              }
           }
           else{
               fprintf(stderr, 
                   "%s.obs is incompatible with required number of chunks\n", 
                   base_name);
               exit(1);
           }
       }
    } /* end of for loop */

    fprintf(stderr, "loaded observed profile\n");

    if (specfac==0) specfac = findoptimalp();
    
    printf("p %10.3f    MSE %10.3f\n", specfac, mse);

    /* print profile */

    fprintf(fpbinom, "         N         EV  ");
    for (k = 1; k <= mmax; k++) {
        fprintf(fpbinom, "      EV%d ", k);
    }
    fprintf(fpbinom, "\n");

    for (k = 1; k <= nchunks; k++) {
        fprintf(stderr, "[%d]", k);
        fprintf(fpbinom, "%10.2f %10.2f  ", CHUNKS[k], expV(CHUNKS[k],specfac));
        for (r = 1; r <= mmax; r++) {
            indx = find(r);
            fprintf(fpbinom, " %8.2f ", expVm(CHUNKS[k], indx, (double) r, specfac)); 
        }
        fprintf(fpbinom, "\n");
    }
    fprintf(stderr, "\n");

    return (0);
}


double findoptimalp ()
{
  double	p, x;
  int		i;

  mse = MAXX;
  p = NULL_F;

  for (i = 1; i <= precisie; i++) {
    x = getmse ((1.0 * i) / (1.0 * precisie));
 
    if (x < mse) {
      p = (1.0 * i) / (1.0 * precisie);
      mse = x;
    }
    fprintf (stderr, "%d\r", i);
  }

  fprintf (stderr, "\n");

  return (p);
}


double getmse (pr)
double	pr;

{
  double	sum, x;
  int		i;

  sum = NULL_F;
  for (i = 1; i <= nchunks; i++) {
    x = PROFILE[i][0] - expV (CHUNKS[i], pr);
    sum += (x * x);
  }

  return (sum / nchunks);
}


double expVm (M, ind, rank, prob)
double	M, rank, prob;
int	ind;

{
  int j;
  double sum, p, x;
  
  x = Vm[find((int) rank)];

  if (M != N){
    sum = NULL_F;
    p = M/N;
    for (j = ind; (j <= lines); j++)
    sum += Vm[j]*nchoosek(m[j],rank)*exp(rank*log(p))*exp((m[j]-rank)*log(1-p));
    return((prob*p*x)+((1.0-prob)*sum));
  }
  else{
    return(x);
  }
}


double expV(M,prob)
double M,prob;
{
  int r;
  double sum, p;

  if (M != N){
    p = M/N;
    sum = NULL_F;
    for (r = 1; r <= lines; r++) sum += Vm[r] * exp(m[r] * log(1.0 - p));
    return((prob*V*p) + ((1.0-prob) * (V - sum)) );
  }
  else return(V);
}


int find(k)
int k;
{
    int r;
    for (r = 1; m[r] <= k; r++) ;
    if (m[r-1] == k) return(r-1);
    else return(MAXM1+1);  /* for which Vm = 0 */
}


void help ()
{
  fprintf (stderr, "labhub text.spc\n");
  fprintf (stderr, "OPTIONS:\n");
  fprintf (stderr, "     -h: display help\n");
  fprintf (stderr, "     -m: number of interpolated ranks (default: 3)\n");
  fprintf (stderr, "     -k: number of chunks for interpolation (default: 20)\n");
  fprintf (stderr, "     -H: input files lack header (default: with header)\n");
  fprintf (stderr, "     -p: estimation precision (default: 100, i.e. .01)\n");
  fprintf (stderr, "     -P: fix specialization value at following value\n");
  fprintf (stderr, "INPUT:\n");
  fprintf (stderr, "     text.spc: (m Vm)\n");
  fprintf (stderr, "     Presupposes that text.obs also exists\n");
  fprintf (stderr, "OUTPUT:\n");
  fprintf (stderr, "     text.lhu: (N EV EV1 EV2 ... EVm)\n");
  exit (1);
}
