/*
 *	File: lex_statFuncs.c
 *
 *      (C) IWTS
 *          KU Nijmegen
 *          The Netherlands
 *
 *      Author: R. Harald Baayen
 *
 *      History:
 *
 *      - feb 1997, version 1.0
 *
 *      Description:
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "lex_cons.h"


/* EXTERN FUNCTIONS */

extern void	nrerror ();
extern double	*vector ();
extern void	free_vector ();


/* FUNCTION DEFINITIONS */

double getlogMean (numranks, VM, MLIJST, curV)
int	numranks, VM[], MLIJST[], curV;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++){
    som += VM[i] * log (MLIJST[i]);
  }

  return (som / (double) curV);
}


double getlogMean2 (numranks, VM, MLIJST, curV)
int	numranks, VM[], MLIJST[], curV;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++){
    som += VM[MLIJST[i]] * log (MLIJST[i]);
  }

  return (som / (double) curV);
}


double getlogStdDev (mean, numranks, VM, MLIJST, curV)
double	mean;
int	numranks, VM[], MLIJST[], curV;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++){
    som += VM[i] * log (MLIJST[i]) * log (MLIJST[i]);
  }

   return (sqrt ((som - (curV * mean * mean)) / ((double) curV - EINS_F)));
}


double getlogStdDev2 (mean, numranks, VM, MLIJST, curV)
double	mean;
int	numranks, VM[], MLIJST[], curV;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++){
    som += VM[MLIJST[i]] * log (MLIJST[i]) * log (MLIJST[i]);
  }

   return (sqrt ((som - (curV * mean * mean)) / ((double) curV - EINS_F)));
}


double getE (numranks, VM, MLIJST, curN)
int	numranks, VM[], MLIJST[], curN;

{
  int		i;
  double		som;

 /* E <- -sum((spect$m/N)*log(spect$m/N)*spect$Vm) */

  som = NULL_F;
  for (i = 1; i <= numranks; i++) {
    som += VM[i] * log (MLIJST[i] / (double) curN) * (MLIJST[i] / (double) curN);
  }

  return (-som);
}


double getE2 (numranks, VM, MLIJST, curN)
int	numranks, VM[], MLIJST[], curN;

{
  int		i;
  double		som;

 /* E <- -sum((spect$m/N)*log(spect$m/N)*spect$Vm) */

  som = NULL_F;
  for (i = 1; i <= numranks; i++) {
    som += VM[MLIJST[i]] * log (MLIJST[i] / (double) curN) * (MLIJST[i] / (double) curN);
  }

  return (-som);
}


double getD (numranks, VM, MLIJST, curN)
int	numranks, VM[], MLIJST[], curN;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++) {
    som += MLIJST[i] * (MLIJST[i] - EINS_F) * VM[i];
  }

  return (som / (1.0* curN * ((1.0* curN) - EINS_F)));
}


double getD2 (numranks, VM, MLIJST, curN)
int	numranks, VM[], MLIJST[], curN;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++) {
    som += MLIJST[i] * (MLIJST[i] - EINS_F) * VM[MLIJST[i]];
  }

  return (som / (1.0 * curN * ((1.0* curN) - EINS_F)));
}


double getK (numranks, VM, MLIJST, curN)
int	numranks, VM[], MLIJST[], curN;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++) {
    som += MLIJST[i] * MLIJST[i] * VM[i];
  }

  return (10000.0 * (som - curN) / (1.0*curN * curN));
}


double getK2 (numranks, VM, MLIJST, curN)
int	numranks, VM[], MLIJST[], curN;

{
  int		i;
  double		som;

  som = NULL_F;
  for (i = 1; i <= numranks; i++) {
    som += MLIJST[i] * MLIJST[i] * VM[MLIJST[i]];
  }

  return (10000.0 * (som - curN) / (1.0*curN * curN));
}


/* Sichel functions */

double fz (z, curN, curV, n1)
double	z, curN, curV, n1;

{
  if (z <= 0) {
    fprintf (stderr, "z <= 0; %20.10f\n", z);
    exit(1);
  }

  return (EINS_F + ((curV / (2.0 * n1)) * (EINS_F + ((z * n1) / (double) curN)) * log (z)));
}


double delta (x, curN, curV, n1)
double x, curN, curV, n1;

{
  return (x - fz (x, curN, curV, n1));
}

/* modified by Kyo Kageura */

double                       /* Addition by Kyo Kageura */
harmonic(int m)
{
    int i;
    double sum;

    sum = 0.0;
    for( i = 1; i <= m; i++ )
        sum += 1.0 / ((double) (i+1));
    return(sum);
}


double fZipf(z, pstarr, curN, newMethod)
double z, pstarr, curN;
int newMethod;
{
  double  xx, xy, xz;
  extern double harmonic(int);

  if (newMethod == 1) {
      xx = z / harmonic(floor(pstarr*z)); 
  } else {
      xx = z / log(pstarr*z);
  }
  xy = curN / (  curN - z );
  xz = log( curN / z );
  return(xx * xy * xz);
}


double getZ (pster, curN, curV, zeros, newMethod)
double	pster;
int	curN, curV, *zeros, newMethod;

{
  double	Z, maxerror, Vd,
			diff,
			a, b, ab, xx,
			mini;
  int		iterations;

  iterations = 0;
  maxerror = 0.5;
  Vd = (double) curV;
  mini = EINS_F / pster;

  a = mini + 1;
  b = 500.0 * curN;
  Z = a + (0.5 * (b - a));
  xx = fZipf (Z, pster, (double) curN, newMethod);
  /* diff = Vd - fZipf (Z, pster, (double) curN, newMethod); */
  diff = Vd - xx;

  while (fabs (diff) > maxerror) {
    iterations++;

    if (diff > NULL_F) {
      ab = a;
      a = Z;
      b = b + (2 * (b - ab));
      Z = a + (0.5 * (b - a));
    }
    else {
      b = a + (0.5 * (b - a));
      Z = a + (0.5 * (b - a));
    }

  xx = fZipf (Z, pster, (double) curN, newMethod);
  diff = Vd - xx;
    /* diff = Vd - fZipf (Z, pster, (double) curN, newMethod); */

    if (iterations > MAXITERATIONS){
      (*zeros)++;
      return (NULL_F);
    }
  }
  return (Z);
}


double getZ2 (pster, curN, curV, N, newMethod)
double	pster, curN, curV, N;
int newMethod;

{
  double Z, maxerror, Vd,
		 diff,
		 a, b, ab, 
		 mini;
  int	 iterations;

  iterations = 0;
  maxerror = 0.5;
  Vd = (double) curV;
  mini = EINS_F / pster;

  a = mini + 1;
  b = 500.0 * curN;
  Z = a + (0.5 * (b - a));
 
  diff = Vd - fZipf (Z, pster, curN, newMethod);
  /* diff =  Vd - fZipf (Z, pster, N, newMethod); */
  /* note that N = curN, at least in lnreZipf */

  while( (fabs (diff)) > maxerror){
      iterations++;

      if (diff > NULL_F) {   /* Z is too small */
          ab = a;
          a = Z;
          b = b + (2 * (b - ab));
          Z = a + (0.5 * (b - a));
      }
      else{             /* Z is too large */
          b = a + (0.5 * (b - a));
          Z = a + (0.5 * (b - a));
      }
      /* diff =  Vd - fZipf (Z, pster, N, newMethod); */
      diff = Vd - fZipf (Z, pster, curN, newMethod);

      if (iterations > MAXITERATIONS){
           fprintf (stdout, "getZ2 (): ERROR - no solution for Z within 10000 iterations\n");
           exit(1);
      }
  }
  return (Z);
}


/* Estimation of parameters for Gamma fixed at -0.5 */

void getGaussParam (curN, curV, n1, alpha, theta, b, c)
double	curN, curV, n1,
	*alpha, *theta, *b, *c;

{
  double	upper, lower,
		stepsize,
		x, y, z;
  int		slecht;

  upper = curN / n1;
  lower = NULL_F;
  stepsize = (curN / n1) / 1000.0;

  x = upper;
  slecht = 0;

  while (delta (x, curN, curV, n1) > EPSILON) {
    upper = x;
    x -= stepsize;

    if (x > NULL_F) {   
      if (delta (x, curN, curV, n1) < NULL_F) {
        x = upper;
        stepsize /= 1000.0;
      }
    }

    if (fz (x, curN, curV, n1) < NULL_F) {
      x = upper;
      slecht = 1;
      break;
    }
  }

  z = x;
  y = z * n1 / curN;
  *theta = (EINS_F - (y * y));
  *alpha = ((2.0 * n1) / (curV * (*theta))) * (((curN * y) / n1) - EINS_F);

  *c = *theta / (curN * (EINS_F - (*theta)));
  *b = *alpha / sqrt (EINS_F + (*c * curN));

  if (slecht) {
    *c = NULL_F;
    *b = NULL_F;
  }
}


void lognormal (numranks, PVM, VM, MLIJST, curN, smean, sstdev)
int	numranks;
double	PVM[];
int	VM[], MLIJST[], curN;
double	*smean, *sstdev;

{
  int		i;

  *smean = NULL_F;
  for (i = 1; i <= numranks; i++){
    PVM[i] = log (VM[i] * MLIJST[i] / (double) curN);
    *smean += PVM[i];
  }
  *smean = *smean / numranks;

  *sstdev = NULL_F;
  for (i = 1; i <= numranks; i++){
    *sstdev += (PVM[i] - (*smean)) * (PVM[i] - (*smean));
  }
  *sstdev = sqrt (*sstdev / (numranks - 1));
}


void lognormal2 (numranks, PVM, VM, MLIJST, curN, smean, sstdev)
int	numranks;
double	PVM[];
int	VM[], MLIJST[], curN;
double	*smean, *sstdev;

{
  int		i;

  *smean = NULL_F;
  for (i = 1; i <= numranks; i++){
    PVM[i] = log (VM[MLIJST[i]] * MLIJST[i] / (double) curN);
    *smean += PVM[i];
  }
  *smean = *smean / numranks;

  *sstdev = NULL_F;
  for (i = 1; i <= numranks; i++){
    *sstdev += (PVM[i] - (*smean)) * (PVM[i] - (*smean));
  }
  *sstdev = sqrt (*sstdev / (numranks - 1));
}


/* NUMERICAL RECIPIES IN C FUNCTIONS */

double trapzd (func, a, b, n)
double	(*func)(), a, b;
int	n;

{
  double		x,tnm,sum,del;
  static double	s;
  static int	it;
  int		j;

  if (n == 1) {
    it = 1;
    s = 0.5 * (b - a) * (FUNC2 (a) + FUNC2 (b));
    return (s);
  }
  else {
    tnm = it;
    del = (b - a) / tnm;
    x = a + 0.5 * del;

    for (sum = NULL_F, j = 1; j <= it; j++, x += del) {
      sum += FUNC2(x);
    }

    it *= 2;
    s = 0.5 * (s + (b - a) * sum / tnm);
    return s;
  }
}


void polint (xa, ya, n, x, y, dy)
double	xa[], ya[], x, *y, *dy;
int	n;

{
  int		i, m, ns;
  double		den, dif, dift, ho, hp, w,
		*c, *d;

  ns = 1;
  dif = fabs (x - xa[1]);
  c = vector (1, n);
  d = vector (1, n);

  for (i = 1; i <= n; i++) {
    if ((dift = fabs (x - xa[i])) < dif) {
      ns = i;
      dif = dift;
    }

    c[i] = ya[i];
    d[i] = ya[i];
  }

  *y = ya[ns--];

  for (m = 1; m < n; m++) {
    for (i = 1; i <= (n - m); i++) {
      ho = xa[i] - x;
      hp = xa[i+m] - x;
      w = c[i+1] - d[i];

      if ((den = ho - hp) == NULL_F) {
        nrerror("Error in polint ()");
      }

      den = w / den;
      d[i] = hp * den;
      c[i] = ho * den;
    }

    *y += (*dy = ((2 * ns) < (n-m) ? c[ns+1] : d[ns--]));
  }

  free_vector (d, 1, n);
  free_vector (c, 1, n);
}


double qromb (func, a, b)
double	(*func)(), a, b;

{
  double	ss, dss,
		s[JMAXP+1], h[JMAXP+1];
  int		j;

  h[1] = EINS_F;
  for (j = 1; j <= JMAX; j++) {
    s[j] = trapzd (func, a, b, j);

    if (j >= K) {
      polint (&h[j-K], &s[j-K], K, NULL_F, &ss, &dss);
      if (fabs (dss) < (EPS * fabs (ss))) {
        return ss;
      }
    }

    s[j+1] = s[j];
    h[j+1] = 0.25 * h[j];
  }

  nrerror ("Too many steps in qromb ()");
  /* this last step will not be reached at all, but saves us a 
   * compiler warning ... */
  return ss;
}


/* gammln() returns the value ln(Gamma(xx)) for xx > 0.
   Full accuracy is obtained for xx > 1. For 0 < xx < 1,
   the reflection formula 6.1.4. can be used first.
*/

double gammln (xx)
double	xx;

{

/* Internal arithmetic will be done in double precision,
   a nicety that you can omit if five-figure
   accuracy is good enough
*/

  double		x, tmp, ser;
  static double	cof[6] = {76.18009173, -86.50532033,
			  24.01409822, -1.231739516,
			  0.120858003e-2, -0.536382e-5};
  int		j;

  x = xx - EINS_F;
  tmp= x + 5.5;
  tmp -= (x + 0.5) * log (tmp);
  ser = EINS_F;

  for (j = 0; j <= 5; j++) {
    x += EINS_F;
    ser += cof[j]/x;
  }

  return (-tmp + log (2.50662827465*ser));
}


/* factln() returns the value ln(n!) */

double factln (n)
int	n;

{
  static double	a[101];

  if (n < 0) {
    nrerror ("Negative factorial in routine factln()");
  }

  if (n <= 1) {
    return (NULL_F);
  }

  if (n <= 100) { /* in range of the table */
    return (a[n] ? a[n] : (a[n] = gammln (n + EINS_F)));
  }
  else { /* out of range of the table */
    return (gammln (n + EINS_F));
  } 
}


double nchoosek (n, k)
double	n, k;
{
  return (exp (gammln (n + 1) - gammln (n - k + 1) - gammln (k + 1)));
}


/* bincoef () returns the binomial coefficient n, k as a double */

double bincoef (n, k)
int	n, k;
{
  return (floor (0.5 + exp (factln (n) - factln (k) - factln (n - k))));
}


/* gser () returns the incomplete gamma function P(a,x) evaluated
   by its series representation as gamser.
   Also returns ln Gamma(a) as gln.
*/

void gser(gamser, a, x, gln)
double	*gamser, a, x, *gln;

{
  int		n;
  double		sum, del, ap;

  *gln = gammln (a);

  if (x <= NULL_F) {
    if (x < NULL_F) {
      nrerror ("x less than 0 in function gser()");
    }
    *gamser = NULL_F;
    return;
  }
  else{
    ap = a;
    del = sum = EINS_F / a;

    for (n = 1; n <= ITMAX; n++) {
      ap += EINS_F;
      del *= x / ap;
      sum += del;

      if (fabs (del) < fabs (sum) * EPS2) {
        *gamser = sum * exp (-x + a * log (x) - (*gln));
        return;
      }
    }

    nrerror ("a too large, ITMAX too small in function gser()");
    return;
  }
}


/* gcf() returns the incomplete gamma function Q(a,x)
   evaluated by its continued fraction representation as gammcf.
   Also returns ln Gamma(a) as gln.
*/

void gcf (gammcf, a, x, gln)
double	*gammcf, a, x, *gln;

{
  int		n;
  double		gold, g, fac, b1, 
		b0, anf, ana, an, a1, a0;

  gold = NULL_F;
  fac = EINS_F;
  b1 = EINS_F;
  b0 = NULL_F;
  a0 = EINS_F;

  *gln = gammln (a);
  a1 = x;

  /* We are here setting up the A's and B's of equation 5.2.4 
     for evaluating the continued fraction 
  */

  for (n = 1; n <= ITMAX; n++) {
    an= (double) n;
    ana = an - a;
    a0 = (a1 + a0 * ana) * fac; /* One step of the recurrence 5.2.5 */
    b0 = (b1 + b0 * ana) * fac;
    anf = an * fac;
    a1 = x * a0 + anf * a1;     /* The next step of the recurrence 5.2.5. */
    b1 = x * b0 + anf * b1;

    if (a1) {                   /* Shall we renormalize? */
      fac = EINS_F / a1;           /* Yes. Set fac so that it happens */
      g = b1 * fac;             /* New value of answer */

      if (fabs ((g - gold) / g) < EPS2) {              /* Converged? If so, exit. */
        *gammcf = exp (-x + a * log (x) - (*gln)) * g; /* Put factors in front */
        return;
      }
      gold = g;                 /* If not, save value */
    }
  }

  nrerror ("a too large, ITMAX too small in function gcf()");
}


/* gammq() returns the incomplete gamma function Q(a,x) = 1 - P(a,x)
   From Numerical recipes in C, by W.H.Press et. al., p. 173, 521
*/

double gammq (a, x)
double	a, x;

{
  double		gamser, gammcf, gln;

  if ((x < NULL_F) || (a <= NULL_F)) {
    nrerror ("Invalid arguments in function gammq()");
  }

  if (x < (a + EINS_F)) {          /* use the series representation */
    gser (&gamser, a, x, &gln);
    return (EINS_F - gamser);      /* and take its complement */
  }
  else {                        /* use the continued fraction representation */
    gcf (&gammcf, a, x, &gln);
    return (gammcf);
  }
}


void ludcmp (a, n, indx, d)
double	**a;
int	n, *indx;
double	*d;

{ 
  int		i, imax, j, k;
  double		big, dum, sum, temp, *vv;

  vv = vector (1, n);
  *d = EINS_F;
  imax = 0;

  for (i = 1; i <= n; i++) {
    big = NULL_F;
    for (j = 1; j <= n; j++) {
      if ((temp = fabs (a[i][j])) > big) {
        big = temp;
      }
    }
    if (big == NULL_F) {
      nrerror ("Singular matrix in function ludcmp()");
    }
    vv[i] = EINS_F / big;
  }

  for (j = 1; j <= n; j++) {
    for (i = 1; i < j; i++) {
      sum = a[i][j];
      for (k = 1; k < i; k++) {
        sum -= a[i][k] * a[k][j];
      }
      a[i][j] = sum;
    }

    big = NULL_F;

    for (i = j; i <= n; i++) {
      sum = a[i][j];
      for (k = 1; k < j; k++) {
        sum -= a[i][k] * a[k][j];
      }
      a[i][j] = sum;

      if ((dum = vv[i] * fabs (sum)) >= big) {
        big = dum;
        imax = i;
      }
    }

    if (j != imax) {
      for (k = 1; k <= n; k++) {
         dum = a[imax][k];
         a[imax][k] = a[j][k];
         a[j][k] = dum;
       }
      *d = -(*d);
      vv[imax] = vv[j];
    }

    indx[j] = imax;

    if (a[j][j] == NULL_F) {
      a[j][j] = TINY;
    }

    if (j != n) {
      dum = EINS_F / (a[j][j]);
      for (i = j + 1; i <= n; i++) {
        a[i][j] *= dum;
      }
    }
  }

  free_vector (vv, 1, n);
}


void lubksb (a, n, indx, b)
double	**a;
int	n, *indx;
double	b[];

{
  int		i, ii, ip, j;
  double		sum;

  ii = 0;

  for (i = 1; i <= n; i++) {
    ip = indx[i];
    sum = b[ip];
    b[ip] = b[i];

    if (ii) {
      for (j = ii; j <= i - 1; j++) {
        sum -= a[i][j] * b[j];
      }
    }
    else if (sum) {
      ii = i;
    }

    b[i] = sum;
  }

  for (i = n; i >= 1; i--) {
    sum = b[i];
    for (j = i+1; j <= n; j++) {
      sum -= a[i][j] * b[j];
    }

    b[i] = sum / a[i][i];
  }
}


double amotry (p, y, psum, ndim, funk, ihi, nfunk, fac)
double	**p, *y, *psum;
int	ndim;
double	(*funk)();
int	ihi, *nfunk;
double	fac;

{
  int		j;
  double		fac1, fac2, ytry, *ptry;

  ptry = vector (1, ndim);
  fac1 = (EINS_F - fac) / ndim;
  fac2 = fac1 - fac;

  for (j = 1; j <= ndim; j++) {
    ptry[j] = psum[j] * fac1 - p[ihi][j] * fac2;
  }

  ytry = (*funk)(ptry);
  ++(*nfunk);

  if (ytry < y[ihi]) {
    y[ihi] = ytry;
    for (j = 1; j <= ndim; j++) {
      psum[j] += ptry[j] - p[ihi][j];
      p[ihi][j] = ptry[j];
    }
  }
  free_vector (ptry, 1, ndim);

  return (ytry);
}


void amoeba (p, y, ndim, ftol, funk, nfunk, pstarmethod)
double	**p, y[];
int	ndim;
double	ftol, (*funk)();
int	*nfunk;
int	pstarmethod;

{
  int		i, j, ilo, ihi, inhi, mpts;
  double		ytry, ysave, sum, rtol, *psum;

  mpts = ndim + 1;
  psum = vector (1, ndim);
  *nfunk = 0;
  GET_PSUM

  for (;;) {
    ilo=1;
    ihi = y[1] > y[2] ? (inhi=2,1) : (inhi=1,2);

    for (i = 1; i <= mpts; i++) {
      if (y[i] < y[ilo]) {
        ilo = i;
      }
      if (y[i] > y[ihi]) {
         inhi = ihi;
         ihi=i;
       }
       else if (y[i] > y[inhi])
          if (i != ihi) inhi = i;
     }

     rtol = 2.0 * fabs (y[ihi] - y[ilo]) / (fabs (y[ihi]) + fabs (y[ilo]));
     if (rtol < ftol) {
       break;
     }
     if (*nfunk > NMAX) {
       nrerror ("Too many iterations in amoeba()");
     }
     else if (*nfunk > 0) {
       if (pstarmethod == 1) {
         fprintf(stdout, "%4d %7.2f %12.5f %12.5f *\n", *nfunk, y[ihi], p[ihi][1], p[ihi][2]);
         fflush (stdout);
       }
       else {
         fprintf(stdout, "%4d %7.2f %12.5f %12.5f %12.5f *\n", *nfunk, y[ihi], p[ihi][1], p[ihi][2], p[ihi][3]);
         fflush (stdout);
       }
       if ((y[ihi] < ftol) && (y[ilo] < ftol)) {
         break;
       }
     }
   
     ytry = amotry (p, y, psum, ndim, funk, ihi, nfunk, -ALPHA);
     if (ytry <= y[ilo]) {
       ytry = amotry (p, y, psum, ndim, funk, ihi, nfunk, GAMMA);
     }
     else if (ytry >= y[inhi]) {
       ysave=y[ihi];
       ytry = amotry (p, y, psum, ndim, funk, ihi, nfunk, BETA);

       if (ytry >= ysave) {
         for (i = 1; i <= mpts; i++) {
           if (i != ilo) {
             for (j = 1; j <= ndim; j++) {
               psum[j] = 0.5 * (p[i][j] + p[ilo][j]);
               p[i][j] = psum[j];
             }
             y[i] = (*funk)(psum);
           }
         }
         *nfunk += ndim;
         GET_PSUM
      }
    }
  }

  free_vector (psum, 1, ndim);
}


double adjustV (v, n, lexprop, Nzero, Vzero)
double	v, n, lexprop, Nzero, Vzero;

{
  return ((lexprop * (n / Nzero) * Vzero) + ((EINS_F - lexprop) * v));
}

double adjustVm (vm, n, lexprop, Nzero, spectrum)
double	vm, n, lexprop, Nzero, spectrum;

{
  return ((lexprop * (n / Nzero) * spectrum) + ((EINS_F - lexprop) * vm));
}


