proc(r)=gplmbootstraptest(code,x,t,y,h,nboot,opt)
; ----------------------------------------------------------------------------
; Library       gplm
; ----------------------------------------------------------------------------
;  See_also     gplmest gplmcore gplmopt gplminit gplmstat glmest 
; ----------------------------------------------------------------------------
;   Macro       gplmbootstraptest
; ----------------------------------------------------------------------------
;  Description  Bootstrap test for comparing GLM  vs. GPLM.
;               The hypothesis E[y|x,t] = G(x*b + t*g + c) is tested
;               against the alternative E[y|x,t] = G(x*b + m(t)).
;               This macro offers a convenient interface for GPLM 
;               estimation and testing. A preparation of data is 
;               performed (inclusive sorting). 
; ----------------------------------------------------------------------------
;   Keywords    GPLM, Generalized Partial Linear Models, GLM vs. GPLM, test
; ----------------------------------------------------------------------------
;   Notes       This routine uses in some cases the dynamically loaded 
;               shared library gplm.dll (Windows) or gplm.so (Unix). 
; ----------------------------------------------------------------------------
;   Reference   Haerdle/Mammen/Mueller, JASA, 1998
; ----------------------------------------------------------------------------
;   Link   ../tutorials/gplmstart.html Tutorial: GPLM in XploRe 
; ----------------------------------------------------------------------------
;   Usage        myfit = gplmbootstraptest(code,x,t,y,h,nboot{,opt})
;   Input
;     Parameter   code  
;     Definition        text string, the short code for the model (e.g. 
;                       "bilo" for logit or "noid" for ordinary PLM).
;     Parameter   x  
;     Definition        n x p  matrix, the discrete predictor variables.
;     Parameter   t  
;     Definition        n x q  matrix, the continuous predictor variables.
;     Parameter   y  
;     Definition        n x 1  vector, the response variables.
;     Parameter   h  
;     Definition        q x 1  vector, the bandwidth vector.
;     Parameter   nboot  
;     Definition        integer, number of bootstrap replications. 
;                       If nboot<=0, the test is performed using the
;                       asymptotic normal distribution of the test
;                       statistics.
;     Parameter   opt
;     Definition        optional, a list with optional input. The macro
;                       "gplmopt" can be used to set up this parameter.
;                       The order of the list elements is not important.
;                       Parameters which are not given are replaced by 
;                       defaults (see below).
;     Parameter   opt.b0  
;     Definition        p x 1  vector, the initial coefficients. If not 
;                       given, all coefficients are put =0 initially.
;     Parameter   opt.m0  
;     Definition        n x 1  vector, the initial values for the nonparametric
;                       part. If not given, a default is used.
;     Parameter   opt.tg  
;     Definition        ng x 1 vector, a grid for continuous part. If tg is
;                       given, the nonparametric function will also be 
;                       computed on this grid.
;     Parameter   opt.tdesign  
;     Definition        n x r matrix, design for parametric fit for
;                       m(t). This allows to test e.g. quadratic or
;                       cubic functions against m(t).
;                       If not given a linear function (incl. constant) 
;                       will be tested by using the design matrix(n)~t. 
;     Parameter   opt.weights
;     Definition        string, type of observation weights. Can be 
;                       "frequency" for replication counts, or "prior" 
;                       (default) for prior weights in weighted regression.
;     Parameter   opt.wx
;     Definition        scalar or n x 1 vector, frequency or prior 
;                       weights. If not given, set to 1.
;     Parameter   opt.wt  
;     Definition        n x 1  vector, weights for t (trimming factors). 
;                       If not given, set to 1.
;     Parameter   opt.wc 
;     Definition        n x 1  vector, weights for convergence criterion,
;                       w.r.t. m(t) only. If not given, opt.wt is used. 
;     Parameter   opt.wr  
;     Definition        n x 1  vector, weights for test statistics.
;                       If not given, set to 1.
;     Parameter   opt.off
;     Definition        scalar or n x 1 vector, offset. Can be used for
;                       constrained estimation. If not given, set to 0.
;     Parameter   opt.meth
;     Definition        integer, if -1, a backfitting is performed,
;                       if 1 a profile likelihood method is used, and
;                       0 a simple profile likelihood is used. 
;                       The default is 0.
;     Parameter   opt.fscor
;     Definition        integer, if exists and =1, a Fisher scoring is
;                       performed (instead of the default Newton-Raphson
;                       procedure). This parameter is ignored for
;                       canonical links.
;     Parameter   opt.shf
;     Definition        integer, if exists and =1, some output is produced 
;                       which indicates how the iteration is going on.
;     Parameter   opt.nosort
;     Definition        integer, if exists and =1, the continuous variables
;                       t and the grid tg are assumed to be sorted by the 
;                       1st column. Sorting is required by the algorithm,
;                       hence you should switch if off only when the data 
;                       are already sorted.
;     Parameter   opt.miter
;     Definition        integer, maximal number of iterations. The default 
;                       is 10.
;     Parameter   opt.cnv
;     Definition        integer, convergence criterion. The default is 0.0001.
;     Parameter   opt.pow
;     Definition        scalar, power for power link. If not given,
;                       set to 0.
;     Parameter   opt.nbk
;     Definition        scalar, extra parameter k for negative binomial
;                       distribution. If not given, set to 1 (geometric 
;                       distribution).
;   Output
;     Parameter   myfit.b
;     Definition        k x 1  vector, estimated coefficients 
;     Parameter   myfit.bv
;     Definition        k x k  matrix, estimated covariance matrix for b
;     Parameter   myfit.m
;     Definition        n x 1  vector, estimated nonparametric part
;     Parameter   myfit.mg
;     Definition        ngx 1  vector, estimated nonparametric part on grid
;     Parameter   myfit.rr
;     Definition        3 x 1 vector, 3 test statistics according
;                       to Haerdle/Mammen/Mueller
;     Parameter   myfit.alpha
;     Definition        3 x 1 vector, significance level for rejection of 
;                       the parametric hypothesis (for each of the three test 
;                       statisctics).
;     Parameter   myfit.stat
;     Definition        list with the following statistics:
;     Parameter   myfit.stat.deviance
;     Definition        deviance,
;     Parameter   myfit.stat.pearson
;     Definition        generalized pearson's chi^2,
;     Parameter   myfit.stat.loglik
;     Definition        log-likelihood,
;     Parameter   myfit.stat.r2
;     Definition        pseudo R^2,
;     Parameter   myfit.stat.it
;     Definition        2 x 1 vector, number of iterations needed in
;                       semiparametric and biased parametric fit
;     Parameter   myfit.stat.ret
;     Definition        scalar, return code: 
;                         0  o.k., 
;                         1  maximal number of iterations reached
;                            in estimation (if applicable),
;                        -1  missing values have been encountered
;                            in estimation,
;                        -2  missing values in test statistics encountered.
;                        -3  missing values in bootstrap encountered.
;     Parameter   myfit.stat.rrboot
;     Definition        nboot x 3 matrix, values of the bootstrap test
;                       statistics (if applicable).
; ----------------------------------------------------------------------------
;   Example   library("gplm")
;             ;=============================
;             ;  simulate data 
;             ;=============================
;             n=100
;             b=1|2
;             p=rows(b)
;             x=2.*uniform(n,p)-1
;             t=sort(2.*uniform(n)-1,1)
;             m=cos(pi.*t)
;             y=( 1./(1+exp(-x*b-m)).>uniform(n) )
;             ;=============================
;             ;  parametric (logit) fit 
;             ;=============================
;             pf=glmest("bilo",x~t~matrix(n),y)
;             b0  =pf.b[1:p]
;             gamma0=pf.b[p+1:rows(pf.b)]
;             m0  =(t~matrix(n))*gamma0
;             ;=============================
;             ;  semiparametric fit & test
;             ;=============================
;             h=0.6
;             opt=list(b0,m0)
;             sf=gplmbootstraptest("bilo",x,t,y,h,10,opt)
;             b~b0~sf.b
;             sf.alpha
;             ;==========================
;             ;  plot
;             ;==========================
;             library("plot")
;             true=setmask(sort(t~m),"line","thin")
;             linm=setmask(sort(t~m0),"line","red")
;             estm=setmask(sort(t~sf.m),"line","blue")
;             plot(true,estm)
; ----------------------------------------------------------------------------
;   Result    A generalized partially linear logit fit for E[y|x,t] is 
;             computed and tested against the parametric logit. 
;             sf.b contains the coefficients for the linear 
;             part. sf.m contains the estimated nonparametric part 
;             evaluated at observations t. The example gives the true 
;             b together with the logit estimate b0 and the GPLM 
;             estimate sf.b. Also the estimated function sf.m is  
;             displayed together with the true and the linear fit. 
;             sf.del contains the test results for the 3 test statistics 
;             proposed in Haerdle/Mammen/Mueller (1996). 
; ----------------------------------------------------------------------------
;   Author    Marlene Mueller, 2000/05/10
; ----------------------------------------------------------------------------
;
  glmmodels=getglobal("glmmodels")
  twoparfam = sum(code==(glmmodels.twoparfam)) >0
;
  n=rows(x)
  p=cols(x)
  q=cols(t)
  one=matrix(n)
  error(n!=rows(t),"x and t have different number of rows")
  wr=one
;
; Do we have options?
;
  if (exist(opt)==0)
    dummy=0
    opt=list(dummy)  ; dummy optional parameter, doesn't matter
  endif
  if (comp(opt,"tdesign")>0)
    notgood=(rows(opt.tdesign)!=n)+(rows(dim(opt.tdesign))>2)
    warning (notgood>0, "opt.tdesign not consistent with t, not used")
    if (notgood==0)
      tdesign=opt.tdesign
    endif
    if (comp(opt,"wr")>0)
      notgood=(dim(dim(opt.wr))!=1)*(rows(opt.wr)!=n)+(rows(dim(opt.wr))!=1)
      notgood=notgood+(sum(abs(opt.wr))==0)+(min(opt.wr)<0)
      warning(notgood>0, "opt.wr was unusable, used opt.wr=1 instead")
      if (notgood==0)
        wr=opt.wr
      endif
    endif
  endif
;
  havesimpledesign=1-exist(tdesign)
;
; now we can run gplminit
;
  {xs,ts,ys,wx,wt,wc,b0,m0,off,ctrl,tg,m0g,tro,tgro}=gplminit(code,x,t,y,h,opt)
  bad=countNaN(inv(ts[,1:q]'*(wx.*ts[,1:q])))
  error (bad>0,"gplmest: Matrix t is singular!")
  h=vec(h).*matrix(q)
;
  if (havesimpledesign)
    if (cols(ts)>q)
      wr=ts[,cols(ts)]
    endif
    tdesign=one~ts[,1:q]
  else
    if (cols(ts)>(q+cols(tdesign)))
      wr=ts[,cols(ts)]
    endif
    tdesign=ts[,(q+1):(q+cols(tdesign))]
  endif
  ts=ts[,1:q]
;
  shf=ctrl[1]
  fscor=ctrl[4]
  pow=ctrl[5]
  nbk=ctrl[6]
  havegrid=(countNaN(tg)!=prod(dim(tg)))
;
  if (havegrid)
    ng=rows(tg)
  endif
;
; set some defaults
;
  nosort=0
  noinit=0
  weights="prior"
;
; now check which other optional values are relevant
;
  if (comp(opt,"nosort")>0)
    nosort=(opt.nosort==1)*(dim(dim(opt.nosort))==1)
  endif
  if (comp(opt,"noinit")>0)
    nosort=(opt.noinit==1)*(dim(dim(opt.noinit))==1)
  endif
  if (comp(opt,"weights")>0)
    weights=opt.weights
  endif
;
; now do the GLM fit
;
  {b,bv,it,ret}=glmcore(code,xs~tdesign,ys,wx,off,ctrl[1:6])
  stat=glmstat(code,xs~tdesign,ys,b,bv,list(wx,weights,off,pow,nbk))
  append(stat,it)
  append(stat,ret)
  glmfit=list(b,bv,stat)
;  glmfit=glmest(code,xs~tdesign,ys,opt)
;
  b0=glmfit.b[1:p]
  m0=tdesign*(glmfit.b[(p+1):rows(glmfit.b)])
  if (havegrid)
    if (havesimpledesign)
      m0g=(matrix(ng)~tg)*(glmfit.b[(p+1):rows(glmfit.b)])
    else
      tmp=sker(ts,h',"qua",matrix(n)~m0,tg)
      m0g=tmp[,2]./tmp[,1]
    endif
  endif
  eta0=xs*b0+m0+off
  mu0=glmlink(code,xs*b0+m0,opt)
;
; now do the GPLM fit
;
  if (havegrid)
    r=gplmcore(code,xs,ts,ys,h,wx,wt,wc,b0,m0,off,ctrl,1,tg,m0g)
  else
    r=gplmcore(code,xs,ts,ys,h,wx,wt,wc,b0,m0,off,ctrl,1)
  endif
  b=r.b
  m=r.m
  eta=xs*b+m+off
  mu=glmlink(code,eta,opt)
  ret=r.ret
;
; finally, we need to rearrange, if t, tg were sorted
;
  if (countNaN(tro)!=prod(dim(tro)))
    r.m=r.m[tro]
    if (countNaN(tgro)!=prod(dim(tgro)))
      r.mg=r.mg[tgro]
    endif
  endif
;
; produce the statistics of the GPLM 
;
  stat=gplmstat(code,x,t,y,r.b,r.bv,r.m,r.df,list(wx,weights,off,pow,nbk))
  if (twoparfam)
    r.bv=stat.dispersion.*r.bv
  endif
;
; remove df, it, ret from r
;
  it=r.it
  tmp=comp(r,"df")
  if (tmp>0)
    delete(r,tmp)
  endif
  tmp=comp(r,"it")
  if (tmp>0)
    delete(r,tmp)
  endif
  tmp=comp(r,"ret")
  if (tmp>0)
    delete(r,tmp)
  endif
;
; add a smoothing bias to the GLM fit
;
  if (havegrid)
    tmp=gplmcore(code,xs,ts,mu0,h,wx,wt,wc,b0,m0,off,ctrl,0,tg,m0g)
  else
    tmp=gplmcore(code,xs,ts,mu0,h,wx,wt,wc,b0,m0,off,ctrl,0)
  endif
  mb=tmp.m
  etab=xs*b0+mb+off
  mub=glmlink(code,etab,opt)
;
; append auxiliary stuff to stat
;
  it = it|tmp.it
  append(stat,it)
;
; determine test statistics
;
  rr=matrix(3,1)
  tmp=glmll(code,mu,mu,opt)-glmll(code,mub,mu,opt)
  rr[1]= 2*sum(wr.*wx.*tmp)
;
  tmp1=glmlld(code+"fs",eta,ys,opt)
  tmp1=wr.*wx.*tmp1{2}
  tmp2= xs*(b0-b) + mb - m
  rr[2]=-sum(tmp1.*tmp2.*tmp2)
;
  tmp1=glmlld(code+"fs",eta0,ys,opt)
  tmp1=wr.*wx.*tmp1{2}
  rr[3]=-sum(tmp1.*tmp2.*tmp2)
;
  if (countNaN(rr)>0)
    warn=     "======================================================"
    warn=warn|" WARNING!"
    warn==warn|"======================================================"
    warn=warn|" Missing values in test statistics encountered!"
    ret=-2
    if (countNaN(rr)==3)
      nboot=0
      warn=warn|" Skipping bootstrap ..."
    endif
    warn==warn|"======================================================"
    warn
  endif  
;
; determine critical values
;
  if (nboot<=0.5) ; ASYMPTOTIC NORMAL DISTRIBUTION
    tmp=paf(ts,wr>0)
    tsupp=prod(max(tmp)-min(tmp),2)
    tmp=paf(wr,wr)
    error(max(tmp)!=min(tmp),"opt.wr is different from 0 or 1!")
    en=5*tsupp/(7*prod(h))
    vn=0.71862 ; sqrt(1168780/2263261)
    vn=vn.*sqrt(2*tsupp)./sqrt(prod(h))
    alpha = 1-cdfn((rr-en)/vn)
  else            ; BOOTSTRAP
    rrboot=0.*matrix(nboot,3)
    retboot=0.*matrix(nboot,3)
    iboot=0
    ctrl[1]=0
    while (iboot<nboot)
      iboot=iboot+1
      if (shf==1)
        tmp="======================================================"
        tmp=tmp|" gplmbootstraptest: BOOTSTRAP sample no. "+string("%1.0f", iboot)
        tmp=tmp|"======================================================"
        tmp
      endif
      tmp= genglm(code,xs~tdesign,glmfit.b,opt)
      ys = tmp{1}
      tmp= glmcore(code,xs~tdesign,ys,wx,off,ctrl[1:6])
      b0s= tmp.b[1:p]
      m0s= (tdesign)*tmp.b[p+1:rows(tmp.b)]
      eta0s=xs*b0s+m0s+off
      mu0s=glmlink(code,eta0s,opt)
      if (shf==1)
        b0s
        if (fscor==1)
          "gplmcore/code='"+code+"/fs': "+string("%1.0f iterations",tmp.it)
        else
          "gplmcore/code='"+code+"': "+string("%1.0f iterations",tmp.it)
        endif 
      endif
;  
      if (noinit!=1)
        tmp=gplmcore(code,xs,ts,ys,h,wx,wt,wc,b0s,m0s,off,ctrl,1)
;        tmp=gplmcore(code,xs,ts,ys,h,wx,wt,wc,b0,m0,off,ctrl,1)
      else
        tmp=gplmcore(code,xs,ts,ys,h,wx,wt,wc,b0s,m0s,off,ctrl,1)
      endif  
      bs=tmp.b
      ms=tmp.m
      etas=xs*bs+ms+off
      mus=glmlink(code,etas,opt)
      if (shf==1)
        bs
        if (fscor==1)
          "gplmcore/code='"+code+"/fs': "+string("%1.0f iterations",tmp.it)
        else
          "gplmcore/code='"+code+"': "+string("%1.0f iterations",tmp.it)
        endif 
      endif
      retboot[iboot]=tmp.ret
;
      tmp= gplmcore(code,xs,ts,mu0s,h,wx,wt,wc,b0s,m0s,off,ctrl,0)
      msb=tmp.m
      etasb=xs*b0s+msb+off
      musb=glmlink(code,etasb,opt)
;
      tmp=glmll(code,mus,mus,opt)-glmll(code,musb,mus,opt)
      rrboot[iboot,1]=2*sum(wr.*wx.*tmp)
;
      tmp1=glmlld(code+"fs",etas,ys,opt)
      tmp1=wr.*wx.*tmp1{2}
      tmp2= xs*(b0s-bs) + msb - ms
      rrboot[iboot,2]=-sum(tmp1.*tmp2.*tmp2)
;
      tmp1=glmlld(code+"fs",eta0s,ys,opt)
      tmp1=wr.*wx.*tmp1{2}
      rrboot[iboot,3]=-sum(tmp1.*tmp2.*tmp2)
;      
      retboot[iboot]=retboot[iboot].*(rrboot[iboot]!=NaN) -3.*(rrboot[iboot]==NaN)
    endo
;
    alpha=0.*matrix(3)
    if (max(max(abs(retboot)),2)>0) ; (0.30*nboot)
      warn="======================================================"
      warn=warn|" WARNING!"
      warn=warn|"======================================================"
      warn=warn|" Missing values in bootstrap encountered!"
      ret=-3
      igood=isNumber(rrboot)
      igood=(retboot==0)
      ngood=sum(igood)'
      alpha[1]=(sum(paf(rrboot[,1],igood[,1]).>=rr[1]))./(ngood[1]+1)
      alpha[2]=(sum(paf(rrboot[,2],igood[,2]).>=rr[2]))./(ngood[2]+1)
      alpha[3]=(sum(paf(rrboot[,3],igood[,3]).>=rr[3]))./(ngood[3]+1)
      warn=warn|" The actually used bootstrap sample sizes are:"
      tmp=string("   nboot[%1.0f] = ",1:3)
      tmp=tmp+string("%12.0f",ngood)
      tmp=tmp+string("  (%6.2f",100*ngood/nboot)+"%)"
      warn=warn|tmp
      warn=warn|"======================================================"
      warn
    else
      alpha[1]=(sum(rrboot[,1].>=rr[1]))./(nboot+1)
      alpha[2]=(sum(rrboot[,2].>=rr[2]))./(nboot+1)
      alpha[3]=(sum(rrboot[,3].>=rr[3]))./(nboot+1)
      ret=0
    endif
  endif
;
; append stuff to resulting list
;
  append(r,rr)
  append(r,alpha)
  append(r,glmfit)
  append(stat,ret)
  if (nboot>0)
    append(stat,rrboot)
  endif
  append(r,stat)
endp 




