proc(b,bse,bstan,bpval,Vin,MSSRin)=linregbs(x,y,colname,opt)   
; -----------------------------------------------------------------------
; Library      stats
; -----------------------------------------------------------------
; See_also     linregstep linregfs2 linregopt gls linregfs linregres doglm
; -----------------------------------------------------------------------
; Macro        linregbs
; -----------------------------------------------------------------------
; Description  linregbs computes a backward elimination of a multiple
;              linear regression model. 
; ----------------------------------------------------------------------
;   Notes      It is the opposite of forward selection. The regressors x
;              do not contain an intercept column. The optional parameter
;              opt can be set using linregopt. Otherwise the probability
;              of F-to-remove will be set 0.10.
;              The procedure begins with the model containing all potential
;              x variables and identifies the one with the smallest partial
;              F value. If the minimum F is less then F-to-remove, that 
;              variables is dropped. The model with the remaining variables
;              then fitted, and the next candidate for dropping identified.
;              The procedure continues until no further independent 
;              variables can be dropped. If all x variables are dropped the
;              estimate contains only the intercept.
; -----------------------------------------------------------------------
; Usage        {b,bse,bstan,bpval} = linregbs (x, y, colname{, opt})
; Input
;   Parameter  x 
;   Definition n x p x d1 x ... x dn array    
;   Parameter  y 
;   Definition n x 1 x d1 x ... x dn array  
;   Parameter  colname
;   Definition string vector
;   Parameter  opt
;   Definition scalar     
; Output
;   Parameter  b
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bse
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bstan
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bpval
;   Definition p x 1 x d1 x ... x dn array   
; -----------------------------------------------------------------------
; Example      library("stats") 
;              setenv("outputstringformat", "%s")      
;              x1 = #(7,1,11,11,7,11,3,1,2,21,1,11,10)
;              x2 = #(26,29,56,31,52,55,71,31,54,47,40,66,68)
;              x3 = #(6,15,8,8,6,9,17,22,18,4,23,9,8)
;              x4 = #(60,52,20,47,33,22,6,44,22,26,34,12,12)
;              x  = x1~x2~x3~x4
;              y  = #(78.5,74.3,104.3,87.6,95.9,109.2,102.7,72.5)
;              y  = y|#(93.1,115.9,83.8,113.3,109.4)
;              colname=string("X %.f",1:cols(x))
;              opt = linregopt("Fout", 3.9)
;              {beta,se,betastan,p} = linregbs(x,y,colname,opt) 
;-----------------------------------------------------------------------
; Result
; Contents of string
; [1,] Out : X 3
; Contents of string
; [1,] Out : X 4
; 
; Contents of Removal
; [ 1,] Backward Elimination
; [ 2,] -------------------------------
; [ 3,] F-to-remove 3.90
; [ 4,] probability of F-to-remove 0.94
; [ 5,] 
; [ 6,] Step  Multiple R      R^2        F        SigF        Variable(s)
; [ 7,]  1     0.9911       0.9824    111.479    0.000    Out:       none
; [ 8,]  2     0.9911       0.9823    166.823    0.000    Out: X 3
; [ 9,]  3     0.9893       0.9787    229.504    0.000    Out: X 4
; [10,] 
; [11,] Variable removed at Step Number  3 = X 4
; 
; Contents of ANOVA
; [ 1,] 
; [ 2,] A  N  O  V  A                   SS      df     MSS       F-test   P-value
; [ 3,] _________________________________________________________________________
; [ 4,] Regression                  2657.859     2  1328.929     229.504   0.0000
; [ 5,] Residuals                     57.904 1e+01     5.790
; [ 6,] Total Variation                 2716    12   226.314
; [ 7,] 
; [ 8,] Multiple R      = 0.98928
; [ 9,] R^2             = 0.97868
; [10,] Adjusted R^2    = 0.97441
; [11,] Standard Error  = 2.40634
; 
; Contents of Summary
; [1,] Variables in the Equation for Y:
; [2,]  
; [3,] 
; [4,] PARAMETERS         Beta         SE         StandB      t-test   P-value  Variable
; [5,]   __________________________________________________________________________________
; [6,] b[ 0,]=         52.5773       2.2862       0.0000     22.9980   0.0000   Constant   
; [7,] b[ 1,]=          1.4683       0.1213       0.5741     12.1047   0.0000   X 1
; [8,] b[ 2,]=          0.6623       0.0459       0.6850     14.4424   0.0000   X 2
; ----------------------------------------------------------------------   
; Author       Kerstin Zanter   980331
; ----------------------------------------------------------------------
; Reference    Neter, J., Wasserman, W. and Kutner, M. H. (1989),
;              Applied linear regression models, p. 452-460
;
;              Kotz, S., Johnson, N. L. and Read, C. B. (1989),
;              Encyclopedia of Statistical Science, Vol.8, p. 766-767
;
; ----------------------------------------------------------------------   
  if(exist(opt))
     switch
       case(opt.mode<>0)
         {Fout,out}=probbs()
       case(opt.Fout<>NaN)
         Fout=opt.Fout
       case(opt.out<>NaN)
         out=opt.out
       break
     endsw
   else
  out   = 0.10
  endif
  datahx= x
  Vi    = 0
  Vin   = 1:cols(datahx)
  Vinn  = 1:cols(datahx)
  n     = rows(datahx)
  p     = cols(datahx)
  ybar  = mean(y)
  dfM   = p                             ; rows(beta)-1  
  dfR   = n-p-1                         ; n-rows(beta)
  dfT   = n-1                           ; rows(datahx)-1
  x0    = datahx       ;start with model containing all x variables
  x0    = matrix(n,1)~x0     
  y0    = y
  beta  = gls(x0,y0)
  yhat  = x0*beta
  MSSM  = sum((yhat-ybar)^2)/dfM      ;mean sum of squares model
  MSSR  = sum((yhat-y)^2)/dfR         ;mean sum of squares residual
  MSST  = sum((y-ybar)^2)/dfT         ;mean sum of squares total
  MSSMin= MSSM                        ;all entries are equal in this case  
  MSSRin= MSSR             
  MSSTin= MSST 
  SSMin = MSSMin*dfM                  ;sum of squares model
  SSRin = MSSRin*dfR                  ;sum of squares residual             
  SSTin = MSSTin*dfT                  ;sum of squares total
  betain=beta
  MultipR=sqrt(SSMin/SSTin)
  Rsquare=SSMin/SSTin
  Fvalue=(SSMin * dfR)/(SSRin * dfM)
  FPvalue=1-cdff(Fvalue,dfM,dfR)
  Voutt=NaN                           ;only for output (unsorted)
  Vout=NaN 
  k=1                                 ;Step 1: full model
  rm = 1    
  nout=0        
  while(rm)        
    rm=0
    c=rows(Vin)
    nin=c
    if(rows(Vin)>1)                   ;are there still variables in the model?
      j = 0
      MSSR = matrix(1,1,rows(Vin))
      beta = NaN.*matrix(rows(Vin)) 
      while (j<rows(Vin))
        j    = j+1
        Vact = paf(Vin, Vin<>Vin[j])
        x1   = matrix(n)~x[,Vact]
        betai = gls(x1, y)
        yhat = x1*betai
        beta = beta~betai
        dfR  = n-rows(betai)
        dfM  = rows(betai)-1
        MSSR[1,1,j] = sum((yhat-y)^2)/dfR 
        MSSM[1,1,j] = sum((yhat-ybar)^2)/dfM 
      endo
    else
      nact = 0                        ;no variables left in  the model
      x1   = matrix(n)                ;estimation only with intercept if no X variable left
      dfM  = rows(beta)
      beta = gls(x1,y)               
      yhat = x1*beta
      dfR  = n-rows(beta)
      MSSR = sum((yhat-y)^2)/dfR 
      MSSM = sum((yhat-ybar)^2)/dfM 
    endif
    SSM  = MSSM*dfM  
    SSR  = MSSR*dfR 
    F    = (SSR-SSRin)/MSSRin         ;("reduced" model - "full" model)/"full" model
    if(exist(out))                
      Fout=qff(1-out,dfM,dfR)
    endif
    if (sum(F.<Fout, 3))              ;partial F-test for removal
      k=k+1                           ;count steps
      i      = minind(F, 3) 
      MSSMin = MSSM[1,1,i]            ;mean sum of squares model for variables left
      MSSRin = MSSR[1,1,i]            ;mean sum of squares residual after a variable[index i] is dropped
      SSMin  = MSSMin*dfM             ;sum of squares model for variables left
      SSRin  = MSSRin*dfR             ;sum of squares residual for variables left
      betain=beta[,i+1] 
      MultipR=MultipR|(sqrt(SSMin/SSTin))
      Rsquare=Rsquare|(SSMin/SSTin)
      Fvalue=Fvalue|((SSMin * dfR)/(SSRin * dfM))
      FPvalue=FPvalue|(1-cdff(Fvalue,dfM,dfR))
      Vi=Vin[i]
      string ("Out : %s", colname[Vi]) 
      if(k==2)                        ;first removal
        Vout=Vi
      else
        Vout   = Vout|Vi              ;index of variables not in the model
      endif
      nout   = nout+1                 ;number of variables not in the model
      if(rows(Vin)>1)
        Vin   = paf(Vin, Vin<>Vi)     ;index of variables in the model
        Vinn  = paf(Vinn, Vinn<>Vi)   ;index of variables in the model (unsorted)
        Vin   = sort(Vin)
      else
         Vin = 0                      ;if all variables removed 
      endif 
      nin    = nin-1                  ;number of variables in the model
      Voutt=Voutt|Vi 
      Vout   = sort(Vout)
      rm = (nout<cols(datahx))        ;do until there are no variables left to drop
    else
      // Vout = NaN                        ;no variable to remove
      dfR  = n-rows(beta)-1
      dfM  = rows(beta)
    endif    
    if(rows(betain)>1)
      betain=sort(betain~(0|Vinn), 2) ;sort betain for output
      betain=betain[,1]
    endif
  endo  
  b = betain
  if(Vin<>0) 
     x1   = matrix(n,1)~datahx[,Vin]  ;for output
  endif
  Removal=outputbs(MultipR,Rsquare,Fvalue,FPvalue,colname,betain,Voutt,Vout,k,Fout,dfM,dfR)
  Removal
  ANOVA=outputanova(dfR,dfM,dfT,MSSRin,MSSMin,MSSTin)
  ANOVA    
  {Summary,bse,bstan,bpval}=outputpar(colname,dfR,dfM,ybar,betain,x1,y,MSSRin,MSSMin,Vin,Vi,k)
  Summary 
 endp
