proc(b,bse,bstan,bpval,Vin,MSSRin)=linregstep(x,y,colname,opt)  
; -----------------------------------------------------------------------
; Library      stats
; -----------------------------------------------------------------
; See_also     linregfs2 linregbs linregopt gls linregfs linregres doglm
; -----------------------------------------------------------------------
; Macro        linregstep
; -----------------------------------------------------------------------
; Description  linregstep computes a stepwise regression for a multiple 
;              linear regression model. 
; ----------------------------------------------------------------------
;   Notes      The regressors x do not contain an intercept column. The 
;              optional parameter opt can be set using linregopt. Otherwise
;              the probabilities of F-to-enter/ F-to-remove will be set to
;              0.05/ 0.10. The stepwise regression routine first fits a 
;              simple linear regression for each of the p potential x 
;              variables. For each simple linear regression model, an 
;              F-statistic for testing whether or not the slope is zero is
;              obtained. After entering a significant variable x_k (in the 
;              sense that this variable contributes significantly to the SSM)
;              all regression models with two x variables are fitted where
;              x_k is one of the pair. The partial F-test statistic examines  
;              whether another x variable is significant for inclusion.  
;              In each step with p >= 2 x variables a test is undertaken
;              whether any of the variables already in the model should be 
;              dropped because it is no longer helpful in conjunction 
;              with variables added at later stages. 
;              The procedure stops if no further x variable can either be 
;              added or deleted. If there is no variable significant to enter
;              in the first step, the estimation contains only the intercept.
;----------------------------------------------------------------------
; Usage        {b,bse,bstan,bpval,Vin,MSSRin} = linregstep (x, y, colname{, opt})
; Input
;   Parameter  x 
;   Definition n x p x d1 x ... x dn array    
;   Parameter  y 
;   Definition n x 1 x d1 x ... x dn array  
;   Parameter  colname
;   Definition string vector
;   Parameter  opt
;   Definition parameter vector     
;  Output
;   Parameter  b
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bse
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bstan
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bpval
;   Definition p x 1 x d1 x ... x dn array   
;   Parameter  Vin
;   Definition sorted column indices of the x variables in the final model, 0 if none
;   Parameter  MSSRin
;   Definition mean sum of squares of the residuals of the final model
; -----------------------------------------------------------------------
; Example      library("stats") 
;              setenv("outputstringformat", "%s")    
;              x1 = #(7,1,11,11,7,11,3,1,2,21,1,11,10)
;              x2 = #(26,29,56,31,52,55,71,31,54,47,40,66,68)
;              x3 = #(6,15,8,8,6,9,17,22,18,4,23,9,8)
;              x4 = #(60,52,20,47,33,22,6,44,22,26,34,12,12)
;              x  = x1~x2~x3~x4
;              y  = #(78.5,74.3,104.3,87.6,95.9,109.2,102.7,72.5)
;              y  = y|#(93.1,115.9,83.8,113.3,109.4)
;              colname=string("X %.f",1:cols(x))
;              opt = linregopt("Fin", 4.0, "Fout", 3.9)
;              {b,bse,bstan,bpval} = linregstep(x,y,colname,opt)                                                    
;
;-----------------------------------------------------------------------
; Result
; Contents of string
; [1,] In : X 4
; Contents of string
; [1,] In  : X 1
; Contents of string
; [1,] In  : X 2
; Contents of string
; [1,] Out : X 4
; 
; Contents of EnterandOut
; [ 1,] Stepwise Regression
; [ 2,] -------------------
; [ 3,] F-to-enter 4.00
; [ 4,] probability of F-to-enter 0.95
; [ 5,] F-to-remove 3.90
; [ 6,] probability of F-to-remove 0.94
; [ 7,]  
; [ 8,] Variables entered and dropped in the following Steps:
; [ 9,] 
; [10,] Step  Multiple R      R^2        F        SigF       Variable(s)
; [11,]  1     0.8213       0.6745     22.799    0.001  In : X 4
; [12,]  2     0.9861       0.9725    176.627    0.000  In : X 1
; [13,]  3     0.9911       0.9823    166.832    0.000  In : X 2
; [14,]  3     0.9893       0.9787    229.504    0.000  Out: X 4
; 
; Contents of ANOVA
; [ 1,] 
; [ 2,] A  N  O  V  A                   SS      df     MSS       F-test   P-value
; [ 3,] _________________________________________________________________________
; [ 4,] Regression                  2657.859     2  1328.929     229.504   0.0000
; [ 5,] Residuals                     57.904 1e+01     5.790
; [ 6,] Total Variation                 2716    12   226.314
; [ 7,] 
; [ 8,] Multiple R      = 0.98928
; [ 9,] R^2             = 0.97868
; [10,] Adjusted R^2    = 0.97441
; [11,] Standard Error  = 2.40634
; 
; Contents of Summary
; [1,] Variables in the Equation for Y:
; [2,]  
; [3,] 
; [4,] PARAMETERS         Beta         SE         StandB      t-test   P-value  Variable
; [5,]   __________________________________________________________________________________
; [6,] b[ 0,]=         52.5773       2.2862       0.0000     22.9980   0.0000   Constant   
; [7,] b[ 1,]=          1.4683       0.1213       0.5741     12.1047   0.0000   X 1
; [8,] b[ 2,]=          0.6623       0.0459       0.6850     14.4424   0.0000   X 2
;-----------------------------------------------------------------------
; Author       Kerstin Zanter   980331
; ----------------------------------------------------------------------
; Reference    Neter, J., Wasserman, W. and Kutner, M. H. (1989),
;              Applied linear regression models, p. 452-460
;
;              Kotz, S., Johnson, N. L. and Read, C. B. (1989),
;              Encyclopedia of Statistical Science, Vol.8, p. 766-767
; ----------------------------------------------------------------------
  ;--- thresholds: either taken from opt or the default probabilities ---
  ;NOTE(review): only one break is given at the end of the switch; 
  ;presumably every case whose condition is true is executed - confirm.
  if(exist(opt)) 
    switch
      case(opt.mode<>0)
        {Fin,Fout,entry,out}=probsw() 
      case(opt.Fin<>NaN)
         Fin=opt.Fin
      case(opt.Fout<>NaN)
        Fout=opt.Fout
      case(opt.entry<>NaN)
        entry=opt.entry
      case(opt.out<>NaN)
        out=opt.out
      break
    endsw
  else
    out   = 0.10
    entry = 0.05
  endif
  ;result collects one row (variable~MultipR~Rsquare~Fvalue~FPvalue) per
  ;enter/remove check; NaN rows mark checks without a change
 result = NaN.*matrix(1,5)
  datahx = x
  n      = rows(datahx)
  p      = cols(datahx)
  ybar   = mean(y)
  ;--- Step 1: p simple regressions, one per layer of the 3rd dimension ---
  x0     = matrix(n,1,p)~reshape(datahx, #(n,1,p))
  y0     = matrix(n,1,p).*y
  beta   = gls(x0,y0)
  yhat   = x0*beta
  dfM    = rows(beta)-1
  dfR    = n-rows(beta)
  dfT    = rows(datahx)-1
  MSSM   = sum((yhat-ybar)^2)/dfM                    ;mean sum of squares model
  MSSR0  = sum((yhat-y)^2)/dfR                       ;mean sum of squares residual
  MSST   = sum((y-ybar)^2)/dfT                       ;mean sum of squares total
  SSTin  = sum((y-ybar)^2)
  Voutt  = 0                                         ;for output:variables removed at step...
  F      = MSSM./MSSR0
  if(exist(entry))
    Fin=qff(1-entry,dfM,dfR)                         ;F-to-enter from the entry probability
  endif
;
  if (sum(F.>Fin, 3)) 
    Vin  = maxind(F, 3)                              ;variable to enter at Step 1
    string ("In : %s", colname[Vin]);
    Vinn = Vin                                       ;only for output
    Vi   = Vin 
    MSSMin = MSSM[1,1,Vin]                           ;Mean sum of squares model at Step 1
    MSSRin = MSSR0[1,1,Vin]                          ;Mean sum of squares residual at Step 1
    SSMin  = MSSMin*dfM                              ;sum of squares model at Step 1
    SSRin  = MSSRin*dfR                              ;sum of squares residual at Step 1
    betain=beta[,,Vin]  
    MultipR= sqrt(SSMin/SSTin)
    Rsquare= SSMin/SSTin
    Fvalue=(SSMin * dfR)/(SSRin * dfM)
    FPvalue=1-cdff(Fvalue,dfM,dfR)
    result = result|(Vi~MultipR~Rsquare~Fvalue~FPvalue)|(NaN.*matrix(1,5))
    nin  = 1                                         ;number of variables in the model (1)
    cont = 1
    if(rows(Vin)==cols(x))                           ;stop procedure if X is one dimensional and already in the model
      cont=0
    else
      Vout = paf(1:cols(datahx),(1:cols(datahx))<>Vin) ;index of variables not in the model   
    endif
    nout = p-1                                       ;number of variables not in the model
    while (cont)            
      cont = 0
      j = 0
      ;first column of beta is a NaN placeholder, candidate j is stored in column j+1
      beta = NaN.*matrix(rows(Vinn)+2)
      dfR  = n-rows(beta)
      dfM  = rows(beta)-1
      MSSR = matrix(1,1,nout)
      MSSM = matrix(1,1,nout)
      while (j<nout)
        j  = j+1
        x1 = matrix(n)~datahx[,Vinn]~datahx[,Vout[j]]
       ;new regression: variable(s) of Step k-1 plus one of the variables left at Step k
        betai = gls(x1,y)
        yhat = x1*betai
        beta = beta~betai
        MSSR[1,1,j] = sum((yhat-y)^2)/dfR  
        MSSM[1,1,j] = sum((yhat-ybar)^2)/dfM
      endo
      SSM  = MSSM*dfM
      SSR  = MSSR*dfR
      ;examination which variable is the next candidate for addition
      F = (SSRin-SSR)/MSSR                          ;partial F-test
      if(exist(entry))
        Fin=qff(1-entry,dfM,dfR)
      endif
;
      if (sum(F.>Fin, 3))                           ;new variable entered at step k
        i      = maxind(F, 3) 
        MSSRin = MSSR[1,1,i]                        ;mean sum of squares residual at Step k
        MSSMin = MSSM[1,1,i]                        ;mean sum of squares model at Step k
        SSRin  = MSSRin*dfR                         ;sum of squares residual at Step k
        SSMin  = MSSMin*dfM                         ;sum of squares model at Step k
        betain=beta[,i+1]  
        MultipR=(sqrt(SSMin/SSTin))
        Rsquare=(SSMin/SSTin)
        Fvalue=((SSMin * dfR)/(SSRin * dfM))
        FPvalue=(1-cdff(Fvalue,dfM,dfR))
        Vi     = Vout[i]                            ;new variable entered
        string ("In  : %s", colname[Vi])
        Vin    = Vin|Vi                             ;variables in the model
        Vinn   = Vinn|Vi                            ;only for output 
        result = result|(Vi~MultipR~Rsquare~Fvalue~FPvalue)
        nin    = nin+1                              ;number of variables in the model
        if (rows(Vout)>1)
          Vout   = paf(Vout, (1:rows(Vout))<>i)     ;index of variables not in the model
        endif
        nout   = nout-1                             ;number of variables not in the model
        Vin    = sort(Vin)
        Vout   = sort(Vout)
        cont   = 1
      else
        result = result|(NaN.*matrix(1,5))
        dfR  = n-rows(beta)+1                      ;no variable entered at current step
        dfM  = rows(beta)-2                        ;correction of dF
      endif
      ;checking procedure: should any of the variables already in the 
      ;model be dropped because it is no longer helpful in conjunction 
      ;with variables added at later stages? 
      c=rows(Vin)
      nin=c
      MSSR = NaN.*matrix(1, 1, rows(Vin))
      MSSM = NaN.*matrix(1, 1, rows(Vin))
      ;NaN placeholder column again: reduced model j is stored in column j+1
      beta = NaN.*matrix(rows(Vin))
      if (rows(Vin)>1)      
        dfM  = rows(beta)-1
        dfR  = n-rows(beta)   
        j = 0
        while (j<rows(Vinn))
          j     = j+1
          Vact  = paf (Vinn, Vinn<>Vinn[j])        ;model without the j-th entered variable
          x1    = matrix(n)~datahx[,Vact]
          betai = gls(x1, y)
          yhat  = x1*betai
          beta  = beta~betai
          MSSR[1,1,j] = sum((yhat-y)^2)/dfR 
          MSSM[1,1,j] = sum((yhat-ybar)^2)/dfM 
        endo
      else
        ;only one variable in the model: the reduced model is the
        ;estimation with intercept only
        x1    = matrix(n)
        betai = gls(x1, y)
        yhat  = x1*betai
        ;keep the NaN placeholder column so that beta[,i+1] in the removal
        ;branch below addresses an existing column (beta was 1 x 1 before)
        beta  = beta~betai
        dfM  = rows(beta)
        dfR  = n-rows(beta)   
        MSSR = sum((yhat-y)^2)/dfR 
        MSSM = sum((yhat-ybar)^2)/dfM 
      endif  
        SSM  = MSSM*dfM  
        SSR  = MSSR*dfR 
        F    = (SSR-SSRin)/MSSRin 
        if(exist(out))
          Fout=qff(1-out,dfM,dfR)
        endif
;
       if (sum(F.<Fout, 3))                      ;partial F-test for removal
          i      = minind(F, 3) 
          MSSMin = MSSM[1,1,i]                    ;mean sum of squares model for variables left
          MSSRin = MSSR[1,1,i]                    ;mean sum of squares residual  for variables left
          SSMin  = MSSMin*dfM                     ;sum of squares model at Step k
          SSRin  = MSSRin*dfR                     ;sum of squares residual at Step k
          betain=beta[,i+1] 
          MultipR=(sqrt(SSMin/SSTin))
          Rsquare=(SSMin/SSTin)
          Fvalue=((SSMin * dfR)/(SSRin * dfM))
          FPvalue=(1-cdff(Fvalue,dfM,dfR))
          V=Vinn[i]
          string ("Out : %s", colname[V])
          if(rows(Vin)>1)
            Vin   = paf(Vin, Vin<>V)              ;index of variables in the model
            Vinn  = paf(Vinn, Vinn<>V)            ;index of variables in the model (unsorted)
            Vin   = sort(Vin)
          else
            Vin = 0                               ;if all variables removed 
          endif
          nin    = nin-1                          ;number of variables in the model
          Voutt=Voutt|V                           ;record the variable that was removed
          ;NOTE(review): V is not appended to Vout and nout is not increased,
          ;so a removed variable is never a candidate again - confirm intent.
          Vout   = sort(Vout)
          result = result|(V~MultipR~Rsquare~Fvalue~FPvalue)
        else
          result=result|(NaN.*matrix(1,5))
          dfR  = n-rows(beta)-1                   ;no variable dropped in current Step
          dfM  = rows(beta)                       ;correction of dF
        endif 
      if (nout==0) 
        cont = 0
      endif
    endo  
    if(Vin<>0) 
      x1   = matrix(n,1)~datahx[,Vin]             ;for output
    endif
  else                                            ;no variable significant for inclusion
    Vout=0
    Vin=0
    Vi=0
    Vinn=0
    x1    = matrix(n,1)
    y1    = y                                     ;estimation only with intercept
    beta  = gls(x1,y1) 
    yhat  = x1*beta
    dfM   = 1
    dfR   = n-rows(beta)
    MSSMin= sum((yhat-ybar)^2)/dfM                ;mean sum of squares model
    MSSRin= sum((yhat-y)^2)/dfR                   ;mean sum of squares residual
    SSMin = MSSMin*dfM                            ;sum of squares model
    SSRin = MSSRin*dfR                            ;sum of squares residual
    betain= beta 
    MultipR=sqrt(SSMin/SSTin)
    Rsquare=SSMin/SSTin
    Fvalue= (SSMin * dfR)/(SSRin * dfM)
    FPvalue=1-cdff(Fvalue,dfM,dfR)    
    result = result|(1~MultipR~Rsquare~Fvalue~FPvalue)
  endif
  ;betain is ordered by time of entry (Vinn); reorder it by variable index,
  ;the intercept (index 0) stays first
  if(rows(betain)>1)
    betain=sort(betain~(0|Vinn), 2)    
    betain=betain[,1]
  endif
  b = betain
  ;prepend a step counter and append an enter/remove flag for outputstep
  result=floor((1:rows(result))/2)~result~(1+((1:rows(result))%2)) 
  EnterandOut=outputstep(colname,result,dfM,dfR,Fin,Fout)
  EnterandOut 
  ANOVA=outputanova(dfR,dfM,dfT,MSSRin,MSSMin,MSST)
  ANOVA  
  k=1:max(result)
  {Summary,bse,bstan,bpval}=outputpar(colname,dfR,dfM,ybar,betain,x1,y,MSSRin,MSSMin,Vin,Vi,k)
  Summary 
endp
