proc(b,bse,bstan,bpval,Vin,MSSRin)=linregfs2(x,y,colname,opt)  
; -----------------------------------------------------------------------
; Library      stats
; -----------------------------------------------------------------
; See_also     linregstep linregbs linregopt gls linregfs linregres glm doglm
; -----------------------------------------------------------------------
; Macro        linregfs2
; -----------------------------------------------------------------------
; Description  linregfs2 computes a forward selection for a multiple 
;              linear regression model.
; ----------------------------------------------------------------------
;   Notes      The regressors x do not contain an intercept column. The
;              optional parameter can be set using linregopt. Otherwise 
;              the probability of F-to-enter will be set to 0.05. The forward
;              selection routine first fits a simple linear regression for
;              each of the p potential x variables. For each simple linear
;              regression model, an F-statistic for testing whether or not
;              the slope is zero is obtained. After entering a significant
;              variable x_k (in the sense that this variable contributes
;              significantly to the SSM) all regression models with two x 
;              variables are fitted where x_k is one of the pair. The
;              partial F-test statistic examines whether another x 
;              variable is significant for inclusion. The procedure stops 
;              if no further x variable can be added. If there is no 
;              variable significant to enter in the first step, the
;              estimation contains only the intercept.
;              Besides the four outputs listed under Usage, the proc header
;              also returns Vin (sorted indices of the variables in the
;              final model, 0 if none entered) and MSSRin (mean sum of
;              squares of the residuals of the final model).
;----------------------------------------------------------------------
; Usage        {b,bse,bstan,bpval} = linregfs2 (x, y, colname{, opt})
; Input
;   Parameter  x 
;   Definition n x p x d1 x ... x dn array    
;   Parameter  y 
;   Definition n x 1 x d1 x ... x dn array  
;   Parameter  colname
;   Definition string vector
;   Parameter  opt
;   Definition scalar     
; Output
;   Parameter  b
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bse
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bstan
;   Definition p x 1 x d1 x ... x dn array 
;   Parameter  bpval
;   Definition p x 1 x d1 x ... x dn array   
; -----------------------------------------------------------------------
; Example      library("stats")
;              setenv("outputstringformat", "%s") 
;              x1 = #(7,1,11,11,7,11,3,1,2,21,1,11,10)
;              x2 = #(26,29,56,31,52,55,71,31,54,47,40,66,68)
;              x3 = #(6,15,8,8,6,9,17,22,18,4,23,9,8)
;              x4 = #(60,52,20,47,33,22,6,44,22,26,34,12,12)
;              x  = x1~x2~x3~x4
;              y  = #(78.5,74.3,104.3,87.6,95.9,109.2,102.7,72.5)
;              y  = y|#(93.1,115.9,83.8,113.3,109.4)
;              colname=string("X %.f",1:cols(x))
;              opt = linregopt("Fin",4.0)
;              {b,bse,bstan,bpval} = linregfs2(x,y,colname,opt)  
;                                                             
;-----------------------------------------------------------------------
; Result
; Contents of string
; [1,] In  : X 1
; Contents of string
; [1,] In  : X 2
; 
; Contents of Enter
; [ 1,] Forward Selection
; [ 2,] ------------------------------
; [ 3,] F-to-enter 4.00
; [ 4,] probability of F-to-enter 0.95
; [ 5,] 
; [ 6,] Step  Multiple R      R^2        F        SigF       Variable(s)
; [ 7,]  1     0.8213       0.6745     22.799    0.001    In: X 4
; [ 8,]  2     0.9861       0.9725    176.627    0.000    In: X 1
; [ 9,]  3     0.9911       0.9823    166.832    0.000    In: X 2
; [10,] 
; [11,] Variable entered at Step Number 3: X 2
;
; Contents of ANOVA
; [ 1,] 
; [ 2,] A  N  O  V  A                   SS      df     MSS       F-test   P-value
; [ 3,] _________________________________________________________________________
; [ 4,] Regression                  2667.790     3   889.263     166.832   0.0000
; [ 5,] Residuals                     47.973     9     5.330
; [ 6,] Total Variation                 2716    12   226.314
; [ 7,] 
; [ 8,] Multiple R      = 0.99113
; [ 9,] R^2             = 0.98234
; [10,] Adjusted R^2    = 0.97645
; [11,] Standard Error  = 2.30874
; 
; Contents of Summary
; [1,] Variables in the Equation for Y:
; [2,]  
; [3,] 
; [4,] PARAMETERS         Beta         SE         StandB      t-test   P-value  Variable
; [5,]   __________________________________________________________________________________
; [6,] b[ 0,]=         71.6483      14.1424       0.0000      5.0662   0.0007   Constant   
; [7,] b[ 1,]=          1.4519       0.1170       0.5677     12.4100   0.0000   X 1
; [8,] b[ 2,]=          0.4161       0.1856       0.4304      2.2418   0.0517   X 2
; [9,] b[ 3,]=         -0.2365       0.1733      -0.2632     -1.3650   0.2054   X 4
;-----------------------------------------------------------------------
; Author       Kerstin Zanter   980331
; ----------------------------------------------------------------------
; Reference    Neter, J., Wasserman, W. and Kutner, M. H. (1989),
;              Applied linear regression models, p. 452-460
;
;              Kotz, S., Johnson, N. L. and Read, C. B. (1989),
;              Encyclopedia of Statistical Science, Vol.8, p. 766-767
; ----------------------------------------------------------------------
   ; --- entry criterion: either a fixed F-to-enter (Fin) or a probability (entry)
   if(exist(opt))
     switch
       case(opt.mode<>0)
        {Fin,entry}=probfs2()
       case(opt.Fin<>NaN)
         Fin=opt.Fin
       case(opt.entry<>NaN)
         entry=opt.entry
       break
     endsw
   else
    entry = 0.05
  endif
  ; --- Step 1: p simple regressions, stacked along the third dimension
  datahx= x
  n     = rows(datahx)
  p     = cols(datahx)
  ybar  = mean(y)
  x0    = matrix(n,1,p)~reshape(datahx, #(n,1,p))    ;start with a model containing 
  y0    = matrix(n,1,p).*y                           ;one x variable and an intercept
  beta  = gls(x0,y0)
  yhat  = x0*beta
  dfM   = rows(beta)-1 
  dfR   = n-rows(beta)
  dfT   = rows(datahx)-1
  MSSM  = sum((yhat-ybar)^2)/dfM                     ;mean sum of squares model
  MSSR0 = sum((yhat-y)^2)/dfR                        ;mean sum of squares residual
  MSST  = sum((y-ybar)^2)/dfT                        ;mean sum of squares total
  SSTin  = sum((y-ybar)^2)
  F     = MSSM./MSSR0                                ;F-test
  if(exist(entry))
    Fin=qff(1-entry,dfM,dfR)                         ;critical value derived from the entry probability
  endif
  if (sum(F.>Fin, 3)) 
    k=1
    Vin  = maxind(F, 3)                              ;variable to enter at Step 1
    Vinn = Vin                                       ;only for output
    Vi   = Vin   
    MSSRin = MSSR0[1,1,Vin]                          ;mean sum of squares residual at Step 1
    MSSMin = MSSM[1,1,Vin]                           ;mean sum of squares model at Step 1
    SSRin=MSSRin*dfR                                 ;sum of squares residual at Step 1
    SSMin=MSSMin*dfM                                 ;sum of squares model at Step 1
    ; NOTE(review): beta comes from the stacked regressions above, and the
    ; sibling lines select the winner with [1,1,Vin]; beta[Vin] may not pick
    ; the intercept/slope pair of variable Vin - confirm the indexing.
    betain=beta[Vin]
    MultipR= sqrt(SSMin/SSTin)
    Rsquare= SSMin/SSTin
    Fvalue=(SSMin * dfR)/(SSRin * dfM)
    FPvalue=1-cdff(Fvalue,dfM,dfR)
    nin  = 1                                         ;number of variables in the model (1)
    cont = 1
    if(rows(Vin)==cols(x))                           ;stop procedure if X is one dimensional and already in the model
      cont=0
    else
      Vout = paf(1:cols(datahx),(1:cols(datahx))<>Vin) ;index of variables not in the model   
    endif    
    nout = p-1                                       ;number of variables not in the model   
;    
    while (cont)
      cont = 0
      ;new regression, variable(s) Step k-1 in addition to all variables left at Step k
      j = 0
      MSSR = matrix (1,1,nout)
      MSSM = matrix (1,1,nout)
      beta = NaN.*matrix(rows(Vin)+2)                ;placeholder column; candidate j is stored in column j+1
      while (j<nout)
         j    = j+1
         x1   = matrix(n)~datahx[,Vinn]~datahx[,Vout[j]]
         betai = gls(x1,y)
         yhat = x1*betai
         beta = beta~betai
         dfR  = n-rows(betai)
         dfM  = rows(betai)-1 
         MSSR[1,1,j] = sum((yhat-y)^2)/dfR  
         MSSM[1,1,j] = sum((yhat-ybar)^2)/dfM
      endo
      SSM  = MSSM*dfM
      SSR =  MSSR*dfR
      ;examination which variable is the next candidate for addition      
      F    = (SSRin-SSR)/MSSR ; partial F-test 
      ; NOTE(review): the partial F statistic above tests a single added
      ; variable, yet the critical value below uses dfM as numerator df -
      ; confirm whether 1 was intended.
      if(exist(entry))
        Fin=qff(1-entry,dfM,dfR)
      endif
;
      if (sum(F.>Fin, 3))                           ;new variable entered at Step k
        k=k+1     
        i      = maxind(F, 3) 
        string ("In  : %s", colname[Vout[i]])
        MSSMin = MSSM[1,1,i]                        ;mean sum of squares model at Step k
        MSSRin = MSSR[1,1,i]                        ;mean sum of squares residual at Step k
        SSMin  = MSSMin*dfM                         ;sum of squares model at Step k
        SSRin  = MSSRin*dfR                         ;sum of squares residual at Step k
        betain = beta[,i+1] 
        Fnew   = (SSMin * dfR)/(SSRin * dfM)        ;overall F of the model at Step k
        MultipR=MultipR|(sqrt(SSMin/SSTin))
        Rsquare=Rsquare|(SSMin/SSTin)
        Fvalue=Fvalue|Fnew
        ; use only the new F value here: passing the accumulated Fvalue vector
        ; would append one p-value per previous step instead of a single one
        FPvalue=FPvalue|(1-cdff(Fnew,dfM,dfR))
        Vi     = Vout[i]                           ;new variable entered
        Vin    = Vin|Vi                            ;variables in the model
        Vinn   = Vinn|Vi                           ;only for output 
        nin    = nin+1                             ;number of variables in the model
        Vin    = sort(Vin)      
        if (rows(Vout)>1)                          ;otherwise all variables are in the model
          Vout   = paf(Vout, (1:rows(Vout))<>i)    ;index of variables not in the model
          Vout   = sort(Vout)
          nout   = nout-1                          ; number of variables not in the model
          cont   = 1
        else
          cont   = 0                               ;all variables are in the model: stop procedure
        endif
      else
        dfR  = n-rows(beta)+1                      ;no variable entered at current step
        dfM  = rows(beta)-2                        ;correction of dF
      endif
    endo  
    x1   = matrix(n,1)~datahx[,Vin]               ;for output
  else                                            ;no variable significant for inclusion
    k=1
    Vin=0
    Vi=0
    Vinn=0
    x1    = matrix(n,1) ;estimation only with intercept
    y1    = y                   
    beta  = gls(x1,y1) 
    yhat  = x1*beta
    dfM   = 1
    dfR   = n-rows(beta)
    MSSMin= sum((yhat-ybar)^2)/dfM                ;mean sum of squares model
    MSSRin= sum((yhat-y)^2)/dfR                   ;mean sum of squares residual
    SSMin = MSSMin*dfM                            ;sum of squares model
    SSRin = MSSRin*dfR                            ;sum of squares residual
    betain= beta 
    MultipR=sqrt(SSMin/SSTin)
    Rsquare=SSMin/SSTin
    Fvalue= (SSMin * dfR)/(SSRin * dfM)
    FPvalue=1-cdff(Fvalue,dfM,dfR)
  endif
  ; coefficients were estimated in entry order (intercept, then Vinn);
  ; reorder them by variable index so they match the columns of x1
  if(rows(betain)>1)
    betain=sort(betain~(0|Vinn), 2)    
    betain=betain[,1]

 endif
  b = betain
;
  ; --- build and display the three report tables
  Enter=outputfs2(MultipR,Rsquare,Fvalue,FPvalue,colname,Vinn,Vin,Vi,k,Fin,dfM,dfR)
  Enter
  ANOVA=outputanova(dfR,dfM,dfT,MSSRin,MSSMin,MSST)
  ANOVA    
  {Summary,bse,bstan,bpval}=outputpar(colname,dfR,dfM,ybar,betain,x1,y,MSSRin,MSSMin,Vin,Vi,k)
  Summary 
endp
                                                  
