proc(xfs, bfs, pvl)=linregfs(x, y, alpha)
; -----------------------------------------------------------------------
; Library      stats  
; -----------------------------------------------------------------
; See_also     linregstep linregfs2 linregopt gls linregbs linregres 0glm doglm
; -----------------------------------------------------------------------
; Macro        linregfs
; -----------------------------------------------------------------------
; Description  linregfs computes a simple forward selection for a
;              multiple linear regression model. 
; ----------------------------------------------------------------------
;   Notes      The regressors x do not contain an intercept column. 
;              The forward selection chooses in each step the not yet
;              selected variable whose correlation coefficient with the
;              current residuals is largest in absolute value. If the
;              optional significance level alpha is given, the selection
;              procedure stops as soon as the regression coefficient of
;              the chosen variable is non-significant; otherwise all
;              coefficients are computed.
;
;              Variables with zero variance cannot be correlated with
;              the residuals; a warning is issued and they are never
;              selected. The selection also stops when no candidate
;              variable is left.
;
;              The returned values are xfs, the selected (q>=1) regressors. 
;              The first regressor is always the constant term.
;              bfs contains the regression coefficients. pvl contains
;              the selected variable, the p-values of the significance 
;              test and the residual sum of squares divided by 
;              the number of observations minus the number of parameters.
; -----------------------------------------------------------------------
; Usage        {xfs,bfs,pvl} = linregfs (x, y {, alpha})
; Input
;   Parameter  x 
;   Definition n x p      regressor variables
;   Parameter  y 
;   Definition n x 1      dependent variable
;   Parameter  alpha
;   Definition scalar     level of testing
; Output
;   Parameter  xfs
;   Definition n x q      constant column followed by the selected regressors
;   Parameter  bfs
;   Definition q x 1      the estimated regression coefficients
;   Parameter  pvl
;   Definition q x 3      selected variable, p-value, residual variance
; -----------------------------------------------------------------------
; Example      ; loads the library stats
;              library("stats")   
;              ; reset random generator 
;              randomize(0)
;              ; generate x
;              x = normal(1000, 3)
;              ; generate y
;              y = 10*x[,3]+x[,1].*x[,2]
;              ; do the forward selection
;              {xfs,bfs,pvl}=linregfs(x, y, 0.05)
;              ; print the regression coefficients and test results
;              bfs~pvl
; -----------------------------------------------------------------------
; Result       Contents of _tmp
;              [1,]  0.021854        0     +NAN     +NaN 
;              [2,]   9.9689        3        0   0.8867 
;              [3,]  0.069415        2  0.017246  0.88256 
; 
;              The result includes the constant term (always first), the 
;              third variable and the second variable. The first column
;              are the regression coefficients. The second column are 
;              the selected variables (0 means constant term). The 
;              third column contains the p-values whereas the last
;              column contains the residual sum of squares divided by
;              n-#parameters.
;
;              This output was recorded with an earlier version that
;              ranked the candidates by the signed correlation; the
;              variable chosen in the second step may therefore differ.
; -----------------------------------------------------------------------
; Author       Sigbert Klinke   970514,970820
; ----------------------------------------------------------------------
; Reference    Chatterjee, S. and Price, B. (1991),
;              Regression Analysis by Example, Wiley, p. 236
; ----------------------------------------------------------------------
  if (exist("alpha")<>1)
    ; no level given: 1-alpha/2 becomes -1, which every cdf value
    ; exceeds, so the significance test below always passes
    alpha = 4
  endif
  n   = rows(x)
  p   = cols(x)
  ; start with the intercept-only model
  xfs = matrix(n)
  bfs = mean(y)
  pvl = 0~NaN~NaN
  ; sel[j] = 1 while variable j is still a candidate
  sel = matrix(p)
  ; yw holds the current residuals
  yw  = y-mean(y)
  pv  = 1
  i   = 0
  while ((i.<p) && pv) 
    ; correlation of every regressor with the current residuals
    c = corr(x~yw)
    c = c[1:p,p+1]
    b = 1-isNumber(c)
    if (sum(b))
      warning (1, "some variables have variance zero")
      k = paf(1:p, b)
      c[k] = 0
      ; an undefined correlation means the variable cannot be fitted
      ; (it would be collinear with the constant), so drop it for good
      sel[k] = 0
    endif
    if (sum(sel))
      i = i+1
      ; rank the candidates by absolute correlation; variables which are
      ; no longer available get -1 and can never win against a candidate
      c = abs(c).*sel-(1-sel)
      k = maxind(c)
      sel[k] = 0
      ; simple regression of the residuals on variable k plus constant
      xk  = x[,k]~matrix(rows(x))
      xtx = inv(xk'*xk)
      b   = xtx*(xk'*yw)
      yw  = yw-xk*b
      ; NOTE(review): the variance estimate divides by n-1 here whereas
      ; the t distribution and pvl below use n-i-1 degrees of freedom
      t   = abs(b[1])./sqrt(xtx[1,1]*sum(yw^2)./(n-1))
      pv  = cdft(t, n-i-1).>(1-alpha/2)
      if (pv)
        xfs = xfs~x[,k]
        bfs = bfs|b[1]
        ; the constant of the partial fit is absorbed by the intercept
        bfs[1] = bfs[1]+b[2]
        pvl = pvl|(k~(2-2.*cdft(t, n-i-1))~(sum(yw^2)/(n-i-1)))
      endif
    else
      ; no candidate variable left: stop the selection
      pv = 0
    endif
  endo
endp


