proc()=twlinreg(x)
; ---------------------------------------------------------------------
; Library     tware
; ---------------------------------------------------------------------
; See_also    twrandomsample twpvalue twnormalize tw1d twpearson twclt 
; ---------------------------------------------------------------------
; Macro       twlinreg
; ---------------------------------------------------------------------
; Description teachware quantlet twlinreg gives visual insight into how 
;							least squares simple linear regression works, and the 
;							relationship between the regression of Y on X, X on Y, 
;							and total regression. 
; ---------------------------------------------------------------------
; Usage       twlinreg({x})
; Input       
; Parameter   x
; Definition  matrix (n x 2) with user defined data 
; ---------------------------------------------------------------------
; Notes       The data are bivariate Gaussian, 
;							and a menu allows control of the number of data points, 
;							and the correlation.Intuitive understanding of least squares 
;							fitting is conveyed through interactive manipulation of a 
;							candidate fit line. A menu gives control over this process, 
;							through incremental adjustments that are selected by check boxes, 
;							followed by a push of the "OK" button. The main graphics 
;							window shows the data scatterplot, together with the least 
;							squares fit line. A text component shows the equation of the 
;							current line (which changes as the line is manipulated), together 
;							with the Residual Sum of Squares which gives a numerical summary 
;							of the goodness of fit. Very effective visual indication 
;							of what RSS means comes from the lower graphics part of this 
;							window, which represents the residuals as vertical lines.  
;							When the fit is poor (and hence the RSS is large), the residual 
;							plot shows why, and give a clear indication of how the 
;							line should be moved to improve the quality of the fit to the data.
;
;             The parameter x is optional. If it is not given, the user
;							is asked to define the desired correlation interactively.
; ---------------------------------------------------------------------
; Example    ; load teachware library
;            library("tware")
;            ; predefine matrix 
;            x = normal(100,2) 
;            ; call twlinreg teachware quantlet
;            twlinreg(x)
; ---------------------------------------------------------------------
; Result    a three window port display with the scatterplot of the matrix x 
;						in the upper port and the values of the RSS and the regression
;						equation in the middle (text) port. The lower port is reserved for
;						display of the current residuals. 
; ---------------------------------------------------------------------
; Keywords    teachware
; ---------------------------------------------------------------------
; Reference   Hrdle, W., Klinke, S. and Marron, J.S. (1999) Connected
;							teaching of statistics
; ---------------------------------------------------------------------
; Link        
; ---------------------------------------------------------------------
; Author    SK, WH, 990103
; ---------------------------------------------------------------------
  if (exist("x")<>1)					// check for optional input		
    p = 30|0								
    p = readvalue("Datapoints"|"Correlation", p)	// ask for correlation
    sig = #(1, p[2])~#(p[2], 1)
    {e,v} = eigsm(sig)
    x = normal(p[1],2)*(v*diag(sqrt(e))*v')
  endif
  d = createdisplay(3,1)				// create three port display
  selhead = "Choose"
  selitem = "Increase slope"|"Decrease slope"|"Increase intercept"|"Decrease intercept"|"Regression of Y on X "|"Regression of X on Y "|"Total regression "
  xt = matrix(rows(x))~x[,1]				// find LS line
  yt = matrix(rows(x))~x[,2]				
  bx = inv(xt'*xt)*(xt'*x[,2])				// regression of Y on X
  by = inv(yt'*yt)*(yt'*x[,1])				// regression of X on Y
  mx = min(x)|max(x)
  xl = mx[,1]~(((1|1)~mx[,1])*bx)
  setmaskp(xl, 0,0,0)
  setmaskl(xl, 1~2, 1, 1, 1)
  yl = (((1|1)~mx[,2])*by)~mx[,2]
  setmaskp(yl, 0,0,0)
  setmaskl(yl, 1~2, 2, 1, 1)
  e  = eigsm(cov(x))
  tl = mean(x)
  if (e.values[1]>e.values[2])
    tl = tl+(mx[,1]/e.vectors[1,1]).*e.vectors[,1]'
  else
    tl = tl+(mx[,2]/e.vectors[2,2]).*e.vectors[,2]'
  endif
  setmaskp(tl, 0,0,0)
  setmaskl(tl, 1~2, 4, 1, 1)
  i1 = 1:4
  i2 = 5:7
  status = 0.*matrix(rows(i1|i2))
  inter  = bx[1]+aseq(-2,20,0.2)
  slope  = bx[2]+aseq(-2,20,0.2)
  ki     = ceil(rows(inter)*uniform(1))
  ks     = ceil(rows(slope)*uniform(1))
  yh     = inter[ki]+slope[ks].*x[,1]				// calculate max and min of res
  res    = x[,2]-yh						// over loops in order to
  mr     = mx[,1]~(min(res)|max(res))				// preserve scale for residuals
  setmaskp(mr,0,0,0)
	do
    yh  = inter[ki]+slope[ks].*mx[,1]
    rl  = mx[,1]~yh
    setmaskp(rl, 0,0,0)
    setmaskl(rl, 1~2, 0, 1, 1)
    pl  = list(x,rl)   
    if (status[i2[1]])
      append(pl, xl)
    endif
    if (status[i2[2]])
      append(pl, yl)
    endif
    if (status[i2[3]])
      append(pl, tl)
    endif
    switch 
    case (size(pl)==2)
      show (d,1,1,pl{1},pl{2})
      break
    case (size(pl)==3)
      show (d,1,1,pl{1},pl{2},pl{3})
      break
    case (size(pl)==4)
      show (d,1,1,pl{1},pl{2},pl{3},pl{4})
      break
    case (size(pl)==5)
      show (d,1,1,pl{1},pl{2},pl{3},pl{4},pl{5})
      break
    endsw
    yh  = inter[ki]+slope[ks].*x[,1]
    res = x[,2]-yh
    txt = string ("yhat = %6.2f + x * %6.2f", inter[ki], slope[ks])
    txt = txt|" "
    txt = txt|string("RSS  = %8.2f", sum(res^2))
    show(d,2,1,txt)
    res = (x[,1]~res)|(x[,1]~(0*matrix(rows(x))))
    setmaskp(res,0,0,0)
    setmaskl(res,(1:rows(x))~((rows(x)+1):(2*rows(x))), 0, 1, 0) 
    show(d,3,1,res)
    seli = selitem
    if (sum(status[i2]==0))
      ind  = paf(i2, status[i2]==0)
      seli[ind] = seli[ind]+"(add)"
    endif
    if (sum(status[i2]==1))
      ind  = paf(i2, status[i2]==1)
      seli[ind] = seli[ind]+"(delete)"
    endif
    sel = selectitem(selhead, seli)
    if (sel[1])
      if (ks<rows(slope)) 
        ks = ks+1
      endif
    endif
    if (sel[2])
      if (ks>1) 
        ks = ks-1
      endif
    endif
    if (sel[3])
      if (ki<rows(inter)) 
        ki = ki+1
      endif
    endif
    if (sel[4])
      if (ki>1) 
        ki = ki-1
      endif
    endif
    if (sel[5])
      status[5] = 1-status[5]
    endif
    if (sel[6])
      status[6] = 1-status[6]
    endif
    if (sel[7])
      status[7] = 1-status[7]
    endif
  until (sum(sel)==0)
endp