proc (edr, eigen) = sir2 (x, y, h)            
; -----------------------------------------------------------------
; Library        metrics
; -----------------------------------------------------------------
;  See_also      sir sssm
; -----------------------------------------------------------------
;   Macro        sir2
; -----------------------------------------------------------------
;   Keywords     sliced inverse regression, dimension reduction 
; -----------------------------------------------------------------
;   Description  Calculates the effective dimension-reduction (edr)  
;                directions by Sliced Inverse Regression II (Li, 1991)
; -----------------------------------------------------------------
;   Reference    Li (1991), "Sliced inverse regression, 
;                Journal of the American Statistical Association, 86, 316-342
; -----------------------------------------------------------------
;   Usage        {edr, eigen} = sir (x, y, h)
;   Input
;     Parameter  x
;     Definition n x p matrix, the explonatory variable
;     Parameter  y
;     Definition n vector, the dependent variable
;     Parameter  h
;     Definition scalar, number of slices (h>=2)
;                        width of slice (0 < h < 1)
;                        elements in a slice (-h)
;   Output
;     Parameter  edr
;     Definition p x p matrix containing estimates for the edr
;     Parameter  eigen
;     Definition p vector of the eigenvalues
; -----------------------------------------------------------------
;   Example    library("metrics")
;              randomize (10178)
;              x = normal (300, 3)
;              b = #(1, 0 ,1)
;              y = sin (x*b) + normal(300)
;              b = sir2 (x, y, -20)
; 	       b.edr
;	       b.eigen
; -----------------------------------------------------------------
;   Result    estimates of the edr, b.edr, and the eigenvalues, b.eigen
; -----------------------------------------------------------------
;   Author    TTK 961011
; -----------------------------------------------------------------
  n    = rows(x)              ; number of observations                 
  ndim = cols(x)              ; number of dimension                    
  error (-h > n, "Number of slice elements can't exceed number of data")
; calculate the covariance matrix and its inverse root to standardize X
; step 1 in original article
  xb   = mean(x)                         ; mean of x                       
  s    = (x'*x - n.*xb'*xb)./(n-1)       ; cov of x                        
  {eval, evec} = eigsm(s)                ; eigendecomposititon of cov   
  si2  = evec*diag(sqrt(1./eval))*evec'  ; compute cov(x)^(-1/2)        
  xt   = (x-xb)*si2                      ; stand. x to mean=0, cov=I    
; construct slices in Y space
;  step 2 in original article
  data  = sort(y~xt)                     ; sort data with respect to y   
; build slices 
;  ns     = number of slices 
;  condit = n values controlling which x-data fall
;           in a slice depending on choic
;  choice = vector of ns choices to build the slice subset of x 
; case 1: h<=-2  -> each slice with <= abs(h) elements                   
; if n div h != 0 then the first and last slice get the remainder        
switch
  case (h <= -2) 
  h      = abs(h)
  ns     = floor(n / h)
  condit = aseq(1, n, 1)             ; enumber the x values             
  choice = aseq(1, ns, 1).*h         ; take h values in each slice      
  if (h*ns != n)                     ; if there are remaining values    
    hk = floor((n-h*ns)/2)
    if (hk>=0)
      choice = hk|choice+hk          ; generate a new first ...         
    endif
    choice = choice|n                ; ... and last slice               
  endif
  ns = rows(choice)                  ; number of slices                 
  break
; case 2: h>=2  -> take h slices with equal width                        
  case (h >= 2) 
  ns        = h
  slwidth   = (data[n,1] - data[1,1]) / ns    ; width of slices          
              ; compute higher value of all ns slices                    
  slend     = aseq (data[1,1]+slwidth, ns, slwidth)
  slend[ns] = data[n,1]             ; avoids numerical problems        
  condit    = data[,1]              ; choose on original y values      
  choice    = slend                 ; choice to end of slice           
  break
; case 3: 0<h<1  -> take slices with range in percent. of the y-range    
  case ((0.<h) &&(h<=1)) 
  ns = floor(1/h)                   ; number of slices        
;   compute width of slices and vector of upper limit of slice intervals  
  slwidth = (data[n,1] - data[1,1]) * h
  slend   = aseq(data[1,1]+slwidth, ns, slwidth)
  if (slend[ns] > data[n-1,1])      ; does only the last value            
    slend[ns] = data[n,1]           ; lay behind the last slice ?         
  else
    slend = slend | data[n,1]       ; if not, build another slice         
  endif
  condit = data[,1]                 ; choose on original y values  
  choice = slend                    ; choice to end of slice              
  ns     = rows(choice)             ; compute the number of slices        
  break
  default
  error (1, "Error: h must not lay in (-2,0] !!")            
endsw
  error(ns == 1, "Warning: Only one slice is generated !")
;  run over all slices, compute estimate for V = Cov(E(x|y))            
;  step 3 
  hk    = 0                             
  sig2a = matrix (ndim, ndim)*0   ; initialise matrices to compute
  sig2b = matrix (ndim, ndim)*0   ; E[Cov(x|y)]
  ind   = matrix (n)              ; vector of length n containing 1
  d     = ndim            
  ndim  = ndim+1                   ; sorted x values are in data[,2:ndim]       
  j = 1
  while (j <= ns) 
    sborder = (condit <= choice[j,1]) && ind  ; sborder is index of
    p = sum (sborder)                         ; jth slice
    if (p != 0)                               ; are there some x in sclice
      ind    = ind - sborder                  ; don't take them anymore
      xslice = paf (data[,2:ndim], sborder)   ; vector of x in slice
      xmean  = mean (xslice)
      ph     = rows (xslice) / n
      covslice = (xslice-xmean)'*(xslice-xmean) / ph
      sig2a    = sig2a + ph*covslice*covslice
      sig2b    = sig2b + ph*covslice
      hk = hk+1                              ; count non empty slices
    endif
    j = j+1
  endo                   
  error (sum(ind) != 0, "Error: sum(ind) elements unused !!")
  v          = sig2a - sig2b*sig2b     ; this is E[Cov(x|y)]
  v          = (v+v')/2/n              ; for numerical errors 
  {eigen, b} = eigsm (v)               ; step 5, eigenvectors of V  
  b          = si2 * b                 ; calculate e.d.r. direction 
  data       = sqrt (sum(b^2))         ; ... and standardize them   
  edr        = b./data
  h     = sort (eigen~edr', -1)        ; sort eigenvalues
  eigen = h[,1]                        ; and e.d.r. directions
  edr   = h[,2:cols(h)]'
endp
