proc (KM) = kaplanmeier(y,intervals,disp)
; ----------------------------------------------------------------------
; Library 	  stats
; ----------------------------------------------------------------------
;  See_also	  hazard
;----------------------------------------------------------------------
;   Macro	  kaplanmeier
; ----------------------------------------------------------------------
;   Description   Calculation of the Kaplan-Meier (product limit) estimator
;                 of the hazard rate and the survivor function for a set of 
;                 durations. The first column of the input is a censorship 
;                 indicator variable, (equal to zero if the duration is 
;                 censored, and to one otherwise); the second column is the
;                 duration.  
; ----------------------------------------------------------------------
;   Notes         If no grid of intervals is provided, the macro
;                 returns in the first column the exit time, in the second
;                 column the estimated hazard rate at this time, and in the
;                 third column the estimated survivor function. If a grid of
;                 intervals is provided, this macro evaluates the hazard rate
;                 and the survivor functions by step functions on this grid,
;                 and returns in the first column the beginning of each
;                 interval, in the second column the estimated hazard for
;                 that interval [t(i), t(i+1)), and in the third column the
;                 estimated survivor function.
;                 If a constant argument is provided, the macro plots also
;                 the estimated hazard and survivor functions. This constant
;                 argument should appear as last argument.
;                 kaplanmeier generates the graphical display.
; ----------------------------------------------------------------------
;   Usage         h = kaplanmeier(y {,intervals {,disp}})
;   Input 
;     Parameter   y 
;     Definition  n x 2 matrix (censorship indicator, duration)
;     Parameter   intervals
;     Definition  vector (grid of interval bounds), or a constant
;                 requesting the display when no grid is used
;     Parameter   disp
;     Definition  constant
;   Output
;     Parameter   h
;     Definition  matrix with 3 columns (time, hazard, survivor function)
; ----------------------------------------------------------------------
;   Example     ;Nonparametric estimator of the hazard rate for the leukaemia 
;               ; data
;               library("stats") 
;               x = read("leukaemia.dat")
;               h = kaplanmeier(x)
;               h
; ----------------------------------------------------------------------
;   Result
;               Contents of h
;               [1,]        6  0.14286  0.85714 
;               [2,]        7  0.058824  0.80672 
;               [3,]       10  0.066667  0.75294 
;               [4,]       13  0.083333   0.6902 
;               [5,]       16  0.090909  0.62745 
;               [6,]       22  0.14286  0.53782 
;               [7,]       23  0.16667  0.44818 
; -----------------------------------------------------------------------
;   Example     ;Nonparametric estimator of the hazard rate for the leukaemia 
;               ; data.
;               library("stats")
;               x = read("leukaemia.dat")
;               h = kaplanmeier(x,1) 
;               h
; ----------------------------------------------------------------------
;   Result
;               Same as above. The estimated hazard rate and survivor 
;               functions are displayed, as a constant appears as last argument.
;
; ----------------------------------------------------------------------
;   Example     ; Nonparametric estimator of the hazard rate for the leukaemia 
;               ; data, on a grid interval.
;               library("stats")
;               x = read("leukaemia.dat") 
;               k = #(4,8,12,16,20,30); The grid intervals
;               h = kaplanmeier(x,k)
;               h
;-----------------------------------------------------------------------
;   Result
;               Contents of h
;               [1,]        4  0.19048  0.80952 
;               [2,]        8   0.0625  0.75893 
;               [3,]       12  0.083333  0.69568 
;               [4,]       16  0.090909  0.63244 
;               [5,]       20     0.25  0.47433 
;
; ----------------------------------------------------------------------
;   Example     ; Nonparametric estimator of the hazard rate for the leukaemia 
;               ; data, on a grid interval.
;               library("stats")
;               x = read("leukaemia.dat") 
;               k = #(4,8,12,16,20)
;               h = kaplanmeier(x,k,2)
;               h
;-----------------------------------------------------------------------
;   Result
;               Same as above. The estimated hazard rate and survivor 
;               functions are displayed, as a constant appears as last argument.
;
;-----------------------------------------------------------------------
;   Reference
;    Cox, D.R. and D. Oakes (1984): Analysis of Survival data.  
;      Chapman & Hall
; ----------------------------------------------------------------------
;   Keyword
;      Hazard functions
; ----------------------------------------------------------------------
;   Author        Gilles Teyssiere, 980530
; ----------------------------------------------------------------------
  ;; selection of the options
  ;;   opt = 0 : no grid, no display
  ;;   opt = 1 : no grid, display (second argument is a constant)
  ;;   opt = 2 : grid, no display
  ;;   opt = 3 : grid and display
  opt = 0
  if (exist(intervals) == 1)
    if (rows(intervals) > 1)
      if (exist(disp) == 1)
        opt = 3
      else 
        opt = 2
      endif
    else
      opt = 1
    endif
  endif
  ;;
  y = sort(y,2)  ; sort the data by duration (column 2)
  rd = paf(y,y[,1] == 1) ; selects the uncensored durations
  n = rows(rd)
  fd = rd[n,2]   ; last (largest) uncensored exit time
  if (opt <= 1)
    ;; hazard estimated at each distinct exit time
    TKM = matrix(n,2)*0
    i = 1
    test = 1
    while (test == 1) 
      t = rd[1,2]                   ; smallest remaining exit time
      di = rows(paf(rd,rd[,2] == t)); selects the uncensored data exiting at time t
      ni = rows(paf(y,y[,2] >= t))  ; selects the data at risk at time t
      TKM[i,1] = t
      TKM[i,2] = di/ni              ; calculates the hazard rate
      i = i+1
      if (t == fd)
        test = 0
      else 
        rd = paf(rd, rd[,2] > t)    ; drop the exits already processed
      endif
    endo
    ;; Remove the unused rows (ties make the number of distinct exit times
    ;; smaller than n). Filled rows always have a hazard > 0 because
    ;; di >= 1, so the filter uses the hazard column; filtering on the time
    ;; column would also drop a legitimate exit time equal to 0.
    TKM = paf(TKM, TKM[,2] != 0)
    KM = TKM~cumprod(1-TKM[,2])
    if (opt == 1) ; if graphical display 
      DKM = KM
    endif
  else
    ;; hazard estimated for each interval [t1,t2) of the grid
    ;; NOTE(review): assumes every grid point t1 does not exceed the largest
    ;; duration in y, so that the risk set below is never empty - confirm.
    n1 = rows(intervals)-1
    TKM = matrix(n1,2)
    i = 1
    while (i <= n1) 
      t1 = intervals[i]
      t2 = intervals[i+1]
      if (t1 > fd) 
        di = 0                      ; no uncensored exit at or after t1
      else 
        dtemp = (paf(rd,rd[,2] >= t1))
        ;; rd is sorted by duration, so dtemp[1,2] is the smallest exit
        ;; time >= t1. Column 2 holds the duration; column 1 is the
        ;; censorship indicator and must not be compared with t2.
        if (dtemp[1,2] >= t2)
          di = 0                    ; no uncensored exit inside [t1,t2)
        else 
          di = rows(paf(dtemp,dtemp[,2] < t2))
        endif
      endif
      ni = rows(paf(y,y[,2] >= t1)) ; data at risk at the start of the interval
      TKM[i,1] = t1
      TKM[i,2] = di/ni
      i = i+1
    endo
    KM = TKM~cumprod(1-TKM[,2]) 
    if (opt == 3) ; if graphical display
      ;; Build a step function: each interval contributes two points, one
      ;; at its left bound and one just before its right bound.
      DKM = matrix(2*n1,3)
      i = 1
      j = 1
      while (i <= 2*n1)
        DKM[i,] = KM[j,]
        DKM[i+1,] = KM[j,]
        ;; right bound taken from the grid itself: KM has only n1 rows, so
        ;; KM[j+1,1] would be out of range for the last interval
        DKM[i+1,1] = intervals[j+1]-0.00001
        i = i+2
        j = j+1
      endo
      ;; the last step extends to the later of the last exit time and the
      ;; end of the grid
      DKM[2*n1,] = DKM[2*n1-1,]
      DKM[2*n1,1] = max(fd|intervals[n1+1])
    endif
  endif
  ;; Graphical display
  if ((opt == 1) || (opt == 3))
    KaplanMeier = createdisplay(2,1)
    w1 = setmask(DKM[,1]~DKM[,2],"line","solid","red")
    w2 = setmask(DKM[,1]~DKM[,3],"line","solid","blue")
    show(KaplanMeier,1,1,w1)
    show(KaplanMeier,2,1,w2)
    setgopt(KaplanMeier,1,1,"xlabel","time","ylabel","Hazard rate")
    setgopt(KaplanMeier,2,1,"xlabel","time","ylabel","Survivor function")
  endif
endp  







