proc(coefs,intervals,res,sol,dsol,cval,pval) = rqfit(x,y,tau,ci,alpha,iid,interp,tcrit)
; ----------------------------------------------------------------------------
; Library       metrics
; ----------------------------------------------------------------------------
;   Macro       rqfit
; ----------------------------------------------------------------------------
;   Description   Performs quantile regression of y on x using the original simplex 
;                 approach of Barrodale-Roberts/Koenker-d'Orey. 
; ----------------------------------------------------------------------------
;   References
;          [1] Koenker, R.W. and  Bassett,  G.W.  (1978).  Regression
;          quantiles, Econometrica, 46, 33-50.
;   
;          [2] Koenker, R.W. and d'Orey (1987). Computing  Regression
;          Quantiles. Applied Statistics, 36, 383-393.
;   
;          [3] Gutenbrunner,  C.  Jureckova,  J.  (1991).  Regression
;          quantile  and  regression rank score process in the linear
;          model and derived statistics, Annals  of  Statistics,  20,
;          305-330.
;   
;          [4] Koenker, R.W. and d'Orey (1994).  Remark  on  Alg.  AS
;          229:  Computing  Dual  Regression Quantiles and Regression
;          Rank Scores, Applied Statistics, 43, 410-414.
;   
;          [5]  Koenker,  R.W.  (1994).  Confidence   Intervals   for
;          Regression  Quantiles,  in P. Mandl and M. Huskova (eds.),
;          Asymptotic Statistics, 349-359, Springer-Verlag, New York.
; ----------------------------------------------------------------------------
;   Keywords    Quantile regression
; ----------------------------------------------------------------------------
;   Usage       z = rqfit(x,y{,tau,ci,alpha,iid,interp,tcrit})
;
;   Input
;     Parameter   x  
;     Definition        n x p design matrix of explanatory variables.
;     Parameter   y
;     Definition        n x 1 vector, dependent variable.
;     Parameter   tau
;     Definition        desired quantile, default value = 0.5.
;                       If tau is inside <0,1>, a single quantile solution
;                       is computed and returned.
;                       If tau is outside of <0,1>, solutions for all quantiles 
;                       are sought and the program computes the whole quantile 
;                       regression solution as a process in tau. The resulting 
;                       arrays containing the primal and dual solutions and
;                       betahat(tau) are called sol and dsol.
;                       It should be emphasized that this form of the solution 
;                       can be both memory and cpu quite intensive. On typical 
;                       machines it is not recommended for problems with n > 10,000.
;     Parameter   ci
;     Definition        logical flag for confidence intervals (nonzero = TRUE),
;                       default value = 0. If ci = 0, only the estimated coefficients
;                       are returned. If ci != 0, confidence intervals for the 
;                       parameters are computed using the rank inversion method of 
;                       Koenker (1994). Note that for large problems the option 
;                       ci != 0 can be rather slow. Note also that rank inversion 
;                       only works for p > 1, an error message is printed in the case 
;                       that ci != 0 and p = 1.
;     Parameter   alpha
;     Definition        the nominal coverage probability for the confidence 
;                       intervals, i.e., aplha/2 gives the level of significance 
;                       for confidence intervals, default value = 0.1.
;     Parameter   iid
;     Definition        logical flag for iid errors (nonzero = TRUE),
;                       default value = 1. 
;                       If iid != 0, then the rank inversion (see
;                       parameter ci) is based on an assumption of iid error model
;                       and the original version of the rank inversion intervals is 
;                       used (as in Koenker, 1994).
;                       If iid = 0, then it is based on the heterogeneity error 
;                       assumption. See Koenker and Machado (1999) for further details.
;     Parameter   interp
;     Definition        logical flag for smoothed confidence intervals (nonzero = TRUE),
;                       default value = 1.
;                       As with typical order statistic type confidence intervals 
;                       the test statistic is discrete, so it is reasonable to consider 
;                       intervals that interpolate between values of the parameter 
;                       just below the specified cutoff and values just above the 
;                       specified cutoff. 
;                       If interp != 0, this function returns a single interval based 
;                       on linear interpolation of the two intervals. 
;                       If interp = 0, then the 2 "exact" values above 
;                       and below on which the interpolation would be based are returned.
;                       Moreover, in this case c.values and p.values which give 
;                       the critical values and p.values of the upper and lower intervals
;                       are returned.
;     Parameter   tcrit
;     Definition        logical flag for finite sample adjustment using t-statistics 
;                       (nonzero = TRUE), default value = 1.
;                       If tcrit != 0, Student t critical values are used, 
;                       while for tcrit = 0 normal ones are employed.
;
;   Output
;     Parameter   z.coefs
;     Definition        p x 1 or p x m matrix.
;                       If tau is in <0,1>, the only column (p x 1) contains estimated 
;                       coefficients.
;                       If tau is outside <0,1>, then p x m matrix contains
;                       estimated coefficients for all quantiles = sol[4:(p+3),], 
;                       see sol description.
;
;     Parameter   z.intervals
;     Definition        nothing, p x 2, or p x 4 matrix containing confidence intervals.
;                       If ci = 0, then no confidence intervals are computed.
;                       If ci != 0 and interp != 0, then variable intervals has 2 columns, 
;                       interpolated "lower bound" and "upper bound".
;                       If ci != 0 and interp = 0, then variable intervals contains
;                       "lower bound", "Lower Bound", "upper bound", "Upper Bound".
;                       See description of ci and interp parameters for further information.
; 
;     Parameter   z.res
;     Definition        n x 1 vector of residuals.
;                       Not supplied if tau is not inside <0,1>.
;
;     Parameter   z.sol
;     Definition        The primal solution array. This is a (p+3) by J matrix whose 
;                       first row contains the 'breakpoints' tau_1,tau_2,...tau_J, 
;                       of the quantile function, i.e. the values in [0,1] at which the 
;                       solution changes, row two contains the corresponding quantiles 
;                       evaluated at the mean design point, i.e. the inner product of 
;                       xbar and b(tau_i), the third row contains the value of the objective
;                       function evaluated at the corresponding tau_j, and the last p rows 
;                       of the matrix give b(tau_i). The solution b(tau_i) prevails from 
;                       tau_i to tau_i+1. Portnoy (1991) shows that J=O_p(n log n).
;
;     Parameter   z.dsol
;     Definition        The dual solution array. This is an by J matrix containing the 
;                       dual solution corresponding to sol, the ij-th entry is 1 if 
;                       y_i > x_i b(tau_j), is 0 if y_i < x_i b(tau_j), and is between 
;                       0 and 1 otherwise, i.e. if the residual is zero. See Gutenbrunner and 
;                       Jureckova(1991) for a detailed discussion of the statistical
;                       interpretation of dsol. The use of dsol in inference is described 
;                       in Gutenbrunner, Jureckova, Koenker, and Portnoy (1994).
;
;     Parameter   z.cval
;     Definition        c-values, see the description of interp parameter for further information.
;                       Not supplied if tau is not in <0,1> or ci == 0.
;     Parameter   z.pval
;     Definition        p-values, see the description of interp parameter for further information.
;                       Not supplied if tau is not in <0,1> or ci == 0.
; ----------------------------------------------------------------------------
;   Example   library("metrics")
;             ; 
;             ; simulate data
;             ;
;             randomize(101)
;             x = uniform(100,3)
;             y = x[,1] + 2*x[,2] - x[,3] + normal(100)
;             ;
;             ; fit the data ... median regression
;             ;
;             z = rqfit(x,y)
;             z.coefs
; ----------------------------------------------------------------------------
;   Result    Contents of coefs - estimates of b = (1,2,-1)' coefficient vector
;             [1,]   0.8774
;             [2,]   2.0738
;             [3,]  -1.3159 
; ----------------------------------------------------------------------------
;   Example   ; procedure for graphical representation of the results estimated by rqfit quantlet
;             ; parameter obs1 is an outlier added to randomly generated data points
;             proc() = myquant(obs1)
;                 ;
;                 ; initialization
;                 ;
;                 n = 10                   ; number of observations
;                 randomize(17654321)      ; sets random seed
;                 beta = #(1, 2)           ; defines intercept and slope
;                 x = matrix(n)~sort(uniform(n))  
;                 ;                        ; creates design matrix   
;                 ;
;                 ; new x-observation is added
;                 ;
;                 x = x|(1~obs1[1])
;                 m = x*beta               ; defines regression line
;                 eps = 0.05*normal(n)     ; creates obs error
;                 ;
;                 ; new y-observation is added
;                 ;
;                 y = m[1:n] + eps         ; noisy line
;                 y = y|obs1[2]
;                 ;
;                 ; create graphical display and draw data points
;                 ;
;                 d = createdisplay(1,1)     
;                 dat = x[,2]~y                              
;                 outl = obs1[1]~obs1[2]
;                 setmaskp(outl,1,12,15)    ; outlier is blue big star
;                 tdat = x[,2]~m
;                 setmaskl(tdat, (1:rows(tdat))', 1, 1, 1) 
;                 ;                        ; thin blue line
;                 setmaskp(tdat, 0, 0, 0)  ; reduces point size to min
;                 ;
;                 ; estimation of the model using rqfit 
;                 ;
;                 z = rqfit(x,y,0.5)
;                 beta1 = z.coefs
;                 ;
;                 ; draw estimated regression line
;                 ;
;                 yhat = x*beta1
;                 hdat = x[,2]~yhat
;                 setmaskp(hdat, 0, 0, 0)    
;                 setmaskl(hdat, (1:rows(hdat))', 4, 1, 3)
;                 ;                        ; thick red line
;                 show(d, 1, 1, dat[1:n], outl, tdat, hdat)
;                 title="Quantile regression with outlier"
;                 setgopt(d,1,1,"title",title) 
;                 ;                        ; sets title 
;             endp                         ; end of myquant
;             ;
;             ; load metrics library
;             ;
;             library("metrics")
;             ;
;             ; call estimation function with outlier #(0.9,4.5)
;             ;
;             myquant(#(0.9,4.5))
; ----------------------------------------------------------------------------
;   Result    As a result, you should see a graph, in which observations are
;             denoted by black circles and an outlier (observation #(0.9,4.5))
;             is represented by the blue big star in the right upper corner of
;             the graph. 
;             The blue line depicts the true regression line (beta = #(1,2)),
;             while the thick red line shows the estimated regression line.
; ----------------------------------------------------------------------------
;   See_also  rrstest
; ----------------------------------------------------------------------------
;   Author   Pavel Cizek, 990920
; ----------------------------------------------------------------------------
;
; load dll
  if (getenv("os") == "unix")
    h = dlopen("rqbr.so")
  else
    h = dlopen("rqbr.dll")
  endif
;
; check parameter existence and their values
  error(exist(x) <> 1, "Missing data matrix!")
  error(exist(y) <> 1, "Missing dependent variable!")
  error(rows(x) <> rows(y), "x and y have different dimensions!")
  if (exist(tau) <> 1)
    tau = 0.5
  endif
  if (exist(alpha) <> 1)
    alpha = 0.1
  endif
  if (exist(ci) <> 1)
    ci = 0
  endif
  if (exist(iid) <> 1)
    iid = 1
  endif
  if (exist(interp) <> 1)
    interp = 1
  endif
  if (exist(tcrit) <> 1)
    tcrit = 1
  endif
;
; useful values
  tol = 1e-10                                    ; tol <- .Machine$double.eps^(2/3)
  eps = tol
  big = 1e300                                    ; big <- .Machine$double.xmax
  p = cols(x)
  n = rows(x)
  error(n <= p, "Number of observations less than number of parameters!")
  nsol = 2
  ndsol = 2
; parameter values parsing & preparation 
  if ((tau < 0) || (tau > 1)) 
    nsol = 3 * n
    ndsol = 3 * n
    lci1i = 0
    qn = 0 * matrix(p)                         ;    qn <- rep(0, p)
    cutoff = 0
    tau = -1
  else
    if (ci != 0) 
      lci1i = 1
; critical values
      if (tcrit != 0)
        cutoff = qft(1 - alpha/2, n - p)
      else 
        cutoff = qfn(1 - alpha/2)
      endif
; correction for non-iid error structure
      if (iid == 0)
        h = rqband(tau,n)                      ;      h <- bandwidth.rq(tau, n, hs = T)
        bhi = rqfit(x,y,tau+h,0)
        bhi = bhi.coefs
        blo = rqfit(x,y,tau-h,0)
        blo = blo.coefs
        dyhat = x * (bhi - blo)
        ;warning(sum((dyhat <= 0)) > 0, "Some fis <= 0!")
        f = max((eps*matrix(rows(dyhat)))~((2*h)/(dyhat - eps)), 2)
        f = diag(1 / f)   ; while lm takes weights, gls takes omega matrix
        qn = 0 * matrix(p)                         ;    qn <- rep(0, p)
        j = 1
        while (j <= p)                             ;    for (j in 1:p)
          yj = x[,j]
          xj = trans(paf(trans(x), (#(1:p) != j)))
          qnj = gls(xj, yj, f)
          rj = yj - xj * qnj                       ; compute residuals
          qn[j] = trans(rj) * rj
          j = j + 1
        endo
      else 
        qn = (inv(trans(x) * x) .* unit(p)) * matrix(p)      ;  qn = 1/diag(solve(crossprod(x)))
      endif
    else ; if ci != 0
      lci1i = 0
      qn = 0 * matrix(p)                         ;    qn <- rep(0, p)
      cutoff = 0
    endif ; ci != 0
  endif ; tau < 0 || tau > 1
;
; call the external Fortran routine - create all variables
; for meaning of parameters, see rqbr.f
;      subroutine rqbri(dims,a,b,x,e,sol,dsol,qn,ci,tnmat,iw,w)
;      double precision dims(14), w(m + m5*n4)
;      double precision b(m),sol(n3,nsol),a(m,nn),x(nn)
;      double precision e(m),dsol(m,ndsol)
;      double precision qn(nn),ci(4,nn),tnmat(4,nn)
;      integer iw(m + nn*nsol)
; mapping of parameters
; dims = #(m,nn,m5,n3,n4,t,toler,ift,nsol,ndsol,lsol,cutoff,big,lci1i)
  dims = 0 * matrix(14)
  dims = #(n,p,n+5,p+3,p+4,tau,tol,1,nsol,ndsol,0,cutoff,big,lci1i)
; a ... x
; b ... y (vector)
; x ... beta (vector)
  beta = 0 * matrix(p)
; e ... resids  (vector)
  resids = 0 * matrix(n)
; sol, dsol
  sol = 0 * matrix(p+3,nsol)
  dsol = 0 * matrix(n,ndsol)
; qn ... qn  (vector)
; ci
  ci = 0 * matrix(4,p)
; tnmat 
  tnmat = 0 * matrix(4,p)
; iw ... assumes that size of double is bigger than integer
  iw = 0 * matrix(n + p*nsol) 
; w
  w = 0 * matrix(n + (n + 5)*(p + 4)) 
; call the external Fortran routine
  if (getenv("os") == "unix")
    ret = dlcall(h, "irqbri_", dims,x,y,beta,resids,sol,dsol,qn,ci,tnmat,iw,w)
  else
    ret = dlcall(h, "_irqbri", dims,x,y,beta,resids,sol,dsol,qn,ci,tnmat,iw,w)
  endif
; reconstruction of results from dims()
  retcode = dims[8] ; return code - 0 OK, otherwise possible problems
  lsol = dims[11] 
;
; analyze results of the procedure rqbr
  warning(retcode != 0, "Numerical problem during computation!")
  warning(retcode == 1, "   Solution may be nonunique.")
  warning(retcode == 2, "   Premature end - possible conditioning problem in x.")
;
; put the right data on their place 
  res = resids
  coefs = beta
; special cases by parameter values
  if ((tau < 0) || (tau > 1))
     sol = sol[1:(p+3),1:lsol]
     dsol = dsol[1:n,1:lsol]
     coefs = sol[4:(p+3),]
  endif
; lci1i is just a backup of ci parameter
  if (lci1i != 0)
    if (interp != 0)
;     tnmat and ci has dim = 4 x p
      Tn = tnmat                 ; not needed tnmat[1:4,]
      Tci = ci                   ; not needed ci[1:4,]
      Tci[3,] = Tci[3,] + (abs(Tci[4,] - Tci[3,]) .* (cutoff*matrix(1,p) - abs(Tn[3,]))) / abs(Tn[4,] - Tn[3,])
      Tci[2,] = Tci[2,] - (abs(Tci[1,] - Tci[2,]) .* (cutoff*matrix(1,p) - abs(Tn[2,]))) / abs(Tn[1,] - Tn[2,])
      if (countNaN(Tci[2,]) > 0)
        Tnan = sort((1:p) .* isNaN(Tci[2,]))
        Tnan = Tnan[1:(countNaN(Tci[2,]))] 
        Tci[2,Tnan] = -big
      endif
      if (countNaN(Tci[3,]) > 0)
        Tnan = sort((1:p) .* isNaN(Tci[3,]))
        Tnan = Tnan[1:(countNaN(Tci[3,]))] 
        Tci[3,Tnan] = big
      endif
      intervals = trans(Tci[2:3,])
    else
      Tci = ci                   ; not needed [1:4,]
      intervals = trans(Tci)
      cval = trans(tnmat[1:4,])
      cval = cval[,4:1]
      pval = cdft(cval, n - p)
; not present in updated version       pval = pval[,1:4]
    endif
  endif
; close dll
  dlclose(h)
endp
;
; auxiliary functions
;
proc(h) = rqband(p,n,hs,a)
; rq.bandwidth <-
; function(p, n, hs = T, alpha = 0.9)
; Bandwidth selection for sparsity estimation:
; By default uses the Hall-Sheather bandwidth
; otherwise uses Bofinger bandwidth, Note that
; there is a difference in rates!
;
; check parameters
  error(exist(p) <> 1, "Missing p in rq bandwidth")
  error(exist(n) <> 1, "Missing n in rq bandwidth")
  if (exist(hs) <> 1)
    hs = 1
  endif
  if (exist(a) <> 1)
    a = 0.9
  endif
;  
  PI = 3.14159
  x0 = qfn(p)
  f0 = (1/sqrt(2*PI)) * exp(-((x0^2/2)))
  if (hs != 0)
    h = n^(-1/3) * qfn(1 - a/2)^(2/3) * ((1.5 * f0^2)/(2 * x0^2 + 1))^(1/3)
  else 
    h = n^(-0.2) * ((4.5 * f0^4)/(2 * x0^2 + 1)^2)^0.2
  endif
endp
