proc(desc)=descriptive(x,xvars)
; ----------------------------------------------------------------------------
; Library       stats
; ----------------------------------------------------------------------------
;  See_also     summarize mean var min max quantile kurtosis skewness
; ----------------------------------------------------------------------------
;   Macro       descriptive
; ----------------------------------------------------------------------------
;   Description  descriptive provides extensive descriptive statistics
;                for the columns of a data matrix. An additional vector of name
;                strings can be given to identify columns by names. If no
;                names are given, the columns are labelled "1", "2", ..., "p".
; ----------------------------------------------------------------------------
;   Usage        desc=descriptive(x{,xvars})
;   Input
;     Parameter   x  
;     Definition        n x p  matrix, the columns correspond to the 
;                       variables. (x may contain NaN, Inf or -Inf.)
;     Parameter   xvars
;     Definition        optional, p x 1  string vector, variable names. 
;   Output
;     Parameter   desc
;     Definition        string output, containing table of descriptive
;                       statistics.
; ----------------------------------------------------------------------------
;   Example   library("stats")
;             setenv("outputstringformat", "%s")  
;             x=read("bank2")
;             xvars="length"|"l_height"|"r_height"|"u_frame"|"l_frame"|"diagonal"
;             descriptive(x,xvars)
; ----------------------------------------------------------------------------
;   Result    descriptive statistics for all six variables, as e.g. for length:
;             [  1,]   
;             [  2,] ========================================================= 
;             [  3,]  Variable length 
;             [  4,] ========================================================= 
;             [  5,]   
;             [  6,]  Mean              214.896 
;             [  7,]  Std.Error        0.376554     Variance         0.141793 
;             [  8,]   
;             [  9,]  Minimum             213.8     Maximum             216.3 
;             [ 10,]  Range                 2.5 
;             [ 11,]   
;             [ 12,]  Lowest cases                  Highest cases  
;             [ 13,]          71:         213.8               6:        215.7 
;             [ 14,]          40:         213.9              24:        215.7 
;             [ 15,]         171:         213.9              57:        215.7 
;             [ 16,]          76:         214.1              35:        215.9 
;             [ 17,]         172:         214.2             167:        216.3 
;             [ 18,]   
;             [ 19,]  Median              214.9 
;             [ 20,]  25% Quartile        214.6     75% Quartile        215.1 
;             [ 21,]   
;             [ 22,]  Skewness         0.187724     Kurtosis         0.712654 
;             [ 23,]   
;             [ 24,]  Observations                    200 
;             [ 25,]  Distinct observations            21 
;             [ 26,]   
;             [ 27,]  Total number of {-Inf,Inf,NaN}    0 
;             [ 28,]   
;             [ 29,] ========================================================= 
;             [ 30,] 
; ----------------------------------------------------------------------------
;   Author    Marlene Mueller, 970903
; ----------------------------------------------------------------------------
; a list argument is accepted: its first component is used as the data
  if (exist(x)==9)
    x=x{1}
  endif
; names must be strings; a row of names is turned into a column
  if (exist(xvars)>0)
    error(exist(xvars)!=2,"variable names should be strings!")
    if (cols(xvars)>1)
      xvars=reshape(xvars, cols(xvars)|rows(xvars))
    endif
  endif
;   
  error(rows(dim(x))>2,"first argument must be vector or matrix")
  error(exist(x)!=1,"first argument must be numeric")
  n=rows(x)
  p=cols(x)
;
  if (exist(xvars)>0)
    error(rows(dim(xvars))>1,"variable names have wrong dimension")
    error(rows(xvars)!=p,"variable names have wrong dimension")
  else
; default labels: one per COLUMN (variable), not one per row
    xvars=string("%1.0f",1:p)
  endif
;
; entries equal to zero are rebuilt as 0*1, all others are kept as they are
; (presumably to get rid of negative zeros in the output - TODO confirm)
  x=(0*matrix(n,p)).*(x==0)+x.*(x!=0)
  desc=" "
;
  j=0
  while (j<p)
    j=j+1
; split column j into proper numbers (xxj) and NaN/Inf/-Inf entries
    isnum=isNumber(x[,j])
    if (max(isnum)==1)
      xxj=paf(x[,j],isnum)
      nmiss=n-rows(xxj)
    else
      nmiss=n
    endif
;
    desc=desc|"========================================================="
    desc=desc|" Variable "+xvars[j]
    desc=desc|"========================================================="
    desc=desc|" "
    if (nmiss<n)
; moments of the valid observations
      mj=mean(xxj)
      s2j=var(xxj)
      desc=desc|    " Mean         "+string("%12.6g",mj)
      cont=         " Std.Error    "+string("%12.6g",sqrt(s2j))
      cont=cont+"     Variance     "+string("%12.6g",s2j)
      desc=desc|cont|" "
;
; nj = number of valid observations, r = number of distinct valid values;
; first and last entry of xr are used as minimum and maximum
      nj=rows(xxj)
      {xr,r}=discrete(xxj)
      r=rows(r)
      xmin=xr[1]
      xmax=xr[rows(xr)]
      cont=         " Minimum      "+string("%12.6g",xmin)
      cont=cont+"     Maximum      "+string("%12.6g",xmax)
      desc=desc|cont
      desc=desc|    " Range        "+string("%12.6g",xmax-xmin)
      desc=desc|" "
;
; qj: valid values sorted by value, second column = position within xxj
      qj=sort(xxj~(1:nj),1)
      nlow=min(nj|5)
      ihigh=max(1|(nj-4))
      cont=         " Lowest cases "
      cont=cont+"                 Highest cases "
      desc=desc|cont
      cont=      string("%11.0f: ",qj[1:nlow,2])
      cont=cont+   string(" %12.6g",qj[1:nlow,1])
; qj has only nj rows, so the upper slice must end at nj (not at n),
; otherwise columns containing NaN/Inf would index beyond qj
      cont=cont+string("    %12.0f: ",qj[ihigh:nj,2])
      cont=cont+   string("%12.6g",qj[ihigh:nj,1])
      desc=desc|cont
      desc=desc|" "
;
; order statistics at positions ceil(0.5*nj), ceil(0.25*nj), ceil(0.75*nj)
      desc=desc|    " Median       "+string("%12.6g",qj[ceil(0.5*nj),1])
      cont=         " 25% Quartile "+string("%12.6g",qj[ceil(0.25*nj),1])
      cont=cont+"     75% Quartile "+string("%12.6g",qj[ceil(0.75*nj),1])
      desc=desc|cont|" " 
;
; standardized third and fourth central moments (kurtosis as excess, -3)
      skj=mean ((xxj-mj)^3) ./ s2j^1.5
      kuj=mean ((xxj-mj)^4) ./ (s2j^2) - 3
      cont=         " Skewness     "+string("%12.6g",skj)
      cont=cont+"     Kurtosis     "+string("%12.6g",kuj)
      desc=desc|cont|" "
    else
; no valid observation at all in this column
      r=0
    endif  
;
; distinct values among the NaN/Inf/-Inf entries are counted separately
    if (nmiss>0)
      xnj=paf(x[,j],1-isnum)
      {xnr,rn}=discrete(xnj)
      rn=rows(rn)
    else
      rn=0
    endif
    cont=       " Observations              "+string("%9.0f",n)
    cont=  cont|" Distinct observations     "+string("%9.0f",r+rn)
    desc=desc|cont|" "
;
    cont=       " Total number of {-Inf,Inf,NaN} "+string("%4.0f",nmiss)
    desc=desc|cont
    if (nmiss>0)
      cont=         "   Inf   "+string("%4.0f",sum(xnj==Inf))
      cont=cont+"       NaN   "+string("%4.0f",sum(isNaN(xnj)))
      desc=desc|cont
      cont=         "  -Inf   "+string("%4.0f",sum(xnj==-Inf))
      desc=desc|cont
    endif
    desc=desc|" "
    desc=desc|"========================================================="
    desc=desc|" "
  endo
;
endp
 


