proc(xc,xcvars)=categorize(x,reftype,refvalue,xvars)
; ----------------------------------------------------------------------------
; Library       stats
; ----------------------------------------------------------------------------
;  See_also     frequency descriptive discrete
; ----------------------------------------------------------------------------
;   Macro       categorize
; ----------------------------------------------------------------------------
;   Description  creates dummy variables from data with respect 
;                to the distinct realizations of each column. The 
;                default reference category is the minimal value in 
;                each column. Alternatively, the reference category 
;                can be chosen by giving a value or the index (rank 
;                among the realizations) for each column. A column 
;                whose only realization is the reference category 
;                cannot be dummy coded and is kept as it is.
; ----------------------------------------------------------------------------
;   Usage        {xc,xcvars}=categorize(x {,reftype,refvalue} {,xvars})
;   Input
;     Parameter   x  
;     Definition        n x p  matrix, the columns correspond to the 
;                       variables.
;     Parameter   reftype
;     Definition        optional, either "value" when reference
;                       values shall be given or "index" when indices of
;                       reference values shall be given.
;     Parameter   refvalue
;     Definition        optional, p x 1 vector, reference values or indices
;                       of reference values. If it is impossible to use 
;                       one of these values, the corresponding
;                       variable is not categorized.
;     Parameter   xvars
;     Definition        optional, p x 1 string vector, variable names. 
;   Output
;     Parameter   xc
;     Definition        n x pc matrix, the categorized data.
;     Parameter   xcvars
;     Definition        pc x 1  string vector, variable names of
;                       categorized data, where the numbers of categories
;                       are preceded by "#".
;                       If xvars was not given, the variable names for x
;                       are set as "Variable 1", "Variable 2" etc. 
; ----------------------------------------------------------------------------
;   Example   library("stats") 
;             randomize(0)
;             x=ceil(normal(50,3)-0.5)
;             {xc,xcvars}=categorize(x,"value",1|0|NaN)
; ----------------------------------------------------------------------------
;   Result    The first variable (column) is categorized with value "1"
;             as reference, the second with value "0", and the third is
;             kept as it is, since there are no NaN's in x.
; ----------------------------------------------------------------------------
;   Author    Marlene Mueller, 970502
; ----------------------------------------------------------------------------
; check that x is numeric before its dimensions are inspected
  error(exist(x)!=1,"first argument must be numeric")
  error(rows(dim(x))>2,"first argument must be vector or matrix")
  if (exist(reftype)>0)
    error(exist(reftype)!=2,"second argument must be string")
  endif
;
  p=cols(x)
;
  havenoref=1
  havexvars=0
;
; a second argument that is neither "value" nor "index" is taken to be
; the variable names (call form categorize(x,xvars)); no third argument
; is allowed in that case
  if (exist(reftype)>0)
    if (sum(reftype==("value"|"index"))==0)
      xvars=reftype
      havenoref=1
      havexvars=1
      error(exist(refvalue)>0,"second or third argument is wrong!")
    else
      error(exist(refvalue)!=1,"third argument must be numeric")
      havenoref=0
      if (exist(xvars)==2)
        havexvars=1
      endif
    endif
  endif
;
; names given with more than one column are reshaped before the
; length check
  if (havexvars)
    if (cols(xvars)>1)
      xvars=reshape(xvars, cols(xvars)|rows(xvars))
    endif
    bad=(rows(xvars)!=p)||(dim(dim(xvars))!=1)
    error(bad,"variables names should be in a vector of length cols(x)")
  endif
;
  if (1-havexvars)
    xvars=string("Variable %2.0f",1:cols(x))
  endif
;
  if (1-havenoref)
    haveindex=(reftype=="index")
    havevalue=(reftype=="value")
  endif
;
; default: index mode with matrix(p) as reference indices, which the
; header describes as taking the minimal value of each column
  if (havenoref)
    refvalue=matrix(p)
    haveindex=1
    havevalue=0
  endif
;
  j=0
  while (j<cols(x))
    j=j+1
    xj =x[,j]
    xjs=xvars[j]
    switch
      case (haveindex)
        ; xr holds the distinct realizations of column j
        {xr,r}=discrete(xj)
        ; ix marks the realizations that are NOT the reference category
        ix=((1:rows(xr))!=refvalue[j])
        nkeep=sum(ix)
        ; categorize only if the reference index was found (nkeep<rows(xr))
        ; and at least one other category remains (nkeep>0); otherwise
        ; paf would be asked to select no rows at all
        if ((nkeep<rows(xr))&&(nkeep>0))
          tmp=paf(1:rows(xr),ix)
          xjnew=(xj==xr[tmp]')
          xjsnew=string(xjs+"#%1.0f",tmp)
        else
          xjnew=xj
          xjsnew=xjs
        endif    
        break
      case (havevalue)
        {xr,r}=discrete(xj)
        refNaN=isNaN(refvalue[j])
        ; a NaN reference needs isNaN() to identify the matching category
        if (refNaN)
          ix=(xr!=refvalue[j])&&(isNaN(xr)!=refNaN)
        else
          ix=(xr!=refvalue[j])
        endif
        nkeep=sum(ix)
        ; same guard as in index mode: reference present and at least
        ; one non-reference category left
        if ((nkeep<rows(xr))&&(nkeep>0))
          tmp=paf(xr,ix)
          ; second term matches NaN observations with a NaN category
          xjnew=(xj==tmp')||(isNaN(xj)*isNaN(tmp)')
          tmp=paf(1:rows(xr),ix)
          xjsnew=string(xjs+"#%1.0f",tmp)
        else
          xjnew=xj
          xjsnew=xjs
        endif    
        break
      default
        ; nothing
    endsw  
;
; append the (possibly categorized) column and its names to the output
    if (j==1)
      xc=xjnew
      xcvars=xjsnew
    else
      xc=xc~xjnew
      xcvars=xcvars|xjsnew
    endif
  endo
endp

