proc(y)=cartcv(xmat,ymat,type,opt,wv)
;The program cartcv calls cartsplit, pruneseq,
;osita.xpl, ositalog.xpl, prunecv.xpl;
;further macros of the library that are listed as required:
;unidis.xpl,
;leafnum.xpl, pred.xpl, init.xpl, final.xpl, gform.xpl,
;omaind.xpl, maketr.xpl.  
; -----------------------------------------------------------------------
; Library      xclust
; -----------------------------------------------------------------------
; See_also     cartsplit, cartsplitopt, leafnum, maketr,  pred,
;              prederr, prune, prunecv, pruneseq, prunetot, ssr, kuva
; -----------------------------------------------------------------------
; Macro        cartcv
; -----------------------------------------------------------------------
; Description   Performs cross validation for the CART:
;               a test set is removed from the data in a given number 
;               of ways; with the rest of the data a regression 
;               tree is grown and a sequence of subtrees
;               is pruned from this tree. For each subtree, the
;               test set is used to calculate the prediction error.
; -----------------------------------------------------------------------
; Usage         cross = cartcv (x, y, type, opt, wv)
; Input        
;   Parameter  x
;   Definition n x p matrix: data matrix of regression variables
;   Parameter  y
;   Definition n x 1 vector: contains the values of the response variable
;   Parameter  type
;   Definition  p x 1 vector: contains the types of the regression 
;               variables,
;               1 means that the corresponding variable is continuous and
;               0 that it is categorical
;   Parameter  opt
;   Definition  list of scalars: determines when the growing of the 
;               tree is stopped. Consists of opt.mincut, opt.minsize,
;               opt.mindev. See cartsplit for the description of these
;               parameters.
;   Parameter  wv
;   Definition integer >=2, wv fold cross-validation is performed, that
;              is, the data is divided in wv number of ways into an 
;              estimation set and a test set.
;              The division is formed randomly.
;              The macro stops with an error if wv < 2.
; Output       
;   Parameter  cross
;   Definition  list of vectors, consists of cross.alfa, cross.lnumber, 
;               cross.cv, cross.cvstd.
;               The elements of the list cross are vectors with the number
;               of elements equal to the number of trees in the
;               sequence of pruned subtrees of the tree grown 
;               with data x and y.
;               The vector cross.alfa contains the values of the complexity
;               parameter alfa.
;               The vector cross.lnumber contains the numbers of leaves
;               in the sequence of the pruned subtrees.
;               The vector cross.cv contains the estimates for the
;               expected value of the mean of squared residuals
;               (the mean over the wv folds).
;               The vector cross.cvstd contains the square root of the
;               variance, taken over the wv folds, of the fold-wise
;               prediction errors. It is not divided by sqrt(wv).
; -----------------------------------------------------------------------
; Notes        Input parameters x, y, type, and opt are similar to the 
;              input parameters of cartsplit. 
; -----------------------------------------------------------------------
; Example      
; ;load the library xclust
; library ("xclust")
; ;let us make some deterministic data
; x1=#(0,0,0,0,1,1,1,1,1,2)
; x2=#(0,0,0,0,0,0,0,1,1,1)
; x=x1~x2
; y=#(0,0,0,0,100,100,100,120,120,120)
; opt=cartsplitopt("minsize",1,"mindev",0,"mincut",1)
; cross=cartcv(x,y,#(0,1),opt,3)
; cross             
; -----------------------------------------------------------------------
; Result 
; Contents of cross.alfa
; [1,] 0.000000 
; [2,] 60.000000 
; [3,] 2904.000000 
; Contents of cross.lnumber
; [1,] 3.000000 
; [2,] 2.000000 
; [3,] 1.000000 
; Contents of cross.cv
; [1,] 33.333333 
; [2,] 71.555556 
; [3,] 1582.086168 
; Contents of cross.cvstd
; [1,] 57.735027 
; [2,] 26.342474 
; [3,] 2595.969560  
; -----------------------------------------------------------------------
; Author    Jussi Klemela", 980323           
; -----------------------------------------------------------------------
; at least two folds are needed: the spread over the folds is computed below
error(wv<2,"cartcv: wv must be at least 2")
n=dim(xmat[,1])
; assignment of the n observations to the wv folds
ositteet=osita(n,wv)
; tree grown on the complete data and its sequence of pruned subtrees
tr=cartsplit(xmat,ymat,type,opt)
prseq=pruneseq(tr)
alfa=prseq.alfa
lnumber=prseq.lnumber
tuldim=dim(alfa)
; complexity parameters used for pruning the fold trees:
; first element 0, last element alfa[tuldim], the elements in between
; are geometric means of consecutive values of alfa
pralfa=alfa
pralfa[1]=0  
pralfa[tuldim]=alfa[tuldim]
i=2
while (i<tuldim)
  pralfa[i]=sqrt(alfa[i]*alfa[i+1])
  i=i+1
endo   
; apu[i,] receives the result of prunecv for fold i, one column per pralfa
apu=matrix(wv,tuldim)
; for each fold: osite marks the test observations, kaanto the rest
i=1
while (i<=wv)
  osite=ositalog(n,ositteet,i)
  kaanto=matrix(n)-osite
  xdata=paf(xmat,kaanto)
  ydata=paf(ymat,kaanto)
  tree=cartsplit(xdata,ydata,type,opt)
  xtest=paf(xmat,osite)
  ytest=paf(ymat,osite)
  apu[i,]=prunecv(tree,pralfa,xtest,ytest,type)
  i=i+1
endo
; summarize over the folds (rows of apu), one value per subtree
cv=trans(mean(apu))
cvstd=trans(sqrt(var(apu)))
y=list(alfa,lnumber,cv,cvstd)
endp
