proc(y)=prunecv(tree,alfaseq,xmat,ymat,type)
;The program prunecv.xpl calls
;endpoint.xpl, gform.xpl, init.xpl, leafnum.xpl,
;maketr.xpl, ssr.xpl, final.xpl, suitind.xpl,
;omaind.xpl,
;pred.xpl, prederr.xpl, holmo.xpl.
; -----------------------------------------------------------------------
; Library      xclust
; -----------------------------------------------------------------------
; See_also     cartsplit, cartsplitopt, cartcv, leafnum, maketr,  pred,
;              prederr, prune, pruneseq, prunetot, ssr, kuva
; -----------------------------------------------------------------------
; Macro        prunecv
; -----------------------------------------------------------------------
; Description  Given a regression tree and observations,
;              gives the mean prediction error of the regression
;              trees for that data, for every tree in the sequence
;              of subtrees, pruned from the original tree.
; -----------------------------------------------------------------------
; Usage         resu = prunecv (tr, alfaseq, x, y, type)
; Input        
;   Parameter  tr
;   Definition  list of vectors: data structure which represents a binary tree
;               and is produced by cartsplit procedure, contains vectors 
;               tr.val, tr.vec, tr.mean, tr.ssr, tr.nelem.
;               See cartsplit for the description of tr.
;   Parameter  alfaseq
;   Definition  m x 1 vector: m different values for the complexity 
;               parameter.
;   Parameter  x
;   Definition  n x p matrix: represents n points in the sample space
;               at which the prediction error of the regression trees will
;               be calculated.
;   Parameter  y
;   Definition  n x 1 vector: 
;               contains the values of the response variable. 
;   Parameter  type
;   Definition  p x 1 vector: 
;               contains the types of the original regression variables,
;               1 means that the corresponding variable is continuous and
;               0 that it is categorical. This vector should be similar to
;               the vector which was given originally as an input to the
;               cartsplit which produced the regression tree tr. 
; Output       
;   Parameter  resu
;   Definition vector with one element for each value in alfaseq.
;              (When alfaseq is the complete sequence returned by
;              pruneseq for the original tree, as in the example below,
;              this presumably equals the number of leaves in the
;              original tree.)
;              Elements of the vector are
;              means of the squared residuals, that is,
;              sums of squared differences between predictions and
;              observations, divided by the number of observations n.
;              The program prunecv prunes original tree and calculates
;              mean of the squared residuals for every subtree in the
;              sequence of the pruned subtrees.
; -----------------------------------------------------------------------
; Notes    Pruning is described by Breiman, Friedman, Olshen, and Stone,
;          Classification and Regression Trees, 1984, Wadsworth,
;          pages 63, 284. 
; -----------------------------------------------------------------------
; Example      ; loads the library xclust
;              library ("xclust")
; ;let us generate a tree by cartsplit procedure
; x1=#(0,0,0,0,1,1,1,1,1,2)
; x2=#(0,0,0,0,0,0,0,1,1,1)
; x=x1~x2
; y=#(0,0,0,0,100,100,100,120,120,120)
; tr=cartsplit(x,y,#(0,1))
; tr
; alseq=pruneseq(tr)
; alseq
; xmat1=#(0,1,0,1,0)
; randomize(1)
; xmat2=uniform(5)
; xmat=xmat1~xmat2
; xmat
; ymat=120*uniform(5,1)
; ymat
; resu=prunecv(tr,alseq.alfa,xmat,ymat,#(0,1))
; resu
; -----------------------------------------------------------------------
; Result  
; Content of object tr.val.split0
; [1,] 0 
; [2,] 1,2 
; Content of object tr.val.split1
; [1,] NaN 
; Content of object tr.val.split2
; [1,] 0 
; Content of object tr.val.split3
; [1,] NaN 
; Content of object tr.val.split4
; [1,] NaN 
; Content of object tr.vec
; [1,] 1.000000 
; [2,] NaN 
; [3,] 2.000000 
; [4,] NaN 
; [5,] NaN 
; Content of object tr.mean
; [1,] 66.000000 
; [2,] 0.000000 
; [3,] 110.000000 
; [4,] 100.000000 
; [5,] 120.000000 
; Content of object tr.var
; [1,] 29640.000000 
; [2,] 0.000000 
; [3,] 600.000000 
; [4,] 0.000000 
; [5,] 0.000000 
; Content of object tr.nelem
; [1,] 10.000000 
; [2,] 4.000000 
; [3,] 6.000000 
; [4,] 3.000000 
; [5,] 3.000000 
; 
; Contents of alseq.lnumber
; [1,] 3.000000 
; [2,] 2.000000 
; [3,] 1.000000 
; Contents of alseq.alfa
; [1,] 0.000000 
; [2,] 60.000000 
; [3,] 2904.000000
; 
; Content of object xmat
; [1,] 0.000000 0.000433 
; [2,] 1.000000 0.734705 
; [3,] 0.000000 0.024197 
; [4,] 1.000000 0.503982 
; [5,] 0.000000 0.036124
; Content of object ymat
; [1,] 14.045734 
; [2,] 88.864122 
; [3,] 21.345143 
; [4,] 16.407608 
; [5,] 91.224802
;  
; Content of object resu
; [1,] 4135.137768 
; [2,] 3636.224687 
; [3,] 1662.353206 
; -----------------------------------------------------------------------
; Author       Jussi Klemela", 980323         
; -----------------------------------------------------------------------
; dime: dimension of alfaseq; used both as the size of the result and as
; the upper bound of the outer loop.
; NOTE(review): the loop test below treats dime as a scalar count --
; confirm that dim() yields that for an m x 1 vector.
dime=dim(alfaseq)
; tulos ("result"): result vector, pre-filled by a division by zero so
; that it starts with placeholder (non-finite) values; each entry visited
; by the outer loop is overwritten with a prediction error.
tulos=matrix(dime)/0
; Working copy of the input tree as returned by init (see init.xpl).
tr=init(tree)
; g: values computed by gform for the current tree; tmin: index selected
; by omaind; curalfa: the entry of g at that index, which is compared
; against the requested complexity parameters.
; NOTE(review): presumably tmin is the "weakest link" node minimizing g,
; as in cost-complexity pruning -- confirm in gform.xpl / omaind.xpl.
g=gform(tr)
tmin=omaind(g)
curalfa=g[tmin]
; subtr always holds the most recently pruned tree, tr the tree one
; pruning step before it; initially both are the unpruned tree.
subtr=tr
; Leaf count reported by leafnum for the current tree.
curlnum=leafnum(tr,1)
i=1
; One result entry per element of alfaseq. Pruning state (tr, subtr,
; curalfa, curlnum) is carried over from one i to the next.
while (i<=dime)    
    ; Keep pruning while more than one leaf is left and the current
    ; threshold curalfa does not exceed the requested alfaseq[i].
    while ((curlnum > 1) && (curalfa <= alfaseq[i]))
      ; Advance: the previously pruned tree becomes the current tree.
      tr=subtr
      g=gform(tr)
      tmin=omaind(g)
      ; curalfa is taken from tr, i.e. the tree BEFORE the pruning step
      ; on the next line.
      curalfa=g[tmin]
      ; Build the next subtree from tr and the selected node index.
      subtr=maketr(tr,tmin)
      curlnum=leafnum(subtr,1)
    endo
    ; If pruning reached a single leaf and even the last threshold is
    ; not above alfaseq[i], evaluate the fully pruned subtr; otherwise
    ; evaluate tr, the tree before the last pruning step.
    if ((curlnum==1) && (curalfa <= alfaseq[i]))
     tulos[i]=prederr(subtr,xmat,ymat,type)
    else
     tulos[i]=prederr(tr,xmat,ymat,type)
    endif
    i=i+1
endo
; Return the vector of prediction errors.
y=tulos
endp
