; -----------------------------------------------------------------
; Library        cafpe
; -----------------------------------------------------------------
;  See_also     tp/cafpe/cafpe, tp/cafpe/cafpedefault, tp/cafpe/cafpeload
; -----------------------------------------------------------------
;   Macro       cafpefull
; -----------------------------------------------------------------
;   Keywords     nonparametric time series analysis, lag selection,
;                Final Prediction Error, CAFPE, local linear regression
; -----------------------------------------------------------------
;   Description Quantlet to conduct lag selection for conditional mean or
;               conditional volatility function of nonlinear autoregressive
;               models. It also allows for prior data transformations.
;               It can be based on either the
;               local linear estimation of the Asymptotic Final Prediction
;               Error (AFPE) or a corrected version (CAFPE). However, only for CAFPE
;		    the used plug-in bandwidth is consistent.
; -----------------------------------------------------------------
;   Usage { crmin,crpro,crstore,crstoreadd,hstore,hstoretest } = cafpefull(y,truedat,xresid,trueres,xdataln,xdatadif,xdatastand,lagmax,volat,searchmethod,dmax,selcrit,robden,perA,perB,startval,noutputf,outpath)
;   Input
;	 Parameter          ATTENTION:
;      Definition this quantlet requires that locling.dll, density.dll (NT)
;               or locling.so, denc.so (UNIX) have been opened. This can be done with the quantlet cafpeload or
;               directly with
;               garb = dlopen ("<path>\locling.dll") on NT,
;               garb = dlopen ("<path>\density.dll") on NT,
;               garb = dlopen ("<path>/locling.so") on UNIX,
;               garb = dlopen ("<path>/denc.so") on UNIX.

;       Parameter   y
;       Definition  (n x 1) vector of univariate time series
;       Parameter   truedat
;       Definition  character variable that contains path and name
;                    of ascii data file; if it is not empty, the data
;                    are read from this file and y is not used
;       Parameter   xresid
;       Definition  (nr x 1) vector of residuals for selecting lags
;                   of conditional volatility function; if not needed
;                   set xresid = 0
;       Parameter   trueres
;       Definition   character variable that contains path and name
;                   of residuals stored in ascii file if xresid=0;
;                   needed if volat = resid
;       Parameter   xdataln
;       Definition  character variable: "yes" takes natural logs, "no" doesn't
;       Parameter   xdatadif
;       Definition  character variable: "yes" takes first differences of data, "no" doesn't
;       Parameter   xdatastand
;       Definition  character variable: "yes" standardizes data, "no" doesn't
;       Parameter   lagmax
;       Definition  scalar, largest lag to be considered
;       Parameter   volat
;       Definition  character variable: "no" conducts lag selection for conditional mean function;
;                   "resid" conducts lag selection for conditional volatility function,
;                   the residuals of fitting a conditional mean
;                   function have to be contained in xresid
;       Parameter   searchmethod
;       Definition  character variable for determining search method
;                   "full": full search over all possible input variable combinations;
;                   "directed":  directed search a la Tjostheim & Auestad (1994)
;       Parameter   dmax
;       Definition  scalar, maximal number of lags
;       Parameter   selcrit
;       Definition  character variable to select lag selection criterion:
;                   "lqafpe":  estimating the asymptotic Final Prediction Error (AFPE) using
;                              local linear estimation and a  plug-in bandwidth based on partial
;                              local quadratic estimator
;                   "lqcafpe": estimating the corrected asymptotic Final Prediction Error (CAFPE) using
;                              local linear estimation and a  plug-in bandwidth based on partial
;                              local quadratic estimator
;       Parameter   robden
;       Definition  character variable for switching on/off robustification
;                               of density estimation a la Tjostheim & Auestad (1994),
;                               see also Section 5 in TY
;                               "yes": on; "no": off
;       Parameter   perA
;       Definition  scalar, 0 <= perA <= 1, fraction of the observations with the lowest
;                   density that is screened off for estimating A, see eq. (3.1) and Section 5 in TY
;       Parameter   perB
;       Definition  scalar, parameter like perA but for screening off the fraction perB
;                   of observations with the lowest density for estimating B
;       Parameter   startval
;       Definition  character variable, to control treatment of starting values
;                   "different": for each lag vector as few starting
;                                values are used as necessary;
;                   "same": for each lag vector the same starting value is used which
;                           is determined by the largest lag used in the lag selection
;                           quantlet xorigxe
;       Parameter   noutputf
;       Definition  character variable, name of output file
;       Parameter   outpath
;       Definition  character variable, path for output file
;   Output
;       Parameter   crmin
;       Definition     vector that stores for all considered lag combinations
;                      in the first dmax rows the selected lag vector
;                      in the dmax+1 row the estimated criterion
;                      in the dmax+2 row the computed Ahat if (C)AFPE is
;                      used
;                      in the dmax+3 row the estimated variance of the white
;                      noise process
;       Parameter   crpro
;       Definition     matrix that stores for each number of lags
;                      in the first dmax rows the selected lag vector
;                      in the dmax+1 row the estimated asymptotically
;                      optimal bandwidth for estimating A and (C)AFPE
;                      in the dmax+2 row the used bandwidth for estimating B
;                      in the dmax+3 row the used bandwidth for estimating C
;                      in the dmax+4 row the estimated criterion
;                      in the dmax+5 row the computed Ahat if (C)AFPE is
;                      used
;                      in the dmax+6 row the estimated variance of the white
;                      noise process
;       Parameter   crstore
;       Definition  matrix that stores lag vector and criterion value for
;                      all lag combinations and bandwidth values considered
;                      in the first dmax rows all considered lag vector are
;                      stored
;                      in the dmax+1 to dmax+number of bandwidths in grid
;                      the estimated criterion for each lag vector and
;                      bandwidth combination is stored
;       Parameter    crstoreadd
;       Definition   matrix that stores those criteria that
;                      are evaluated in passing for all
;                      lag combinations where all values for one lag
;                      combination are stored in one column, see program
;                      for details
;       Parameter    hstore
;       Definition    row vector that stores the bandwidths used in
;                      computing (C)AFPE  for each lag vector
;       Parameter   hstoretest
;       Definition    matrix that stores for each lag vector in one column
;                      the estimated asymptotically optimal
;                      bandwidth, hS(m+2) and hC (see section 5 in TY)

; -----------------------------------------------------------------
;   Example    	pathcafpe 	= "tp/cafpe/" 		; path for CAFPE quantlets
;		library("xplore")
;		library("times")
;		func(pathcafpe + "cafpeload"); load required XploRe files of CAFPE
;	        cafpeload(pathcafpe)    	
;              	n              	= 50
;              	truedat      	= ""
;  	       	xresid		= 0
;	       	trueres 	= ""		; name of potential residuals file
;              	xdataln      	= "no";
;              	xdatadif     	= "no";
;              	xdatastand   	= "no";
;              	lagmax       	= 4
;              	searchmethod 	= "full"
;              	dmax         	= 4
;		volat 		= "no"		; "no": conduct lag selection for cond. mean function
;  		selcrit		= "lqcafpe"	; use CAFPE with plug-in bandwidth
;		robden    	= "yes"		; robustify density estimation
;		perA  		= 0
;		perB  		= 0.05		; screen off data with lowest density
;		startval	= "different"
;		noutputf	= ""		; name of output file
;		outpath		= "test"	; path for output file
;
;              ; generate data
;              randomize(0)     ; sets seed value for random number generator
;              y=genexpar(1,0.1,0.3|0.6, 2.2|-0.8,normal(n))  ; generate exponential AR(2) process
;
;              ; plot generated data
;              title        = "Realisation of exponential AR(2) process"
;              xlabel       = "Time"
;              ylabel       = "Values"
;              plot(1:n~y)
;
;              ; conduct lag selection
;  		{crmin,crpro,crstore,crstoreadd,hstore,hstoretest}= cafpefull(y,truedat,xresid,trueres,xdataln,xdatadif,xdatastand,lagmax,volat,searchmethod,dmax,selcrit,robden,perA,perB,startval,noutputf,outpath)
;              "selected lag vector, estimated CAFPE "
;              crmin[,1:dmax+1]
;              "number of lags, chosen lag vector, estimated CAFPE, plug-in bandwidth"
;              (0:dmax)~crpro[,1:dmax|(dmax+4)|(dmax+1)]
; -----------------------------------------------------------------
;   Result    the selected lags according to the CAFPE criterion
;             and the estimated bandwidths
;             as described by Tschernig and Yang (2000)
; -----------------------------------------------------------------
;   Result   the selected lags according to the CAFPE criterion
;             and the estimated bandwidths
;             as described by Tschernig and Yang (2000)
; -----------------------------------------------------------------
;   Reference Tjostheim, D. and Auestad, B. (1994) Nonparametric
;                           identification of nonlinear time series: selecting significant lags.
;                           Journal of the American Statistical Association 89, 1410-1419.
;
;                           Tschernig, R. and Yang, L. (TY) (2000), Nonparametric
;             Lag Selection for Time Series,
;             Journal of Time Series Analysis,
;             forthcoming  (http://www.blackwellpublishers.co.uk)
; -----------------------------------------------------------------
;   Author    Tschernig  000420
; -----------------------------------------------------------------

proc (crmin,crpro,crstore,crstoreadd,hstore,hstoretest) = cafpefull(y,truedat,xresid,trueres,xdataln,xdatadif,xdatastand,lagmax,volat,searchmethod,dmax,selcrit,robden,perA,perB,startval,noutputf,outpath)

; Purpose: front end for the lag selection routine fpenps. The procedure
;   1. loads the time series (and, for volat == "resid", the residuals),
;   2. writes a protocol of all settings to the file outpath + noutputf + ".pro",
;   3. applies the requested prior transformations (log, first differences,
;      standardization),
;   4. calls fpenps and appends its main results to the protocol file.
; All six outputs are returned unchanged from fpenps.

; ------------------------ load data set(s) ------------------------------

; a non-empty truedat takes precedence over the vector y
if (!(truedat == ""))
    xdata = read(truedat)
else
    xdata = y
endif
n = rows(xdata)
"number of observations for lag selection"
n

; for the conditional volatility the squared residuals are used
if (volat == "resid")
    if (xresid == 0)
        xresid = read(trueres)      ; residuals come from file if not passed
    endif
    xresid      = xresid^2
endif

; ----------set selection estimator ------------------------

; both documented criteria ("lqafpe", "lqcafpe") use the local linear
; estimator, so it is set unconditionally
estimator = "loclin"
kernel  = "gaussian"        ; use gaussian kernel throughout

; ------------------------- Initialize output file  --------------

; note: outpath and noutputf are concatenated without a separator
outputfip = outpath + noutputf + ".pro"

; ---- check number of lags for full search ----------------------------

if (searchmethod == "full")         ; all possible lag combinations
                                    ; are considered
    ; stop if more lags are requested than candidate lags exist
    error(lagmax < dmax, "check dmax; its larger than the total number of input variables")
endif

; ------------ Print results -----------------------------------------

; the first write creates the protocol file, all later writes append ("a")
write( "=======================================================================",outputfip)
write("Monte Carlo Simulation:  Nonparametric Selection of Input variables",outputfip,"a")
write("-----------------------------------------------------------------------",outputfip,"a")
write(" ------------- Data and prior data transformations ----------",outputfip,"a")
write("data file: time series:    " + truedat, outputfip,"a")
if (volat == "resid")
    write( "file of residuals:          " + trueres, outputfip,"a")
endif
write("natural logarithms are taken:      "+ xdataln, outputfip,"a")
write("first differences are taken:       "+ xdatadif, outputfip,"a")
write("transformed data are standardized: " + xdatastand, outputfip,"a")

write(" ------ Parameters for lag selection -----", outputfip,"a")
write(string("highest lag considered:          %10.3f",lagmax), outputfip,"a")
write("cond. volatitility data:           " + volat, outputfip,"a")
write("denominator in Nadaraya-Watson-kernel robustified: " + robden,outputfip,"a")
write("kernel used:                       " +  kernel,outputfip,"a")
write("local estimator used:              " + estimator,outputfip,"a")
write(string("number of maximal lags:          %10.3f", dmax), outputfip,"a")
write("search method for lags:            " + searchmethod,outputfip,"a")
write("startup values for search:         " + startval, outputfip,"a")
write("selection criterion:               " + selcrit, outputfip,"a")
write(string("fraction weighted for computing Ahat: %3.3f", perA), outputfip,"a")
write(string("fraction weighted in Bhat,Chat:       %3.3f", perB),outputfip,"a")

; -------------- prior data transformation --------------------------

if (xdataln == "yes")
    if (min(xdata,2) > 0)
        xdata   = log(xdata)
    else
        ; zeros have to be rejected as well as negative values, otherwise
        ; the log transformation would be skipped silently although the
        ; protocol file reports that logarithms are taken
        error(min(xdata,2) <= 0,"taking logarithms is not possible as there are nonpositive observations")
    endif
endif
if (xdatadif == "yes")
    ; first differences, shortens the series by one observation
    xdata   = xdata[2:rows(xdata)] - xdata[1:rows(xdata)-1]
endif
if (xdatastand == "yes")
    xraw    = xdata ./ sqrt(var(xdata))
    ; residuals are standardized only if a residual vector is present
    ; (xresid is the scalar 0 if no residuals are used)
    if (rows(xresid) > 1)
        xresid  = xresid ./ sqrt(var(xresid))
    endif
else
    xraw    = xdata
endif

; ---------- Begin of computation ---------------------------------

; ------------------ do model selection --------------------

{ crmin,crpro,crstore,crstoreadd,hstore,hstoretest } =  fpenps(xraw,xresid,lagmax,volat,startval,robden,estimator,kernel,selcrit,perA,perB,searchmethod,dmax)

; ------------------ append results to protocol file --------------

write("",outputfip,"a")
write("---------------------- Results -------------------------",outputfip,"a")
write("selected lag vector                     estimated CAFPE ",outputfip,"a")
write(crmin[,1:dmax+1],outputfip,"a")
write("number of lags, chosen lag vector,        estimated CAFPE, plug-in bandwidth",outputfip,"a")
write((0:dmax)~crpro[,1:dmax|(dmax+4)|(dmax+1)],outputfip,"a")

endp
