//returns
//r(N)				scalar, number of observations in sample
//r(sample)			macro, conditional for being in sample (if , in)
//					for example, replace x = 2 `rsample'
//r(info)			matrix of information criteria 
//				(row # = predictor #)
//r(best1) ... r(bestN)		macros giving the best 1,...,N predictor models
//leaps_and_bounds.ado
program leaps_and_bounds, rclass
version 10.0
syntax varlist [if] [in]

tempname prevest
preserve
capture estimates preserve `prevest'
noi display "`0'"
qui marksample touse
qui markout `touse'
qui keep if `touse'
qui keep `varlist'

return scalar N = _N

tempfile zefile
qui save `zefile', replace

local n : word count `varlist'
//not counting the response
local n = `n' - 1
tokenize `varlist'
local response "`1'"

//order predictors by influence on regression sum of squares
//1 is most influential
//2 second
//etc.

qui reg `varlist'
local x: word count "`varlist'"

if (e(df_m) < `x') {
	di as error "design matrix not full rank"
	exit 198
}

forvalues i = 1/`n' {
	local j = `i' + 1
	qui test ``j'' = 0
	local var_`i'  "``j''" 
	local zef_`i' = -r(F) 
}
clear
qui set obs `n'
qui gen var = ""
qui gen zef = .
forvalues i = 1/`n' {
	qui replace var = "`var_`i''" if _n == `i'
	qui replace zef = `zef_`i'' if _n == `i'
}
sort zef
local ordlist = ""
forvalues i = 1/`n' {
	local a = var[`i']
	local ordlist "`ordlist' `a'" 
}
use `zefile', clear
order `response' `ordlist'

local varlist "`response' `ordlist'"
local predlist "`ordlist'"

//variables are now properly ordered

noi di "`varlist'"
tokenize "`ordlist'"

noi mata: leaps_bounds("`response'","`ordlist'")
capture estimates restore `prevest'
restore
end



mata:
struct node {
		//first subset of predictors
        rowvector p1
		//second subset of predictors
        rowvector p2
		//points to child nodes - 
		//ith child has i-1 children
                //rules for getting predictors in children from previous work
                //first child of parent
                //      subset 1 = parent's subset 2 - last predictor
                //      subset 2 = parent's subset 2 - second to last predictor
		//nth child of parent
		//      subset 1 = (n-1)th child's subset 1 - last predictor
		//      subset 2 = parent's subset 2 - (n+1) to last predictor
        pointer (struct node scalar) rowvector children
		//point to parent node
        pointer (struct node scalar) scalar parent
                //rss of subset 1 regression
        real scalar p1rss
               //inverse of resonse correlation matrix for subset 1 regression
        real matrix p1i
                //rss of subset 2 regression
        real scalar p2rss
                //inverse of response correlation matrix for subset 2 regression
        real matrix p2i
}

//returns a permutation matrix that will shift ith row/column to end
real matrix pm(real scalar i, real scalar n) {
	if(i != n) {
		Y = ((I(i-1),J(i-1,n-(i-1),0)) \ (J(1,n-1,0),1))
		Y = (Y \ (I(n-1)[i::(n-1),1::(n-1)],J(n-i,1,0)))
		return(Y)
	}
	else {
		return(I(n))
	}
}

void leaps_bounds(string scalar response, string scalar ordlist) {
	X = st_data(.,.)
	//ordlist
	//initialize respone and predictor data
	Y = st_data(.,response)
	//put intercept constant on the left
	X = (J(rows(X),1,1),X[.,(2::cols(X))])		

	//initialize root
	struct node scalar root
	root.p2 = (1..(cols(X)-1))
	D = (Y,X)
	//cm = (Y,X)'*(Y,X)
	cm = cross(D,D)


	//best model matrix
	Best = J(cols(X)-1,cols(X)-1,.)
	//minRSS colvector
	minRSS = J(cols(X)-1,1,.)
	minRSS_lag = J(cols(X)-1,1,.)
	my = mean(Y)
	constRSS = crossdev(Y,my,Y,my)
	run = 0

	//start through the tree
	traverse(&root,&Best,&minRSS,&cm,.,constRSS,0,0,&run,&minRSS_lag)
	stata("di as text" + char(34) +  "Actual Regressions   " + ///
	char(34) + " as result  " +  strofreal(run))
	stata("di as text" + char(34) +  "Possible Regressions " + ///
	char(34) + " as result  " +  strofreal(2^rows(minRSS)))

	//models are ready now.
	stata("tokenize " + ordlist)

	// build model specification macros
	for(i=1;i<=cols(Best);i++) {
		for(j=1;j<=cols(Best);j++) {
			if (Best[i,j] != 0) {
	st_local("best" + strofreal(i), st_local("best" + strofreal(i)) ///
+ " " + st_local(strofreal(Best[i,j])))
			}
		}
	}
	
	//record information criteria 
	RSS = st_addvar("double",st_tempname())
	st_store((1::cols(Best)),RSS,minRSS)
	R2ADJ = st_addvar("double",st_tempname())
	temp = (1::rows(minRSS))
	temp = (-temp) :+ rows(Y) :- 1
	temp = (temp :^ -1) :* (rows(Y) - 1)
	temp = (minRSS/constRSS) :* temp
	temp = -temp :+ 1
	st_store((1::cols(Best)),R2ADJ,temp)
	AIC = st_addvar("double",st_tempname())
	temp = (1::rows(minRSS))
	temp = (-temp) :+ rows(Y) :- 1
	temp = rows(Y)*ln(minRSS :/ rows(Y)) + (((-temp) :+ rows(Y)) :*2)
	st_store((1::cols(Best)),AIC,temp)
	AICC = st_addvar("double",st_tempname())
	temp2 = (1::rows(minRSS))
	temp2 = ((temp2 :+ 2) :* (temp2 :+ 3) :* 2) :/ ///
	(((temp2 :+ 2) :* -1) :+ rows(Y) :- 1)
	temp = temp  :+  temp2
	st_store((1::cols(Best)),AICC,temp)
	BIC = st_addvar("double",st_tempname())
	temp = (1::rows(minRSS))
	temp = (-temp) :+ rows(Y) :- 1
	temp = rows(Y)*ln(minRSS :/ rows(Y)) + ln(rows(Y))*((-temp) :+ rows(Y))
	st_store((1::cols(Best)),BIC,temp)
	vnRSS  = st_varname(RSS)
	stata("char " + vnRSS + "[varname] RSS")
	vnR2ADJ  = st_varname(R2ADJ)
	stata("char " + vnR2ADJ + "[varname] R2ADJ")
	vnAIC  = st_varname(AIC)
	stata("char " + vnAIC + "[varname] AIC")
	vnAICC  = st_varname(AICC)
	stata("char " + vnAICC + "[varname] AICC")
	vnBIC  = st_varname(BIC)
	stata("char " + vnBIC + "[varname] BIC")
	stata("format  "+  vnRSS + " %12.0g")
	stata("l " + vnRSS + "-" + vnBIC +  " in 1/" + ///
	strofreal(rows(minRSS)) + " , subvarname clean")

	for(i=1; i <= cols(Best);i++) {
		stata("return local best" + strofreal(i) + " " + char(34) + ///
	"`" + "best" + strofreal(i) + "'" + char(34) )
	}

	baba = st_tempname()
stata("mkmat " +  vnRSS + "-" + vnBIC + " in 1/" + strofreal(rows(minRSS)) + ///
",matrix(" + baba + ")")
stata("matrix colnames " + baba + "= RSS R2ADJ AIC AICC BIC")
stata("return matrix info =" + baba)
}

//sn points current node
//Best points to Best predictor list matrix (preds in row, padded with zeroes)
//minRSS points to the minimum RSS for predictor lists of size 1-p
//cm points to the correlation matrix of predictors and response 
//(including intercept)
//cn is the child index of current node
//constRSS is the RSS for the regression on the intercept
//depth is the node depth
//run is the iteration number of the tree search/generation algorithm
//minRSSlag is the minRSS from the previous iteration
//forward = 0 indicates tree is being initialized with the root or first level.
void traverse(pointer(struct node scalar) scalar sn, ///
pointer(real matrix) scalar Best, ///
pointer(real colvector) scalar minRSS, pointer(real matrix) scalar cm, ///
real scalar cn, real scalar constRSS, real scalar depth, ///
real scalar forward, pointer (real scalar) scalar run, ///
pointer (real colvector) scalar minRSS_lag) {

if(cn == .) {
	//root node
	//subset 1 is empty
	//subset 2 is all predictors
	(*sn).p1rss = constRSS
	(*sn).p2i = invsym((*cm)[(2::cols(*cm)),(2::cols(*cm))])
	(*sn).p2rss = (*cm)[1,1] - ///
	((*cm)[(2::cols(*cm)),1])'*((*sn).p2i)*((*cm)[(2::cols(*cm)),1])
	(*sn).p2 = (1..rows(*minRSS))
}
else {
		//child node, predictor list already filled out
		//compute first subset's RSS and inverse
	if (cn == 1) {
		//first child
		X = (*((*sn).parent)).p2i
		xn = cols(X)
		(*sn).p1i = X[1::(xn-1),1::(xn-1)] - ///
X[1::(xn-1),xn]*X[xn,1::(xn-1)]/X[xn,xn]
		(*sn).p1rss = (*cm)[1,1] - ///
(*cm)[(2,((*sn).p1 :+ 2)),1]' * ((*sn).p1i) * (*cm)[(2,((*sn).p1 :+ 2)),1]
	}
	else {
		X =(*((*sn).parent)).p2i
 		(*sn).p1i = 	X[1::(cols(X)-cn),1::(cols(X)-cn)] - ///
X[(1::(cols(X)-cn)),((cols(X)-cn+1)::cols(X))]* ///
invsym(X[((cols(X)-cn+1)::cols(X)),((cols(X)-cn+1)::cols(X))])* ///
X[((cols(X)-cn+1)::cols(X)),(1::(cols(X)-cn))]
		(*sn).p1rss = (*cm)[1,1] - ///
(*cm)[(2,(*sn).p1 :+ 2),1]' * (*sn).p1i * (*cm)[(2,(*sn).p1 :+ 2),1]
	}
	//check second subset 
	//compute RSS
	X = (*((*sn).parent)).p2i
	x = cols(X)
	Z = pm(x-cn,x)' * X * pm(x-cn,x)
	(*sn).p2i = Z[(1::(x-1)),(1::(x-1))] - ///
Z[(1::(x-1)),x]*Z[(1::(x-1)),x]'/Z[x,x]
	(*sn).p2rss = (*cm)[1,1] -  ///
(*cm)[(2,(*sn).p2 :+ 2),1]' * (*sn).p2i * (*cm)[(2,(*sn).p2 :+ 2),1]
}

//so first and second subset rss's are initialized
//update minRSS and Best
if (cols((*sn).p1) > 0) {
if ((*minRSS)[cols((*sn).p1),1] > (*sn).p1rss) {
	(*minRSS)[cols((*sn).p1),1] = (*sn).p1rss
	(*Best)[cols((*sn).p1),] = ((*sn).p1,J(1,rows(*Best)-cols((*sn).p1),0))
}
(*run) = (*run) + 1
}
if ((*minRSS)[cols((*sn).p2),1] > (*sn).p2rss) {
        (*minRSS)[cols((*sn).p2),1] = (*sn).p2rss
        (*Best)[cols((*sn).p2),] = ((*sn).p2,J(1,rows(*Best)-cols((*sn).p2),0))
}
(*run) = (*run) + 1

//create children of *sn
                //points to child nodes -
                //ith child has i-1 children
                //rules for getting predictors in children from previous work
                //first child of parent
                //      subset 1 = parent's subset 2 - last predictor
                //      subset 2 = parent's subset 2 - second to last predictor
                //nth child of parent
                //      subset 1 = (n-1)th child's subset 1 - last predictor
                //      subset 2 = parent's subset 2 - (n+1) to last predictor

struct node children
if(cn == .) {
children = node(1,cols((*sn).p2)-1)
(*sn).children = J(1,cols((*sn).p2)-1,NULL)
}
else if(cn > 1) {
	children  = node(1,cn-1)
	(*sn).children = J(1,cn-1,NULL)
}
if(cn != 1) {
		//we have children
		//first child predictor sets
	children[1,1].p1 = (*sn).p2[,(1::(cols((*sn).p2)-1))]
	if (cols((*sn).p2) > 2) {
		children[1,1].p2 = ///
(*sn).p2[,((1..(cols((*sn).p2)-2)),cols((*sn).p2))]
	}
	else	{
		children[1,1].p2 = (*sn).p2[,cols((*sn).p2)]
	}
		//and parent
	children[1,1].parent = sn
	((*sn).children)[1,1] = &(children[1,1])
		//remaining child predictor sets, and parent
	for(i=2;i<=cols(children)-1;i++) {
		children[1,i].p1 = ///
(children[1,i-1]).p1[,(1::(cols(children[1,i-1].p1)-1))]
		children[1,i].p2 = ///
(*sn).p2[,((1..(cols((*sn).p2)-(i+1))), ///
((cols((*sn).p2)-(i-1))..(cols((*sn).p2))))]
		children[1,i].parent = sn
	      ((*sn).children)[1,i] = &(children[1,i])
	}
	if (cols(children) > 1) {
		i = cols(children)
                children[1,i].p1 = ///
(children[1,i-1]).p1[,(1::(cols(children[1,i-1].p1)-1))]
		if(cols(children) == cols((*sn).p2)-1) {
			children[1,i].p2 = (*sn).p2[,(2::(cols((*sn).p2)))]
		}
		else {
	             children[1,i].p2 = ///
(*sn).p2[,((1..(cols((*sn).p2)-(i+1))), ///
((cols((*sn).p2)-(i-1))..(cols((*sn).p2))))]
		}
		children[1,i].parent = sn 
	        ((*sn).children)[1,i] = &(children[1,i])
	}
}

//things are setup, move to next stage

if(cn==.) {
// we are at root node, evaluate all child nodes
              for(i=1; i <= cols((*sn).children); i++) {
               traverse((*sn).children[1,i],Best,minRSS,cm,i,constRSS, ///
depth+1,forward,run,minRSS_lag)
              }
}
else {
	if (cols((*sn).children) > 0) { 	// we have children
	x = max((1, cols((*sn).p1)))
	if ( (*minRSS)[x,1] > (*sn).p2rss) {
		//we need to examine some of the descendants of the node
		//find the maximal k so that we can skip first k children 
		//of the node
		ktoplim = cols((*sn).p2)-cols((*sn).p1) - 1
		maxk = 0
		for(k = 1; k <= ktoplim-1; k++) {
			if (k > maxk & (*minRSS)[cols((*sn).p2)-k,1] != . ///
& (*minRSS)[cols((*sn).p2)-k,1] <= (*sn).p2rss  & ///
(*minRSS)[cols((*sn).p2)-k-1,1] != . & (*sn).p2rss < ///
(*minRSS)[cols((*sn).p2)-k-1,1]) {
				maxk = k
			}
		}
	        //handle k + 1 = cols((*sn).p2) case
            if (ktoplim > maxk & (*minRSS)[cols((*sn).p2)-ktoplim,1]  != . ///
& (*minRSS)[cols((*sn).p2)-ktoplim,1] <= (*sn).p2rss ///
			& (*sn).p2rss < constRSS) {
	                maxk = ktoplim
		}
		//we can skip the first maxk children of the node
		for (i=maxk+1; i <= cols((*sn).children); i++) {
			traverse((*sn).children[1,i],Best,minRSS,cm,i, ///
constRSS,depth+1,forward,run,minRSS_lag)
		}
	}
	//kill all pointers in children
	for (i=1; i <=cols((*sn).children);i++) {
		(*((*sn).children[1,i])).parent = NULL
        	((*sn).children)[1,i] = NULL
	}
}
}
}




end
