//Stata implementation of lowess used by R lowess.  
//Described in text.  Iterative resmoothing is done when iter > 1.
//Oversmoothing with fraction of smoothing points used  > 1.0  is prevented.
//This happens iter times or until convergence.

//uses More Mata ado package for one command mm_median
//obtain by 
// net search more mata
//
//TITLE
//      'MOREMATA': module (Mata) to provide various functions
//      Requires: Stata version 9
//      Distribution-Date: 20070415
//      Author: Ben Jann, ETH Zurich
//      Support: email ben.jann@soz.gess.ethz.ch

//meets generic smoother output guidlines
//generic smoother guidelines:
// 	takes 		input  two variables response and predictor, 
//			output two variables fitted and new predictor gridpts. 
// 	may increase observations (with missing values on other variables) 
// 		to fit in new predictor gridpts.
//	ignores missing observations
//this smoother does not create new observations to store new gridpoints and 
//fitted values


//takes as input varlist  	y 		response
//               		x 		predictor
//      	 options	Fraction 	proportion of points used 
//						to compute each fitted value 
//						(0 < F < 1)
//				ITERations	maximum number of iterations 
//						performed in robust fit
//						(>=0)
//				DELta		threshold (in units of x)
//						for giving x values the same 
//						fitted value
//						reduces redundant calculations
//						for similar predictor values
//						(>=0.)
//						(default 1/100 * range of x)
//				STOre(yn xn) specifies variables that will 
//						store gridpoints and
//						fitted values
//						(yn != y)						 					
program lowess_ties_optim 
version 10.0
syntax varlist(numeric min=2 max=2) [if] [in] , [Fraction(real .66667)] [DELta(numlist min=1 max=1)] [ITERations(integer 0)] STore(varlist numeric min=2 max=2)
qui {
		//check arguments for veracity
	tokenize `varlist'
	local x1: type `1'
	local x2: type `2'
	tokenize `store'
	local x3: type `1'
	local x4: type `2'
	forvalues i = 1/4 {
		if (substr("`x`i''",1,3) == "str") {
			di "Specified Variables must be numeric"
			exit 198
		}
	}
	if (`fraction' <= 0 | `fraction' > 1) {
		di as error "Fraction must be in (0,1]
		exit 198
	}
	if (`iterations' < 0) {
		di as error "Iterations must be a positive integer"
		exit 198
	}
	if("`delta'" != "") {
		if (`delta' < 0) {
			di as error "Delta must be non-negative"
			exit 198
		}
	}
	tokenize `store'
	local fit  "`1'"
	tokenize `varlist'
	if ("`1'" == "`fit'") {
		di as error "Response can't be overwritten by fitted"
		exit 198
	}

		//preserve estimates
	tempname prevest
	_estimates hold `prevest', nullok
		//initialize Delta with default if it is not specified
	if("`delta'" == "") {
		tokenize `varlist'
		sum `2'
		local delta = (1/100)*(r(max)-r(min))
	}
             //sort on predictor for now , return sort to original order later
	tokenize `varlist'
	tempvar order
	gen `order' = _n
	bysort `2' `order': assert _n == 1 
	preserve
		//keep relevant observations
	keep if `2' != . & `1' != .
	if ("`if'" != "") {
	        keep if `if'
	}
	if ("`in'" != "") {
		keep in `in'
	}
		//read predictors and response into mata
        tempname x y yfit
	mata:	st_view(`y'=.,.,"`1'")
        mata:   `x' = st_data((1,rows(`y')),"`2'")
			//make prediction
	mata:	`yfit' = mRClowess(`x',`y',rows(`x'),`fraction', ///
`iterations', `delta')
		//put yfit where it is specified to belong in generate()
		//store generated variables
	tempvar yfitholder xfitholder
	gen `yfitholder' = .
	gen `xfitholder' = .
	mata:	st_store((1,rows(`x')),"`yfitholder'", `yfit')
	mata:	st_store((1,rows(`x')),"`xfitholder'", `x')
	
	bysort `order': assert _n == 1
	keep `order' `xfitholder' `yfitholder'
	tempfile a
	save `a'
	restore

	bysort `order': assert _n == 1
	tempvar mop
	capture rename _merge `mop'
	merge `order' using `a', update
		// not all may have matched (missing, if, in restrictions)
	assert _m == 3 | _m == 1
	bysort `order': assert _n == 1
	drop `order' _merge
	capture rename `mop' _merge

	//fill in yn and xn variables
	tokenize `store'
	replace `2' = `xfitholder' if `xfitholder' != .
	replace `1' = `yfitholder' if `yfitholder' != .

	drop `xfitholder'
	drop `yfitholder'
	_estimates unhold `prevest'
}
end




mata:
version 10.0
mata clear

//helper function for mRClowess
//	x	x coordinates
//	y	y coordinates
//	n	dimension of x,y,ws, and rw
//	xs	x point at which smooth is to occur
//	nleft	index of first point that should be considered in computing
//		fitted value
//	nright	index of last point that should be considered in computing
//		fitted value
//	userw   binary indicator, 1 if robust fit is done using rw
//	rw	robustness weights
//	
//returns 
//        rowvector 	(ys 	fitted value at xs  							
//			 ok) 	indicator 0 if all weights for smooth are zero, fitted not computed	
//        The smooth at XS is computed using (robust) locally weighted
//        regression of degree 1.  The tricube weight function is used
//        with h equal to the maximum of XS-X(NLEFT) and X(NRIGHT)-XS.
real rowvector mRClowest(real colvector x, real colvector y, real scalar n, ///
real scalar xs, real scalar nleft, real scalar nright, real scalar userw, ///
real colvector rw) {
real scalar nrt, j,a, b, c, h, range

		//initialize range and h
    	range = x[n]-x[1]
	h = max((xs-x[nleft], x[nright]-xs))

		//initialize weights
	Ws = J(n,1,0)

		//sum of weights, used for ok initialization
	a = 0
		//check for ties to right
		//and compute weights
    	j = nleft
    	while (j <= n) {
	        	// compute weights
        		// (pick up all ties on right) 
        	if (abs(x[j,1] - xs) <= 0.999*h) {
			if (abs(x[j,1] - xs) <= .001*h) {
	               		Ws[j,1] = 1
			}
		        else {
                		Ws[j,1] = (1.-(abs(x[j,1] - xs)/h)^3)^3
			}
			if (userw!=0) {
				// adjust weights with robustness weights
	                	Ws[j,1] = Ws[j,1]*rw[j,1];
			}
				//update a
           	 	a = a + Ws[j,1]
        	}	
        	else if (x[j,1] > xs) {
				//no more ties
			break
		}
        	j = j+1;
    	}

		//rightmost pt (may be greater 
    		//than nright because of ties) 
	nrt = j-1;
    	if (a <= 0) {
        	return ((-777,0))
	}
    	else {
        	//do weighted least squares
        	//make sum of Ws[j,1] == 1 
		Ws = Ws :/a
        	if (h > 0.) {
            			//use linear fit 
            			//weighted center of x values 
			a = Ws[(nleft::nrt),1]'*x[(nleft::nrt),1]
		        b = xs - a;
	            	c= Ws[(nleft::nrt),1]'*(x[(nleft::nrt),1] :- a):^2
        	    	if (sqrt(c) > 0.001*range) {
                		b = b/c 
        		//points are spread out 
			//enough to compute slope 
			//no need to keep Ws same dimensions as x or y anymore
				Ws = Ws[(nleft::nrt),1] :* ///
((x[(nleft::nrt),1] :- a):*b :+ 1)
			}
			else {
				Ws = Ws[(nleft::nrt),1]
			}
            	}
		else {
			Ws = Ws[(nleft::nrt),1]
		}
        	        //fit prediction
	        return(((Ws' * y[(nleft::nrt),1]),1))
        }
}

//      x       x coordinates
//      y       y coordinates
//      n       dimension of x,y,ws, and rw
//	f 	Input; specifies the amount of smoothing; F is
//                  the fraction of points used to compute each
//                 fitted value; as F increases the smoothed values
//                  become smoother; choosing F in the range .2 to
//                  idea which value to use, try F = .5.
//     nsteps   Input; the number of iterations in the robust
//                  fit; if NSTEPS = 0, the nonrobust fit is
//                  returned; setting NSTEPS equal to 2 should serve
//                 most purposes.
//     DELTA    input; nonnegative parameter which may be used
//                  to save computations; if N is less than 100, set
//                  DELTA equal to 0.0; if N is greater than 100 you
//                  should find out how DELTA works by reading the
//                  additional instructions section.
//     Returns
//             YS = Output; fitted values; YS(I) is the fitted value
//                  at X(I); to summarize the scatterplot, YS(I)
//                  should be plotted against X(I).
real colvector mRClowess(real colvector x, real colvector y, real scalar n, ///
real scalar f, real scalar nsteps, real scalar delta) {
	real scalar i, iter, j, last, nleft, nright, ns, alpha, ///
cmad, cut, d1, d2, denom, r, sc
	real rowvector pys
	real colvector ys
nsteps
delta
		//initialize ys
	ys = J(n,1,0)
		//trivial case
	if (n < 2) {
		return((y[1],1))
	}
		//at least two, at most n points to be used in calculation
	ns = max((2,min((n,trunc(f*n+1e-7)))))
		//robustness iterations
	iter = 1
		//initialize robustness weights
	rw = J(n,1,1)
	while (iter <=nsteps + 1) {
	"Iteration" 
	iter
	//initialize nright and nleft, the window for smoothing of point i
		nleft = 1
		nright = ns
			//index of current point
		i = 1
			//index of last point
		last = 0
			//go until within loop criteria break out of loop
		while(0!=1) {
				//check if window should be shifted to the right
			if (nright < n) {
				d1 = x[i] - x[nleft]
				d2 = x[nright+1] - x[i]
				if (d1 > d2) {
				//radius will not decrease by move right
					nleft = nleft + 1
					nright = nright + 1
					continue
				}
			}
				//fitted value at x[i]
			pys = mRClowest(x,y,n,x[i],nleft,nright,1*(iter>1),rw)
			if (pys[2] == 0) {
				// all weights zero, copy over y value
				ys[i] = y[i]
			}
			else {
				ys[i] = pys[1]
			}
		
		        if (last < i-1) {
					//interpolate skipped points
	                	denom = x[i]-x[last];
	                	for(j = last+1; j < i; j++) {
                    			alpha = (x[j]-x[last])/denom
                  			ys[j] = alpha*ys[i] +(1.-alpha)*ys[last]
                		}
            		}

		        	// we've now estimated point at index i
				//and cleaned up in our path
            		last = i
				//estimate ahead a little with delta
				//we've already done the work
	         	cut = x[last]+delta;
            		for (i = last+1; i <= n; i++) {
                		if (x[i] > cut) {
                    			break
				}
                		if (x[i] == x[last]) {
                    			ys[i] = ys[last]
                    			last = i;
                		}
            		}
				//reinitialize i
		        i = max((last+1, i-1))
            		if (last >= n) {
				//we have reached the end
				break
			}
        	}
		
			// calculate residuals for robustness weights.
		res = y :- ys
			//calculate overall scale estiamte for convergence check
		sc = sum(abs(res))
		sc = sc/n
			//compute robustness weights, unless it's last iteration
		if(iter > nsteps) {
			break
		}
		rw = abs(res)
			//compute cmad = 6* median(rw)
		cmad = 6*mm_median(rw)
		"Iteration"
		iter
		cmad

        	if(cmad < 1e-7 * sc)  {
				//cmad effectively zero
			break
		}
        	for(i = 1 ; i < n ; i++) {
			r = abs(res[i])
			if (r <= .001*cmad) {
				rw[i] = 1
		   	}	
			else if (r <= .999*cmad) {
        	        	rw[i] = (1.-(r/cmad)^2)^2
		    	}
	                else {
        	        rw[i] = 0
			}
        	}
        iter++
	}
	return(ys) 
}

end

