#####Chapter 4: Multiple Regression

####Section 4.1:  Basic Elements of Multiple Regression
x1 <- c(1, 9, 1, 5, 6, 8, 2, 4, 2, 8, 7, 7)
x2 <- c(2, 7, 1, 8, 5, 7, 4, 7, 1, 4, 3, 4)
y <- c(3, 8, 2, 8, 5, 9, 4, 5, 2, 4, 2, 6)
X <- cbind(1, x1, x2)  # design matrix with a leading intercept column

#Create standardized variables (mean 0, sd 1)
#Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can
#be reassigned, while TRUE/FALSE are reserved words
z1 <- scale(x1, center = TRUE, scale = TRUE)
z2 <- scale(x2, center = TRUE, scale = TRUE)
zy <- scale(y, center = TRUE, scale = TRUE)
Z <- cbind(z1, z2, zy)

#Correlation matrix: for standardized columns, Z'Z/(n - 1) reproduces
#cor(cbind(x1, x2, y))
corr <- (t(Z) %*% Z) / (length(y) - 1)
corr

#Function for finding Correlation Probabilities
#Returns cor(X) with the upper triangle overwritten by the p-value for
#testing H0: rho = 0; the lower triangle and the diagonal keep the raw
#correlations. dfr defaults to n - 2, the usual df for a correlation test.
cor.prob <- function(X, dfr = nrow(X) - 2) {
  out <- cor(X)
  upper <- upper.tri(out)
  rsq <- out[upper]^2
  # F = r^2 * dfr / (1 - r^2) on (1, dfr) df; equivalent to the usual
  # t test of a correlation because t^2 = F
  Fstat <- rsq * dfr / (1 - rsq)
  out[upper] <- 1 - pf(Fstat, 1, dfr)
  out
}
cor.prob(cbind(x1,x2,y))

#Multiple Regression
mod <- lm(y ~ x1 + x2)
summary(mod)
fitted(mod)
resid(mod)

#Sum of Squares
#Use resid(mod) rather than mod$resid: $ relies on partial matching
#(the component is actually named "residuals")
SS.res <- sum(resid(mod)^2)
SS.tot <- var(y) * (length(y) - 1)
SS.reg <- SS.tot - SS.res
df.reg <- ncol(X) - 1       # regression df = number of predictors (was hard-coded 2)
df <- length(y) - ncol(X)   # residual df = n - p
MS.res <- SS.res / df
#Overall F test of the regression; matches summary(mod)$fstatistic
F.test <- (SS.reg / df.reg) / MS.res; F.test
1 - pf(F.test, df.reg, df)

###Section 4.2: Regression Coefficients
x1 <- c(1, 9, 1, 5, 6, 8, 2, 4, 2, 8, 7, 7)
x2 <- c(2, 7, 1, 8, 5, 7, 4, 7, 1, 4, 3, 4)
y <- c(3, 8, 2, 8, 5, 9, 4, 5, 2, 4, 2, 6)
X <- cbind(1, x1, x2)
mod <- lm(y ~ x1 + x2)

#Regress x2 on x1; res2 is the part of x2 independent of x1.
#Regressing y on res2 recovers x2's unique contribution: its slope
#equals the x2 coefficient from the full model.
reg.1 <- lm(x2 ~ x1)
res2 <- resid(reg.1)
unique2 <- lm(y ~ res2)
summary(unique2)

#Regress x1 on x2; res1 is the part of x1 independent of x2.
#The slope of y on res1 equals the x1 coefficient from the full model.
reg.2 <- lm(x1 ~ x2)
res1 <- resid(reg.2)
unique1 <- lm(y ~ res1)
summary(unique1)

#Covariance Matrix and Standard Errors
SS_res <- sum(resid(mod)^2)   # residual sum of squares (resid(), not $resid partial match)
df <- length(y) - ncol(X)     # residual degrees of freedom, n - p
MS_res <- SS_res / df         # mean squared error
#Variance-covariance matrix of the coefficient estimates: (X'X)^{-1} * MSE
C <- solve(t(X) %*% X) * MS_res
#Standard errors are the square roots of the diagonal; diag() replaces
#the manual cbind of the three diagonal entries
std.err <- sqrt(diag(C))
std.err

#Confidence Intervals
confint(mod)

#Forecasting with x1=3 and x2=4
p <- c(1, 3, 4)                     # new observation, including the intercept term
yhat <- t(p) %*% coef(mod)          # point forecast p'b

#Forecasting Average Values (confidence interval for the mean response)
std.error.ave <- sqrt(t(p) %*% vcov(mod) %*% p); std.error.ave
t.crit <- abs(qt(.025, df))         # two-sided 95% critical value on df residual df
CI.lo.ave <- yhat - (t.crit * std.error.ave)
CI.hi.ave <- yhat + (t.crit * std.error.ave)
CI.ave <- cbind(CI.lo.ave, CI.hi.ave); CI.ave

#Forecasting Individual Values (prediction interval)
#Use fitted(mod), not mod$fitted: $ relies on partial matching of the
#"fitted.values" component
msres <- (sum((y - fitted(mod))^2) / df)
#The prediction variance adds one extra MSE for the new observation's own error
std.error.ind <- sqrt((1 + t(p) %*% solve(t(X) %*% X) %*% p) * msres); std.error.ind
CI.lo.ind <- yhat - (t.crit * std.error.ind)
CI.hi.ind <- yhat + (t.crit * std.error.ind)
CI.ind <- cbind(CI.lo.ind, CI.hi.ind); CI.ind

#Standardized Regression Coefficients
#TRUE/FALSE instead of T/F: T and F are ordinary variables that can be
#reassigned, while TRUE/FALSE are reserved words
z1 <- scale(x1, center = TRUE, scale = TRUE)
z2 <- scale(x2, center = TRUE, scale = TRUE)
zy <- scale(y, center = TRUE, scale = TRUE)
Z <- cbind(z1, z2, zy)
#"- 1" drops the intercept: standardized variables already have mean 0
summary(zmod <- lm(zy ~ z1 + z2 - 1))

#Test difference between two coefficients
#t = (b1 - b2) / se(b1 - b2), with
#var(b1 - b2) = var(b1) + var(b2) - 2*cov(b1, b2)
b.dif <- (coef(mod)[2] - coef(mod)[3]) /
  (sqrt(vcov(mod)[2, 2] + vcov(mod)[3, 3] - 2 * (vcov(mod)[2, 3])))
b.dif
#Two-tailed p-value. The original pt(b.dif, df)*2 is only valid when
#b.dif < 0 (it exceeds 1 otherwise); -abs() makes it correct for either sign.
2 * pt(-abs(b.dif), df)


###Section 4.3: Partitioning the Variance
x1 <- c(1,9,1,5,6,8,2,4,2,8,7,7)
x2 <- c(2,7,1,8,5,7,4,7,1,4,3,4)
y <- c(3,8,2,8,5,9,4,5,2,4,2,6)
X <- cbind(1,x1,x2)
mod <- lm(y~x1+x2)
rsqr.mod <- summary(mod)$r.squared

#Regress x2 on x1 and save the residual (the original comment had the
#variables reversed): resid(reg.1) is the part of x2 independent of x1,
#so the slope below equals the full-model coefficient of x2
reg.1 <- lm(x2~x1)
summary(x1only <- lm(y~ resid(reg.1)))

#Regress x1 on x2 and save the residual: resid(reg.2) is the part of x1
#independent of x2, so the slope below equals the full-model coefficient of x1
reg.2 <- lm(x1~x2)
summary(x2only <- lm(y~ resid(reg.2)))

#Semi-partial correlations
#resid(reg.1) is x2 adjusted for x1, so semi.1 measures x2's unique
#association with y; likewise semi.2 uses x1 adjusted for x2
semi.1 <- cor(resid(reg.1), y)
semi.2 <- cor(resid(reg.2), y)

#Squared semi-partial correlations
semi.1.sqr <- semi.1^2
semi.2.sqr <- semi.2^2

#Squared semi-partials as changes in R^2
just1 <- lm(y ~ x1)
just2 <- lm(y ~ x2)
#The full model is listed first, so the printed Df and Sum of Sq carry a
#negative sign; the F statistics and p-values are unaffected
anova(mod, just1, test = "F")
anova(mod, just2, test = "F")

rsqr.just1 <- summary(just1)$r.squared
rsqr.just2 <- summary(just2)$r.squared
x1.adds <- rsqr.mod - rsqr.just1   # R^2 gained over the x1-only model
x2.adds <- rsqr.mod - rsqr.just2   # R^2 gained over the x2-only model
allsemi <- cbind(semi.1, semi.1.sqr, x1.adds, semi.2, semi.2.sqr, x2.adds)
allsemi

#Partial Correlations
#Regress y on x1 and on x2 and keep the residuals
reg.y1 <- lm(y ~ x1)
reg.y2 <- lm(y ~ x2)

#partial.1 correlates x2-adjusted-for-x1 with y-adjusted-for-x1, i.e. the
#partial correlation of x2 and y controlling for x1 (and analogously for
#partial.2 controlling for x2)
partial.1 <- cor(resid(reg.1), resid(reg.y1))
partial.2 <- cor(resid(reg.2), resid(reg.y2))
partial.1.sqr <- partial.1^2
partial.2.sqr <- partial.2^2
allpartial <- cbind(partial.1, partial.1.sqr, partial.2, partial.2.sqr)
allpartial

###Section 4.4: Complete Matrix Calculations for Multiple Regression
x1 <- c(1,9,1,5,6,8,2,4,2,8,7,7)
x2 <- c(2,7,1,8,5,7,4,7,1,4,3,4)
y <- c(3,8,2,8,5,9,4,5,2,4,2,6)
V <- cbind(1, x1, x2, y)        # augmented matrix: intercept, predictors, and y
VV <- t(V) %*% V
X <- cbind(1, x1, x2)
XX <- t(X) %*% X
#Adjugate (classical adjoint) matrices: solve(M) * det(M)
Vadj <- solve(VV) * det(VV); Vadj
Xadj <- solve(XX) * det(XX); Xadj
df <- length(y) - 1             # total df, used for SS.tot and the variances
df.res <- length(y) - ncol(X)   # residual df, n - p (was hard-coded as 9)

#Compute Sum of Squares and Coefficient of Determination
SS.res <- det(VV) / det(XX); SS.res
SS.tot <- var(y) * df
SS.reg <- SS.tot - SS.res
R2 <- 1 - (det(VV) / (det(XX) * SS.tot)); R2

#Coefficients for x1 (row 2 of the adjugates corresponds to x1)
b.1 <- -Vadj[2,4] / det(XX)
beta.1 <- -Vadj[2,4] * sqrt(var(x1) * df) / (det(XX) * sqrt(SS.tot))
semi.1 <- -Vadj[2,4] / sqrt(det(XX) * (SS.tot) * Xadj[2,2])
partial.1 <- -Vadj[2,4] / sqrt(det(XX) * Vadj[2,2])
#Vadj[4,4] equals det(XX), so this is sqrt(MS.res * solve(XX)[2,2])
stderr.1 <- sqrt(SS.res / df.res * (Xadj[2,2] / Vadj[4,4]))
all.1 <- cbind(b.1, beta.1, semi.1, partial.1, stderr.1)
all.1

#Coefficients for x2 (row 3 of the adjugates corresponds to x2)
b.2 <- -Vadj[3,4] / det(XX)
beta.2 <- -Vadj[3,4] * sqrt(var(x2) * df) / (det(XX) * sqrt(SS.tot))
semi.2 <- -Vadj[3,4] / sqrt(det(XX) * (SS.tot) * Xadj[3,3])
partial.2 <- -Vadj[3,4] / sqrt(det(XX) * Vadj[3,3])
stderr.2 <- sqrt(SS.res / df.res * (Xadj[3,3] / Vadj[4,4]))
all.2 <- cbind(b.2, beta.2, semi.2, partial.2, stderr.2)
all.2
