#####Chapter 2: Simple Linear Regression

###Section 2.2:  Simple Linear Regression
x <- c(1, 9, 1, 5, 6, 8, 2, 4, 2, 8, 7, 7)
y <- c(3, 8, 2, 8, 5, 9, 4, 5, 2, 4, 2, 6)

#Preliminary model: regression through the origin (intercept suppressed)
model.1 <- lm(y ~ x - 1)
summary(model.1)
fitted(model.1)
residuals(model.1)
#Residual sum of squares of the no-intercept fit
SSres.1 <- sum(residuals(model.1)^2)
SSres.1

#Complete model: intercept plus slope
model.2 <- lm(y ~ x)
summary(model.2)
fitted(model.2)
residuals(model.2)
#Residual sum of squares of the full fit
SSres.2 <- sum(residuals(model.2)^2)
SSres.2

#Scatterplot of the raw data with the fitted line from model.2 overlaid
plot(
  x, y,
  main = "Scatterplot: Simple Linear Regression",
  xlab = "x ",
  ylab = "y",
  pch = 19
)
abline(model.2, col = "red")

#Standardized coefficient (beta): regress z-scores on z-scores, no intercept
zmodel <- lm(scale(y) ~ scale(x) - 1)
summary(zmodel)

#Unstandardized and standardized coefficients using simple algebra
#b = cov(x, y) / var(x); beta = cov(x, y) / (sd(x) * sd(y))
b.coef <- cov(x, y) / var(x)
b.coef
beta.coef <- cov(x, y) / (sd(x) * sd(y))
beta.coef

#Correlation coefficient
correl <- cor(x, y)
correl

###Section 2.3: Hypothesis Testing
x <- c(1, 9, 1, 5, 6, 8, 2, 4, 2, 8, 7, 7)
y <- c(3, 8, 2, 8, 5, 9, 4, 5, 2, 4, 2, 6)
model <- lm(y ~ x)
summary(model)

#Degrees of freedom, taken from the fitted model rather than hard-coded so
#the section stays correct if the data above are changed
df.reg <- length(coef(model)) - 1   #number of predictors (1 here)
df.res <- df.residual(model)        #n minus number of coefficients (10 here)

#Sum of Squares
#fitted()/coef() are used instead of model$fitted / model$coef, which only
#work through partial matching of the list names
ssreg <- sum((fitted(model) - mean(y))^2)
ssreg
ssres <- sum((y - fitted(model))^2)
ssres
sstot <- ssreg + ssres
sstot

#Coefficient of determination and the F test expressed through R squared
r.squared <- ssreg / sstot
r.squared
F.test <- (r.squared / df.reg) / ((1 - r.squared) / df.res)
F.test
#Upper-tail p value; lower.tail = FALSE avoids the precision loss of 1 - pf()
pf(F.test, df.reg, df.res, lower.tail = FALSE)

#F test using Sum of Squares (must agree with F.test above)
msreg <- ssreg / df.reg
msres <- ssres / df.res
ftest <- msreg / msres
ftest

#Covariance Matrix, Standard Errors and Test Coefficients
#X is the design matrix: a column of ones for the intercept, then x
X <- cbind(1, x)
covariance <- solve(t(X) %*% X) * msres
covariance
std.err <- sqrt(diag(covariance))
std.err
t.intercept <- coef(model)[1] / std.err[1]
t.intercept
t.slope <- coef(model)[2] / std.err[2]
t.slope

#Confidence Intervals for Regression Coefficient (95%, two-sided)
t.crit <- qt(0.975, df.res)
slopeCI.low <- coef(model)[2] - t.crit * std.err[2]
slopeCI.high <- coef(model)[2] + t.crit * std.err[2]
slope.CI <- c(slopeCI.low, slopeCI.high)
slope.CI

#R Functions for Covariance Matrix and Confidence Intervals
#Stored as vcov.model (not `cov`) so the stats::cov function is not masked
vcov.model <- vcov(model)
vcov.model
confint(model)

###Section 2.4:  Forecasting
x <- c(1, 9, 1, 5, 6, 8, 2, 4, 2, 8, 7, 7)
y <- c(3, 8, 2, 8, 5, 9, 4, 5, 2, 4, 2, 6)
model <- lm(y ~ x)
#Predictor profile for the forecast: 1 for the intercept term, then x = 3
p <- c(1, 3)
#Point forecast; the matrix product keeps it as a 1 x 1 matrix
yhat <- t(p) %*% coef(model)
#Residual degrees of freedom from the model; named df.res (not `df`) so the
#stats::df function is not masked
df.res <- df.residual(model)
#Two-sided 95% critical value shared by both intervals below
t.crit <- qt(0.975, df.res)

#Forecasting Average Values
#Standard error of the estimated mean response at p: sqrt(p' V p)
std.error.ave <- sqrt(t(p) %*% vcov(model) %*% p)
std.error.ave
CI.lo.ave <- yhat - t.crit * std.error.ave
CI.hi.ave <- yhat + t.crit * std.error.ave
CI.ave <- cbind(CI.lo.ave, CI.hi.ave)
CI.ave

#Forecasting Individual Values
#residuals() replaces y - model$fitted, which relied on partial matching
msres <- sum(residuals(model)^2) / df.res
#Design matrix: column of ones for the intercept, then x
X <- cbind(1, x)
#The extra "1 +" adds the variance of a single new observation to the
#variance of the estimated mean
std.error.ind <- sqrt(msres * (1 + t(p) %*% solve(t(X) %*% X) %*% p))
std.error.ind
CI.lo.ind <- yhat - t.crit * std.error.ind
CI.hi.ind <- yhat + t.crit * std.error.ind
CI.ind <- cbind(CI.lo.ind, CI.hi.ind)
CI.ind
