#####Chapter 10: Polynomial Regression

###Section 10.1  Cubic Polynomial
x=c(36,36,37,37,38,39,39,39,40,39,42,43)
y=c(3,2,4,7,8,8,6,6,5,3,4,2)

#Plot function
plot(x, y, cex = 1, col ="black", ,ylab = 'Violent Crimes', xlab = 'Temperature', main = "Scatterplot")

#Regression
dx=scale(x, center = TRUE, scale = FALSE)
summary(linear <-lm(y~x))
summary(quad <-lm(y~dx+I(dx^2)))

#Predicted Values
p0 <-rep(1,3);p1 <-c(-sd(x),0,sd(x));p2 <-c(sd(x)^2,0,sd(x)^2)
P.mat <-round(rbind(p0,p1,p2),digits=5)
P <-round(rbind(p0,p1,p2),digits=5)
pred.val <-t(P)%*%coef(quad)
dimnames(pred.val)=list(c("lo.x","med.x","hio.x"))
pred.val

#Plotting Predicted Values
dev.new()
plot(pred.val,type="l",ylab = 'Violent Crime', xlab = 'Temperature', main = "Predicted Values")

#Simple Slopes and Simple Standard Errors
s0 <-rep(0,3);s1 <-rep(1,3);s2 <-c(-2*sd(x),0,2*sd(x))
S <-round(rbind(s0,s1,s2),digits=5)
simp.slope <-t(S)%*%coef(quad)
simp.cov <-t(S)%*%vcov(quad)%*%S
simp.err <-sqrt(diag(simp.cov))
simples <-simp.slope/sqrt(diag(simp.cov))
tvalues <-2*pt(-abs(simples),df=(length(x)-nrow(S)))
crit <-abs(qt(0.025,(length(x)-nrow(S))))
CI.low <-simp.slope-(crit*simp.err)
CI.high <-simp.slope+(crit*simp.err)
simp.table<-round(matrix(c(simp.slope,simp.err,simples,tvalues,CI.low,CI.high),nrow=3,ncol=6),digits=5)
dimnames(simp.table)=list(c("x@z.low","x@z.med","x@z.high"),c("slope", "stderr", "t","p","CI.low","CI.high"))
simp.table

#Maximum Point
maximum <-(-quad$coef[2]/(2*quad$coef[3]));maximum

###Section 10.2:  Polynomial Interaction
x=c(37,35,38,38,35,40,36,39,40,39,36,37)
z=c(2,1,8,3,6,9,7,4,3,8,2,8)
y=c(4,2,8,2,1,3,6,5,4,8,5,9)
dx=scale(x, center = TRUE, scale = FALSE)
dz=scale(z, center = TRUE, scale = FALSE)

#Compare Linear and Polynomial Model
summary(lin.reg <-lm(y~dx*dz))
summary(poly.reg <-lm(y~dx+I(dx^2)+dz+dx*dz+I(dx^2)*dz))
anova(lin.reg,poly.reg)

#Predicted values
p0 <-rep(1,9);p1 <-rep(c(-sd(x),0,sd(x)),3);p2 <-rep(c(sd(x)^2,0,sd(x)^2),3)
p3 <-c(rep(-sd(z),3),rep(0,3),rep(sd(z),3));p4 <-p1*p3;p5 <-p2*p3
P <-rbind(p0,p1,p2,p3,p4,p5)
preds <-matrix(t(P)%*%coef(poly.reg),nrow=3,ncol=3)
dimnames(preds)=list(c("low.z","med.z","high.z"),  c("low.x","med.x","high.x"))
preds

#Simple Polynomial Coefficients
q0 <-rep(0,6);q1 <-c(rep(1,3),rep(0,3));q2 <-c(rep(0,3),rep(1,3))
q3 <-rep(0,6);q4 <-c(-sd(z),0,sd(z),rep(0,3));q5 <-c(rep(0,3),-sd(z),0,sd(z))
Q <-round(rbind(q0,q1,q2,q3,q4,q5),digits=5)

simp.slope <-t(Q)%*%coef(poly.reg)
simp.cov <-t(Q)%*%vcov(poly.reg)%*%Q
simp.err <-sqrt(diag(simp.cov))
simples <-simp.slope/sqrt(diag(simp.cov))
tvalues <-2*pt(-abs(simples),df=(length(x)-nrow(Q)))
crit <-abs(qt(0.025, df=(length(x)-nrow(Q))))
CI.low <-simp.slope-(crit*simp.err)
CI.high <-simp.slope+(crit*simp.err)
simp.poly<-round(matrix(c(simp.slope,simp.err,simples,tvalues,CI.low,CI.high),nrow=6,ncol=6),digits=5)
dimnames(simp.poly)=list(c("linear@z.low","linear@z.med","linear@z.high","quad@x.low","quad@x.med","quad@x.high"),c("slope","stderr","t","p","CI.low","CI.high"))
simp.poly

#Simple Slopes
s0 <-rep(0,9);s1 <-rep(1,9);s2 <-rep(c(-sd(x)*2,0,sd(x)*2),3)
s3 <-rep(0,9);s4 <-c(rep(-sd(z),3),rep(0,3),rep(sd(z),3));s5 <-s2*s4
S <-round(rbind(s0,s1,s2,s3,s4,s5),digits=5)

simp.slope <-t(S)%*%coef(poly.reg)
simp.cov <-t(S)%*%vcov(poly.reg)%*%S
simp.err <-sqrt(diag(simp.cov))
simples <-simp.slope/sqrt(diag(simp.cov))
tvalues <-2*pt(-abs(simples),df=(length(x)-nrow(S)))
crit <-abs(qt(0.025, df=(length(x)-nrow(S))))
CI.low <-simp.slope-(crit*simp.err)
CI.high <-simp.slope+(crit*simp.err)
simp.slop <-round(matrix(c(simp.slope,simp.err,simples,tvalues,CI.low,CI.high),nrow=9,ncol=6),digits=5)
dimnames(simp.slop)=list(c("x.lo@z.low","x.med@z.low","x.hi@z.lo","x.lo@z.med","x.med@z.med","x.hi@z.med","x.lo@z.hi","x.med@z.hi","x.hi@z.med"),c("slope","stderr", "t","p","CI.low","CI.high"))
simp.slop

#maximum value
max<-(-(poly.reg$coef[2]+poly.reg$coef[5]*-sd(z))/((2*(poly.reg$coef[3]+poly.reg$coef[6]*-sd(z)))))
max

###Section 10.3: Piecewise Polynomials
x=seq(1:12)
y=c(1.0,1.5,1.6,5.0,6.0,2.0,1.4,1.25,3.0,4.0,7.2,7.3)

#Plot 
plot(x, y, cex = 1, col ="gray", main = "Scatterplot")
lines(x = x, y = y, lwd = 1, col = "red")

#Linear model
lin.reg <-lm(y~x)
summary(lin.reg)

#Global Cubic
dx=scale(x, center = TRUE, scale = FALSE)
summary(cubic <-lm(y~dx+I(dx^2)+I(dx^3)))

#Piecewise cubic spline using two knots
k1=5
k2=8
dum1 <-ifelse(x <= k1, 0, (x-k1))^3
dum2 <-ifelse(x <= k2, 0,(x-k2))^3
summary(piece <-lm(y~x+I(x^2)+I(x^3)+dum1+dum2))
 
#Compare Fit of Cubic and Piecewise Polynomial
anova(cubic,piece)

##Natural Cubic Spline Without Penalty
x <-seq(1:12)
x<-x-min(x);x<-x/max(x)
y=c(1,1.5,1.6,5,6,2,1.4,1.25,3,4,7.2,7.3)
kn <- c(x[5],x[8])

#Cubic Spline Function 
basis<-function(x,z) {
 ((z-0.5)^2-1/12)*((x-0.5)^2-1/12)/4-
 ((abs(x-z)-0.5)^4-(abs(x-z)-0.5)^2/2+7/240)/24
}

# Model Matrix for Unpenalized Natural Cubic Spline
spline.X<-function(x,kn){
  q<-length(kn)+2 
  n<-length(x) 
  X<-matrix(1,n,q) 
  X[,2]<-x 
  X[,3:q]<-outer(x,kn,FUN=basis) 
  X
}

X<-spline.X(x,kn) 
nat.spline<-lm(y~X-1) 
fitted(nat.spline)

###Section 10.3.5:  Penalized Cubic Splines (Returned values approximate textbook values)
x=seq(1:12)
y=c(1,1.5,1.6,5,6,2,1.4,1.25,3,4,7.2,7.3)
spl.x <-smooth.spline(x,y,spar=.25);fitted(spl.x);spl.x$lambda

z=c(1.45,2.91,.3,3.35,.35,.75,1.10,.25,2.15,2.25,3.35,4)
spl.z <-smooth.spline(z,y,spar=.875);fitted(spl.z);spl.z$lambda

#Create split plots
old.par <- par(mfrow=c(1, 2))
plot (x,fitted(spl.x))
plot(z,fitted(spl.z))
par(old.par)
