######Chapter 9: Cross-Product Terms and Interactions

###Section 9.1:  Testing a Cross-Product Term
x=c(1,9,5,2,5,6,4,7,6,8,2,6)
z=c(3,7,1,6,5,6,9,1,1,5,7,9)
y=c(2,9,6,1,5,6,2,3,5,8,2,6)

#Center Variables
dx=scale(x, center = TRUE, scale = FALSE)
dz=scale(z, center = TRUE, scale = FALSE)

#Regression with all three terms
mod <-lm(y~dx*dz)     #asterisk includes lower order terms             
summary(mod)

#View model matrix and covariance matrix
X  <-model.matrix(mod)
X
covar <-vcov(mod)
covar

#Simple slopes as Weighted Averages
simp.x <-mod$coef[2]+(dz*mod$coef[4])
simp.z <-mod$coef[3]+(dx*mod$coef[4])
simple <-cbind(simp.x,simp.z)
simple
mean(simple[,1])
mean(simple[,2])

###Section 9.2: Predicted Values and Simple Slopes
x=c(1,9,5,2,5,6,4,7,6,8,2,6)
z=c(3,7,1,6,5,6,9,1,1,5,7,9)
y=c(2,9,6,1,5,6,2,3,5,8,2,6)
dx=scale(x, center = TRUE, scale = FALSE)
dz=scale(z, center = TRUE, scale = FALSE)
mod <-lm(y~dx*dz)     #asterisk includes lower order terms             

#Construct P Matrix for Predicted Values
p0 <-rep(1,9);p1 <-c(rep(-sd(x),3),rep(0,3),rep(sd(x),3))
p2 <-rep(c(-sd(z),0,sd(z)),3);p3 <-p1*p2
P <-round(rbind(p0,p1,p2,p3),digits=5);P
pred.val <-t(P)%*%coef(mod)
dimnames(pred.val)=list(c("lo.x/lo.z","lo.x/med.z","lo.x/hi.z","med.x/lo.z","med.x/med.z","med.x/hi.z","hi.x/lo.z","hi.x/med.z","hi.x/hi.z"))
pred.val

#Plot Predicted Values 1
byrow <-rbind(c(pred.val[1:3]),c(pred.val[4:6]),c(pred.val[7:9]))
matplot((byrow), main = "Simple Slopes of Weather at Three Levels of Free Time", type="l",ylab = 'Mood', xlab = "Weather",lwd=2)
legend("topleft",legend=c("Low Free Time","Average Free Time","Lots of Free Time"),
lty=1,lwd=2,pch=21,col=c("black","red","darkgreen"),
ncol=1,bty="n",cex=0.8,
text.col=c("black","red","darkgreen"),
inset=0.01)
dev.new()
#Plot Predicted Values 2
bycol <-cbind(c(pred.val[1:3]),c(pred.val[4:6]),c(pred.val[7:9]))
matplot((bycol), main = "Simple Slopes of Free Time at Three Levels of Weather", type="l",ylab = 'Mood', xlab = "Free Time")
legend("topleft",legend=c("Lousy Weather","Average Weather","Great Weather"),
lty=1,lwd=2,pch=21,col=c("black","red","darkgreen"),
ncol=1,bty="n",cex=0.8,
text.col=c("black","red","darkgreen"),
inset=0.01)

#Construct S Matrix for Simple Slopes and Simple Standard Errors
s0 <-rep(0,6);s1 <-c(rep(1,3),rep(0,3));s2 <-c(rep(0,3),rep(1,3))
s3 <-c(-sd(z),0,sd(z),-sd(x),0,sd(x))
S <-round(rbind(s0,s1,s2,s3),digits=5);S
simp.slope <-t(S)%*%coef(mod)
simp.cov <-t(S)%*%vcov(mod)%*%S
simp.err <-sqrt(diag(simp.cov))
simples <-simp.slope/sqrt(diag(simp.cov))
tvalues <-2*pt(-abs(simples),df=(length(x)-nrow(S)))
crit <-abs(qt(0.025,(length(x)-nrow(S))))
CI.low <-simp.slope-(crit*simp.err)
CI.high <-simp.slope+(crit*simp.err)
simp.table<-round(matrix(c(simp.slope,simp.err,simples,tvalues,CI.low,CI.high),nrow=6,ncol=6),digits=5)
dimnames(simp.table)=list(c("x@z.low","x@z.med","x@z.high","z@x.low","z@x.med","z@x.high"),  c("slope", "stderr", "t","p","CI.low","CI.high"))
simp.table

###Section 9.2.2:  Johnson-Neyman Regions of Significance
#Johnson-Neyman Regions of Significance
x=c(1,9,5,2,5,6,4,7,6,8,2,6)
z=c(3,7,1,6,5,6,9,1,1,5,7,9)
y=c(2,9,6,1,5,6,2,3,5,8,2,6)
dx=scale(x, center = TRUE, scale = FALSE);
dz=scale(z, center = TRUE, scale = FALSE)

#Function for 2-way regions -- enter 1 for x; 2 for z
JN <-function(simple){
      mod <-lm(y~dx*dz)      
	coef <-mod$coef[2:4]
	cov  <-vcov(mod)[2:4,2:4]
      df <-length(y)-4
	t.crit <-abs(qt(.025,df))
	A <-(t.crit^2*cov[3,3])-coef[3]^2
	B <-2*((t.crit^2*cov[simple,3]-(coef[simple]*coef[3])))
	C <-(t.crit^2*cov[simple,simple])-coef[simple]^2
	lower <-(-B+sqrt(B^2-4*A*C))/(2*A)
	upper <-(-B-sqrt(B^2-4*A*C))/(2*A)
	return(cbind(lower,upper))
}
JN(1)
JN(2)

###Section 9.3:  Higher-Order Interactions
x=c(1,8,2,2,5,6,5,7,4,8,3,5)
z=c(3,7,1,6,6,6,7,2,3,6,5,8)
w=c(7,4,3,1,8,8,5,1,8,1,5,2)
y=c(3,7,8,3,4,5,4,2,5,9,3,6)
dx=scale(x, center = TRUE, scale = FALSE)
dz=scale(z, center = TRUE, scale = FALSE)
dw=scale(w, center = TRUE, scale = FALSE)
mod <-lm(y~dx*dz*dw)

#Function for Predicted Values
pp <-function(a,b,c){
p0 <-rep(1,8)
p1<-c(rep(c(rep(-sd(a),2),rep(sd(a),2)),2))
p2 <-c(rep(c(-sd(c),sd(c)),4))
p3 <-c(rep(-sd(b),4),rep(sd(b),4))
p4 <-p1*p2
p5 <-p1*p3
p6 <-p2*p3
p7 <-p1*p2*p3
P <-round(rbind(p0,p1,p2,p3,p4,p5,p6,p7),digits=5)
pred.val <-t(P)%*%coef(mod)
dimnames(pred.val)=list(c("lo.x,lo.z,lo.w","lo.x,hi.z,lo.w","hi.x,lo.z,lo.w","hi.x,hi.z,lo.w","lo.x,lo.z,hi.w","lo.x,hi.z,hi.w","hi.x,lo.z,hi.w","hi.x,hi.z,hi.w"))
return(list(P,pred.val))
}
predicted <-pp(x,w,z)
predicted

#Function for Simple Slopes Tests
ss <-function(a,b,c,d){
s0 <-rep(0,7)
s1 <-c(0,rep(1,3),rep(0,3));s2 <-c(0,rep(0,3),rep(1,3))
s3 <-rep(0,7);s4 <-c(1,-sd(b),0,sd(b),-sd(a),0,sd(a))
s5 <-c(0,rep(d*sd(c),3),rep(0,3));s6 <-c(0,rep(0,3),rep(d*sd(c),3))
s7 <-c(d*sd(c),-sd(b)*d*sd(c),0,sd(b)*d*sd(c),-sd(a)*d*sd(c),0,sd(a)*d*sd(c))
S <-round(rbind(s0,s1,s2,s3,s4,s5,s6,s7),digits=5)
simp.slope <-t(S)%*%coef(mod)
simp.err <-sqrt(diag(t(S)%*%vcov(mod)%*%S))
ttests <-simp.slope/simp.err
pvalues <-2*pt(-abs(ttests),df=(length(x)-nrow(S)))
crit <-abs(qt(0.025, df=(length(x)-nrow(S))))
CI.low <-simp.slope-(crit*simp.err)
CI.high <-simp.slope+(crit*simp.err)
simp.table<-round(matrix(c(simp.slope,simp.err,ttests,pvalues,CI.low,CI.high),nrow=7,ncol=6),digits=5)
dimnames(simp.table)=list(c("xz@low","x@z.low","x@z.med","x@z.high","z@x.low","z@x.med","z@x.high"),  c("slope", "stderr", "t","p","CI.low","CI.high"))
return(list(S,simp.table))
}

#Simple Slopes Tests (first two variables define simple slopes at third variable; specify -1 for @ low and 1 for @ high
simple <-ss(x,z,w,-1)
simple 

#Compare any two simple slopes
simptest =function(a,b,c,d,e,f) #use 0 for effect of interest, 1 or -1 for high or low
{
s1<-c(0, 1, 0, 0, b*sd(z), c*sd(w), 0, b*sd(z)*c*sd(w))
s2<-c(0, 0, 1, 0, a*sd(x), 0, c*sd(w), a*sd(x)*c*sd(w))
s3<-c(0, 0, 0, 1, 0, b*sd(z), c*sd(w), a*sd(x)*b*sd(z))
if (a == 0) {slope1 <- s1}
else if(b==0) {slope1<-s2}
else slope1 <-s3
s4<-c(0, 1, 0, 0, e*sd(z), f*sd(w), 0, e*sd(z)*f*sd(w))
s5<-c(0, 0, 1, 0, d*sd(x), 0, f*sd(w), d*sd(x)*f*sd(w))
s6<-c(0, 0, 0, 1, 0, e*sd(z), f*sd(w), d*sd(x)*e*sd(z))
if (d == 0) {slope2 <- s4}
else if(e==0) {slope2<-s5}
else slope2 <-s6
S= cbind(slope1,slope2)
SB<-t(S)%*%coef(mod);
sb<-SB[1]-SB[2]
SC<-t(S)%*%vcov(mod)%*%S
seb<-sqrt(SC[1]+SC[4]-(2*SC[2]))
ttest <-sb/seb
p <-2*pt(-abs(ttest),df=(length(x)-nrow(S)))
slopetest <-cbind(ttest,p)
return(list(S,slopetest))
}

S<-simptest(0, -1, -1,   0, -1, 1) #simple slope of X at Z_low, W_low vs. simple slope of X at Z_low, W_high
S

###Section 9.3.2: Recentering for Simple Slopes
x=c(1,8,2,2,5,6,5,7,4,8,3,5)
z=c(3,7,1,6,6,6,7,2,3,6,5,8)
w=c(7,4,3,1,8,8,5,1,8,1,5,2)
y=c(3,7,8,3,4,5,4,2,5,9,3,6)
dx=scale(x, center = TRUE, scale = FALSE)
dz=scale(z, center = TRUE, scale = FALSE)
dw=scale(w, center = TRUE, scale = FALSE)
mod <-lm(y~dx*dz*dw)

#Simple Slopes by Recentering Variables
lo.x <-dx+sd(x)
hi.x <-dx-sd(x)
lo.z <-dz+sd(z)
hi.z <-dz-sd(z)
lo.w <-dw+sd(w)
hi.w <-dw-sd(w)


#Simple slopes of x and z at low w -- (interpret only effects that do NOT include recentered variable)
simple.1 <-lm(y~dx*dz*lo.w)
summary(simple.1)

#Simple slope of x for high z and low w --(interpret only effects that do NOT include recentered variable)
simple.2 <-lm(y~dx*hi.z*lo.w)
summary(simple.2)

###Section 9.4: Effect Size and Power
x=c(1,8,2,2,5,6,5,7,4,8,3,5)
z=c(3,7,1,6,6,6,7,2,3,6,5,8)
w=c(7,4,3,1,8,8,5,1,8,1,5,2)
y=c(3,7,8,3,4,5,4,2,5,9,3,6)
dx=scale(x, center = TRUE, scale = FALSE)
dz=scale(z, center = TRUE, scale = FALSE)
dw=scale(w, center = TRUE, scale = FALSE)
mod <-lm(y~dx*dz*dw)

#Calculate Effect Size for Highest Order Cross-Product Term
r.2 <-summary(mod)$r.squared
ss.y <-var(y)*(length(y)-1)
ms.e <-(sum(resid(mod)^2)/(length(y)-nrow(summary(mod)$coef)))
r.cha <-((((mod$coef[8]/sqrt(vcov(mod)[8,8]))^2)*ms.e)/ss.y)
f.2 <-r.cha/(1-r.2)
f.2

#Sample Size Needed for 80% Power  - enter effect size and k
sampsize <-function(f,k){
 N = 7.85/f+(k-1)
}
sample <-sampsize(.03,7)
sample
