#####Chapter 6: Problematic Observations

###Section 6.1:  Leverage
# Regression diagnostics on a five-point toy data set. Every bare expression
# below auto-prints when the script is run at top level.
x <- c(2, 4, 5, 7, 2)
y <- c(3, 4, 5, 6, 7)
model <- lm(y ~ x)
summary(model)

# Influence table: DFBETAS per coefficient, DFFITS, covariance ratio,
# Cook's distance and hat values for every observation.
influence.measures(model)

# Hat matrix H = X (X'X)^{-1} X', built by hand from the design matrix
# (a column of ones for the intercept, followed by x).
X <- cbind(1, x)
hat <- X %*% solve(t(X) %*% X) %*% t(X)
hat

# Residuals in four flavours, side by side:
#   resid        - raw residuals e
#   standardized - e / s, where s^2 = SSE / residual degrees of freedom
#   studentized  - internally studentized residuals, rstandard()
#   deleted      - deleted (externally studentized) residuals, rstudent()
resid <- residuals(model)
# Take the residual degrees of freedom from the fit instead of hard-coding
# n - p = 3, so the scaling stays correct if the data or the formula change.
standardized <- resid / sqrt(sum(resid^2) / df.residual(model))
studentized <- rstandard(model)
deleted <- rstudent(model)
all.resids <- cbind(resid, standardized, studentized, deleted)
all.resids

###Section 6.2: Departures from Normality
# Twelve-observation example; y[11] = 7.74 is the largest response.
x <- c(
  1.11, 1.25, 3.25, 3.26, 4.40, 4.47,
  5.52, 5.59, 6.76, 6.86, 6.95, 7.95
)
y <- c(
  3.15, 2.72, 4.42, 3.60, 2.42, 3.02,
  3.34, 3.66, 3.60, 4.32, 7.74, 4.82
)
model <- lm(y ~ x)
summary(model)
influence.measures(model)

# R's function names differ from the labels used here: rstandard() returns
# the studentized residuals and rstudent() returns the deleted residuals.
model.student <- rstandard(model)
model.student
model.deleted <- rstudent(model)
model.deleted

# Normal Q-Q plot of the raw residuals, with a reference line
qqnorm(model$residuals)
qqline(model$residuals)

# Skewness, kurtosis and the Jarque-Bera normality test for a fitted model.
#
# Args:
#   model: a fitted model object (e.g. from lm()) whose `residuals` component
#          holds the numeric residual vector.
#
# Returns:
#   A one-element list holding a 1 x 4 matrix, rounded to 5 decimals, with
#   columns "skew", "kurtosis", "jarque-bera" and "p"; p is the upper-tail
#   chi-square (df = 2) probability of the statistic.
#
# Raises:
#   An error when `model$residuals` is missing, non-numeric or empty.
jarque.bera <- function(model) {
  # Use the exact component name: `$resid` relied on partial matching, which
  # returns NULL as soon as another component also starts with "resid".
  e <- model$residuals
  if (!is.numeric(e) || length(e) == 0L) {
    stop("`model$residuals` must be a non-empty numeric vector", call. = FALSE)
  }
  n <- length(e)
  dev <- e - mean(e)

  # Moment-based estimators (divisor n)
  skew <- (sum(dev^3) / n) / (sum(dev^2) / n)^(3 / 2)
  kurtosis <- n * sum(dev^4) / sum(dev^2)^2

  jb.test <- n * (skew^2 / 6 + (kurtosis - 3)^2 / 24)
  # Upper tail taken directly rather than as 1 - lower tail
  p <- pchisq(jb.test, df = 2, lower.tail = FALSE)

  jb.table <- round(
    matrix(
      c(skew, kurtosis, jb.test, p),
      nrow = 1,
      ncol = 4,
      dimnames = list(NULL, c("skew", "kurtosis", "jarque-bera", "p"))
    ),
    digits = 5
  )
  list(jb.table)
}
# Jarque-Bera table for the fit on the untransformed response
jarque.bera(model)

# Box-Cox: plot the profile log-likelihood over lambda, then report the
# lambda on the evaluated grid at which it peaks
library(MASS)  # provides boxcox()
dev.new()
bc <- boxcox(y ~ x)
with(bc, x[which.max(y)])

# Refit with the reciprocal of y as the response, then repeat the Q-Q plot
# and the Jarque-Bera check on the new residuals
y.1 <- y^(-1)
reg.recip <- lm(y.1 ~ x)
summary(reg.recip)
dev.new()
qqnorm(reg.recip$residuals)
qqline(reg.recip$residuals)
jarque.bera(reg.recip)

###Section 6.3: Collinearity
# Five predictors and one response, twelve observations each
x1 <- c(.52, .19, .81, .80, .62, .46, .42, .26, .70, .64, .17, .08)
x2 <- c(.86, .39, .94, .22, .05, .62, .70, .88, .41, .60, .72, .28)
x3 <- c(.14, .78, .35, .53, .65, .28, .61, .76, .38, .69, .22, .71)
x4 <- c(.51, .65, .47, .44, .67, .01, .47, .28, .07, .23, .79, .26)
x5 <- c(.33, .48, .52, .39, .31, .32, .45, .47, .37, .48, .28, .27)
y <- c(.08, .60, .47, .39, .58, .11, .42, .37, .11, .38, .09, .20)

# Pairwise correlations among all predictors and the response
V <- cbind(x1, x2, x3, x4, x5, y)
cor(V)

# Regression of y on all five predictors
model <- lm(y ~ x1 + x2 + x3 + x4 + x5)
summary(model)

# Variance inflation factors
library(car)  # provides vif()
vif(model)

# Condition index and variance decomposition.
# diag(1, p) * X'X keeps only the diagonal of X'X; post-multiplying X by the
# inverse of its element-wise square root rescales each column of X. The
# eigen-decomposition of N'N is then printed.
X <- cbind(1, x1, x2, x3, x4, x5)
N <- X %*% solve(
  sqrt(diag(1, ncol(X)) * (t(X) %*% X))
)
eigs <- eigen(t(N) %*% N)
eigs
library(perturb)  # provides colldiag()
colldiag(model)
