#################################################
#################################################
#								#
#                APPENDIX B: 				#
#								#
# last update: 27/08/2012				#
#################################################
#################################################

rm(list=ls(all=TRUE))
setwd("c:/book/appendix")

##############################################################
#								             #
# B.1 Maximum Likelihood Estimation and Likelihood Inference #
#								             #
##############################################################

#Standardised qualitative results:
#NEG: <50iu/ml
#EQI:50-100iu/ml
#POS: >100iu/ml

#Samples with antibody activity levels >24 U/ml were considered positive, 
#samples with antibody activity levels<20 were considered negative 
#and samples in between were considered equivocal

# Reading the parvo data, excluding the first 6 months
data<-read.table("c:/book/chapter4/VZV-b19-BE.dat",header=T)
subsetc<-(data$age<41)&(!is.na(data$age))&(!is.na(data$VZVmUIml))
datac<-data[subsetc,]
z<-log(datac$VZVmUIml[order(datac$age)]+1)
y<-datac$VZVres[order(datac$age)]
a<-datac$age[order(datac$age)]
s<-datac$sex[order(datac$age)]
cbind(length(a),sum(is.na(a)))
cbind(length(y),sum(is.na(y)))
cbind(length(z),sum(is.na(z)))

d=y[!(is.na(y))]
age=a[!(is.na(y))]

#### FIGURE B.1a
# Illustration of normal likelihood
normallogll=function(z,mu,sigma){
n=length(z)
-n/2*log(2*pi)-sum(log(sigma))-0.5*sum((z-mu/sigma)^2)
}

sdvec=rep(1,length(z))
normallogll1=function(mu) normallogll(z,mu,sdvec)

mugrid=seq(0,10,by=0.01)
logllgrid=rep(NA,length(mugrid))
for (i in (1:length(mugrid))) logllgrid[i]=normallogll1(mugrid[i])
plot(mugrid,logllgrid,ylim=c(-50000,0),xlab="mean",ylab="log-likelihood",type="n")
lines(mugrid,logllgrid)
abline(v=mean(z))

subz=z[sample(length(z),1000)]
sdvec=rep(1,length(subz))
normallogll1=function(mu) normallogll(subz,mu,sdvec)
logllgrid=rep(NA,length(mugrid))
for (i in (1:length(mugrid))) logllgrid[i]=normallogll1(mugrid[i])
lines(mugrid,logllgrid,lty=2)
mean(subz)

subz=z[sample(length(z),100)]
sdvec=rep(1,length(subz))
normallogll1=function(mu) normallogll(subz,mu,sdvec)
logllgrid=rep(NA,length(mugrid))
for (i in (1:length(mugrid))) logllgrid[i]=normallogll1(mugrid[i])
lines(mugrid,logllgrid,lty=3)
mean(subz)

#### FIGURE B.1b
# Illustration of binary likelihood
bernoullilogll=function(d,p){
n=length(d)
sum(d*log(p))+sum((1-d)*log(1-p))
}

bernoullilogll1=function(p) bernoullilogll(d,p)

pigrid=seq(0.6,1,by=0.01)
logllgrid=rep(NA,length(pigrid))
for (i in (1:length(pigrid))) logllgrid[i]=bernoullilogll1(pigrid[i])
plot(pigrid,logllgrid,ylim=c(-1500,0),xlab="pi",ylab="log-likelihood",type="n")
lines(pigrid,logllgrid)
abline(v=mean(d))

subd=d[sample(length(d),1000)]
bernoullilogll1=function(p) bernoullilogll(subd,p)

logllgrid=rep(NA,length(pigrid))
for (i in (1:length(pigrid))) logllgrid[i]=bernoullilogll1(pigrid[i])
#plot(pigrid,logllgrid,xlab="pi",ylab="log-likelihood",type="n")
lines(pigrid,logllgrid,lty=2)
mean(d)

subd=d[sample(length(d),100)]
bernoullilogll1=function(p) bernoullilogll(subd,p)

logllgrid=rep(NA,length(pigrid))
for (i in (1:length(pigrid))) logllgrid[i]=bernoullilogll1(pigrid[i])
#plot(pigrid,logllgrid,xlab="pi",ylab="log-likelihood",type="n")
lines(pigrid,logllgrid,lty=3)
mean(d)


### code as used in chapter
# Illustration of ML for logistic regression

# Using the glm function
fit=glm(d~age,family=binomial)
summary(fit)
logLik(fit)

# Using the optimization function nlm
# Defining -(log-likelihood)
minuslogll_lr=function(beta){
n=length(d)
p=exp(beta[1]+beta[2]*age)/(1+exp(beta[1]+beta[2]*age))
-(sum(d*log(p))+sum((1-d)*log(1-p)))
}
# Calling nlm (non-linar minimization with Newton-type algorithm)
nlmfit=nlm(minuslogll_lr,c(mean(d),0),hessian=T)
beta_est=nlmfit$estimate
beta_se=sqrt(diag(solve(nlmfit$hessian)))
# showing estimates and corresponding se-estimates
round(cbind(beta_est,beta_se),5)

### Testing slope beta1=0
# LRT
lrt=2*(logLik(glm(d~age,family=binomial))-logLik(glm(d~1,family=binomial)))[[1]]
c(lrt,1-pchisq(lrt,1))
# Wald
wald=(beta_est[2]/beta_se[2])^2
c(wald,1-pchisq(wald,1))
# CI
c(beta_est[2]-1.96*beta_se[2],beta_est[2]+1.96*beta_se[2])

### Illustration full likelihood for binary logistic regression
flogll=function(beta0,beta1){
n=length(d)
p=exp(beta0+beta1*age)/(1+exp(beta0+beta1*age))
sum(d*log(p))+sum((1-d)*log(1-p))
}

beta0grid=seq(-0.10,0.10,by=0.001)
beta1grid=seq(0.17,0.2,by=0.001)
flogllgrid=matrix(nrow=length(beta0grid),ncol=length(beta1grid))
for (i in (1:length(beta0grid))){
for (j in (1:length(beta1grid))){
flogllgrid[i,j]=flogll(beta0grid[i],beta1grid[j])
}}

#### FIGURE B.2
contour(beta0grid,beta1grid,flogllgrid,nlevels=20,xlab="beta0",ylab="beta1")
abline(v=fit1$coeff[1])
abline(h=fit1$coeff[2])

# a first candidate model: linear in dose with logit link
logitfit1=glm(d~age,family=binomial)
print(c(AIC(logitfit1),-2*logLik(logitfit1)[1]+2*2))

# a second candidate model: quadratic in dose with logit link
logitfit2=glm(d~age+I(age^2),family=binomial)
print(c(AIC(logitfit2),-2*logLik(logitfit2)[1]+2*3))
summary(logitfit2)

# a third candidate model: linear in dose with probit link
probitfit=glm(d~age,family=binomial(link="probit"))
print(c(AIC(probitfit),-2*logLik(probitfit)[1]+2*2))


#####################################
#                                   #
#   B.2 Generalized Linear Models   #
#                                   #
#####################################

### a simulation program is provided in the book

###################################################
#                                                 #
#  B.3 Profile Likelihood and Other Likelihoods   #
#                                                 #
###################################################

### Illustration profile likelihood for binary logistic regression 
### using GLM with an offset (continuous from previous code)                                        

flogll(fit1$coeff[1],fit1$coeff[2])

# profile confidence intervals

confint(fit1, level=.95)

plogll=function(beta1){
fit=glm(d~1, offset=beta1*age,family=binomial)
return(logLik(fit))
}

betagrid=seq(-0.2,0.5,by=0.01)
plogllgrid=rep(NA,length(betagrid))
for (i in (1:length(betagrid))) plogllgrid[i]=plogll(betagrid[i])
plot(betagrid,plogllgrid,xlab="beta1",ylab="profile log-likelihood",type="n")
lines(betagrid,plogllgrid)
abline(v=beta1)

### Illustration profile likelihood for binary logistic regression
### using the optimize function

# defining the full log-likelihood
flogll=function(beta0,beta1){
n=length(d)
p=exp(beta0+beta1*age)/(1+exp(beta0+beta1*age))
sum(d*log(p))+sum((1-d)*log(1-p))
}
# defining the profile -(log-likelihood) for fixed beta1f
plogll=function(beta1f){
plogllh=function(beta0) -flogll(beta0,beta1f)
 optimize(plogllh,beta0,lower=-10,upper=10)$objective
}

# minimizing the profile -(log-likelihood) for beta1f
optimize(plogll,beta1f,lower=-0.2,upper=0.5)

# plotting the profile likelihood curve
betagrid=seq(0.13,0.25,by=0.001)
plogllgrid=rep(NA,length(betagrid))
for (i in (1:length(betagrid))) plogllgrid[i]=-plogll(betagrid[i])
plot(betagrid,plogllgrid,xlab="beta1",ylab="profile log-likelihood",type="n")
lines(betagrid,plogllgrid)
fit=glm(d~age,family=binomial)
hatbeta1=fit$coef[2]
# plotting the estimate as a vertical dotted line
abline(v=hatbeta1,lty=3)

# computing the profile confidence interval for beta1f
cithreshold=logLik(fit)[1]-qchisq(0.95,1)
abline(h=cithreshold)

# profile confidence intervals using the confint function
confint.glm(fit,level=.95)
# for comparison, Wald type confidence intervals
confint.default(fit,level=.95)

profileci=function(beta1) -plogll(beta1)-cithreshold
pcil=uniroot(profileci,c(hatbeta1,hatbeta1+0.5))$root
pcir=uniroot(profileci,c(hatbeta1,hatbeta1-0.5))$root
# plotting the confidence limits as dashed vertical lines
abline(v=pcil,lty=2)
abline(v=pcir,lty=2)

##########################
#                        #
#   B.4 The Bootstrap    #
#                        #
##########################
# nonparametric bootstrap
library(boot)

# for the mean antibody level
meanstat=function(data,indices){
 data=data[indices] 
 mean(data) 
}
mub=boot(z,meanstat,R=999)
boot.ci(mub,type = c("norm","basic","perc"))

# for the prevalence
pib=boot(d,meanstat,R=999)
boot.ci(pib,type = c("norm","basic","perc"))

# for the effect of age
slope=function(data,indices){
 data=data[indices,] 
 d=data[,2]
 age=data[,1]
 fit=glm(d~age,family=binomial) 
 fit$coef[2]
}
data=data.frame(cbind(age,d))
slopeb=boot(data,slope,R=999)
boot.ci(slopeb,type = c("norm","basic","perc"))

# parametric bootstrap for the mean antibody level

# Function to generate normal data; mle will contain 
# the mean and standard deviation of the original data
z.rg1=function(data,mle){
out=data
out=rnorm(length(out),mle[[1]],mle[[2]])
out
}

mub=boot(z,meanstat,sim="parametric",ran.gen=z.rg1,mle=list(mn=mean(z),sd=sqrt(var(z))) ,R=999)
boot.ci(mub,type = c("norm","basic","perc"))

# Function to generate uniform data, obviously a bad choice!
z.rg2=function(data,mle){
out=data
out=runif(length(out),0,mle)
out
}

mub=boot(z,meanstat,sim="parametric",ran.gen=z.rg2,mle=mean(z),R=999)
boot.ci(mub,type = c("norm","basic","perc"))

################################
#                              #
#   B.5 Bayesian Methodology   #
#                              #
################################
library(rjags)
library(R2jags)

model.file="C:/book/appendix/Bayeslogistic.txt"

beta0i=mean(d)
beta1i=0
inits=function(){list(beta0=beta0i,beta1=beta1i)}
N=length(d)
data=list("d","age","N")
parameters=c("beta0","beta1")

date()
set.seed(1234)
jagsfit <- jags(data=data, n.chains=2, inits=inits, parameters, n.burnin=5000,
    n.iter=10000, n.thin=5, n.sims = 2000, model.file=model.file)
date()
print(jagsfit)
plot(jagsfit)
traceplot(jagsfit)
attributes(jagsfit)

jagsfit$BUGSoutput$summary
dim(jagsfit$BUGSoutput$sims.matrix)
plot(jagsfit$BUGSoutput$sims.matrix[,2],type="l")








