#################################################
#################################################
#								#
#                CHAPTER 8: 				#
#    Semi-Parametric Approaches to Model		#
#     Prevalence and Force of Infection		#
#								#
# last update: 25/08/2012				#
#################################################
#################################################

setwd("c:/book/chapter8")

##################################################
## Section 8.1: Semi-parametric approaches       #
##								 #
##################################################

b19<-read.table("c:/book/chapter4/B19-countries.dat",header=T)

b19<-b19[!is.na(b19$parvores),]
b19<-b19[!is.na(b19$age),]

country<-sort(unique(b19$country))
grid<-NULL
pos<-NULL
tot<-NULL
for (j in 1:length(country))
{
  datab19<-b19[b19$country==country[j],]
  grid[[j]]<-sort(unique(round(datab19$age)))
  pos[[j]]<-rep(0,length(grid[[j]]))
  tot[[j]]<-rep(0,length(grid[[j]]))
  for (i in 1:length(grid[[j]])) 
  { 
    pos[[j]][i]<-sum(datab19$parvores[round(datab19$age)==grid[[j]][i]]) 
    tot[[j]][i]<-length(datab19$parvores[round(datab19$age)==grid[[j]][i]]) 
  }
}

### FIGURE 8.1
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
plot(grid[[1]],pos[[1]]/tot[[1]],cex=0.017*tot[[1]],pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,82),ylim=c(0,1))



##################################################
## Section 8.2: Penalized Splines                #
##								 #
##################################################


### Basisfunction for Semiparametric Regression
### Explaining how a spline is built up

parvovirus<-read.table("c:/book/chapter4/VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<40)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]

### Select degree and number of knots to show
deg<-2
knr<-3

### library Semipar: Ruppert et al. (2003)
library(SemiPar)
f<-function(x,y){return((x-y)^deg*(x>y))}
expit<-function(x){return(exp(x)/(1+exp(x)))}


spmfit<-spm(y~f(a,degree=deg,basis="trunc.poly",knots=default.knots(a,knr)),family="binomial")
uktrbasis<-outer(a,default.knots(a,knr),f)*t(matrix(rep(spmfit$fit$coefficients$random$dummy.group.vec.Handan,length(a)),ncol=length(a)))

### FIGURE 8.2
par(mfrow=c(2,2),lwd=2,las=1,cex.axis=1,cex.lab=1.1,mgp=c(2,0.5, 0),mar=c(3.1,3.4,3.1,3))

plot(range(a),range(outer(a,default.knots(a,knr),f)),type="n",xlab="age",ylab=expression(paste((age-kappa[k])["+"]^2)))
for (i in 1:knr){lines(a,outer(a,default.knots(a,knr),f)[,i],lty=i+1)}
points(default.knots(a,knr),rep(0,knr),pch=2)
plot(range(a),range(uktrbasis),type="n",xlab="age",ylab=expression(paste(u[k](age-kappa[k])["+"]^2)))
for (i in 1:knr){lines(a,uktrbasis[,i],lty=i+1)}
points(default.knots(a,knr),rep(0,knr),pch=2)
semparpart<-apply(uktrbasis,1,sum)
lines(a,semparpart,lwd=2,lty=1)
fixed<-(cbind(1,a,a^2,a^3)[,1:(1+deg)])%*%spmfit$fit$coefficients$fixed
#plot(a,(fixed+semparpart),ylim=c(-10,10),xlab="age",type="l",ylab="fixed and random linear predictor parts",lwd=2)
plot(a,(fixed+semparpart),ylim=c(-10,10),xlab="age",type="l",ylab=expression(eta(age)),lwd=2)
lines(a,(semparpart),lty=3,lwd=2)
lines(a,(fixed),lty=2,lwd=2)
#plot(a,expit(fixed+semparpart),ylim=c(0,1),xlab="age",type="l",lwd=2,ylab="fixed and random parts on prevalence scale")
plot(a,expit(fixed+semparpart),ylim=c(0,1),xlab="age",type="l",lwd=2,ylab=expression(pi(age)))
lines(a,expit(semparpart),lty=3,lwd=2)
lines(a,expit(fixed),lty=2,lwd=2)



##################################################
## Section 8.2: Penalized Splines               ##
##								##
## 8.2.1 Penalized likelihood framework         ##
##################################################

### Data parvovirus B19
parvovirus<-read.table("c:/book/chapter4/VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]
s<-parvovirus$sex[order(parvovirus$age)]

grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

### Functions
foi.num<-function(x,p)
{
grid<-sort(unique(x))
pgrid<-(p[order(x)])[duplicated(sort(x))==F]
dp<-diff(pgrid)/diff(grid)
foi<-approx((grid[-1]+grid[-length(grid)])/2,dp,grid[c(-1,-length(grid))])$y/(1-pgrid[c(-1,-length(grid))])
return(list(grid=grid[c(-1,-length(grid))],foi=foi))
}

##### Hastie and Tibshirani (use BIC to select the smoothing parameter) ######

BICf<-function(fit){return(fit$deviance+log(length(fit$y))*(fit$nl.df+2))}

# If the library 'mgcv' has been loaded before, it should be detached before using the 'gam'-library
# detach(package:mgcv) 
library(gam)
out<-matrix(NA,ncol=3,nrow=100)
for (i in 1:100){
dfi<-1+0.5*i
fit.gam.logit.ht<-gam(y~s(a,df=dfi),family=binomial(link="logit"))
fit.gam.cloglog.ht<-gam(y~s(a,df=dfi),family=binomial(link="cloglog"))
out[i,]<-c(dfi,BICf(fit.gam.logit.ht),BICf(fit.gam.cloglog.ht))
}
fit.gam.logit.ht<-gam(y~s(a,df=out[which.min(out[,2]),1]),family=binomial(link="logit"))
fit.gam.cloglog.ht<-gam(y~s(a,df=out[which.min(out[,3]),1]),family=binomial(link="cloglog"))

####### Eilers and Marx (1996) ########

library(splines)
source("GLM functions Eilers and Marx.R")
AICme<-function(fit){return(fit$dev+2*(fit$eff.df))}
BICme<-function(fit){return(fit$dev+log((dim(fit$summary.predicted)[1]))*(fit$eff.df))}

#out<-matrix(NA,ncol=3,nrow=400)
#for (i in 1:400){
#lambda<-0+0.05*(i-1)
#me.fit<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",family="binomial",lambda=lambda,x.predicted=a)
#out[i,]<-c(lambda,me.fit$aic,BICme(me.fit))
#}

#me.fit<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",family="binomial",lambda=out[which.min(out[,2]),1],x.predicted=a)
#me.fit<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",family="binomial",lambda=out[which.min(out[,3]),1],x.predicted=a)

#me.fit.BIC<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",family="binomial",lambda=out[which.min(out[,3]),1],x.predicted=a)
#BICme(me.fit.BIC)

### cloglog and logit link
me.fit.BIC.logit<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",family="binomial",lambda=80,x.predicted=a)
me.fit.BIC.cloglog<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="cloglog",family="binomial",lambda=200,x.predicted=a)

###### Wood (2006) #######
# If package 'gam' has been loaded before, it should be detached before using 'mgcv'

detach(package:gam)
library(mgcv)

BICWood<-function(fit){return(fit$deviance+log(length(fit$y))*sum(fit$edf))}

##### Thin plate splines (default gam option)  ######

fit.gam.logit.tps<-gam(y~s(a,bs="tp"),family=binomial(link="logit"))
fit.gam.cloglog.tps<-gam(y~s(a,bs="tp"),family=binomial(link="cloglog"))

##### Cubic regression splines  ######

fit.gam.logit.cr<-gam(y~s(a,bs="cr"),family=binomial(link="logit"))
fit.gam.cloglog.cr<-gam(y~s(a,bs="cr"),family=binomial(link="cloglog"))

### FIGURE 8.3
par(mfrow=c(2,2),lwd=2,las=1,cex.axis=1,cex.lab=1.1,mgp=c(2,0.5, 0),mar=c(3.1,3.1,3.1,3))

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="sero-prevalence",xlim=c(0,72),ylim=c(-0.1,1))
lines(a,fit.gam.logit.ht$fitted.values,lty=1,lwd=2,col=1)
lines(a,fit.gam.cloglog.ht$fitted.values,lty=2,lwd=2,col=1)
h1<-foi.num(a,fit.gam.logit.ht$fitted.values)
lines(h1$grid,h1$foi,lty=1,lwd=2)
h2<-foi.num(a,fit.gam.cloglog.ht$fitted.values)
lines(h2$grid,h2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="sero-prevalence",xlim=c(0,72),ylim=c(-0.1,1))
lines(a,me.fit.BIC.logit$summary.predicted[,3],lty=1,lwd=2,col=1)
lines(a,me.fit.BIC.cloglog$summary.predicted[,3],lty=2,lwd=2,col=1)
h1<-foi.num(a,me.fit.BIC.logit$summary.predicted[,3])
lines(h1$grid,h1$foi,lty=1,lwd=2)
h2<-foi.num(a,me.fit.BIC.cloglog$summary.predicted[,3])
lines(h2$grid,h2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="sero-prevalence",xlim=c(0,72),ylim=c(-0.1,1))
lines(a,predict(fit.gam.logit.cr,type="response"),lty=1,lwd=2,col=1)
lines(a,predict(fit.gam.cloglog.cr,type="response"),lty=2,lwd=2,col=1)
h1<-foi.num(a,predict(fit.gam.logit.cr,type="response"))
lines(h1$grid,h1$foi,lty=1,lwd=2)
h2<-foi.num(a,predict(fit.gam.cloglog.cr,type="response"))
lines(h2$grid,h2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="sero-prevalence",xlim=c(0,72),ylim=c(-0.1,1))
lines(a,predict(fit.gam.logit.tps,type="response"),lty=1,lwd=2)
lines(a,predict(fit.gam.cloglog.tps,type="response"),lty=2,lwd=2)
h1<-foi.num(a,predict(fit.gam.logit.tps,type="response"))
lines(h1$grid,h1$foi,lty=1,lwd=2)
h2<-foi.num(a,predict(fit.gam.cloglog.tps,type="response"))
lines(h2$grid,h2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

### Table 8.1

BICf(fit.gam.logit.ht.bic)
BICf(fit.gam.cloglog.ht.bic)
BICme(me.fit.BIC.logit)
BICme(me.fit.BIC.cloglog)
BICWood(fit.gam.logit.cr)
BICWood(fit.gam.cloglog.cr)
BICWood(fit.gam.logit.tps)
BICWood(fit.gam.cloglog.tps)



###########################################################
## Section 8.2: Penalized Splines                        ##
##								         ##
## 8.2.2 Generalized Linear Mixed Model Framework        ##
###########################################################

detach(package:gam)
library(mgcv)

#### Results from a GLMM with a radial basis as fitted in SAS Proc glimmix
glmm.m1<-read.table("glmm_model5.txt",header=T)
glmm.m1<-glmm.m1[order(glmm.m1$age),]

####  Thin plate splines ######
fit.gamm.logit.tps<-gamm(y~s(a,bs="tp"),family=binomial(link="logit"))

#### Cubic regression splines  #####
fit.gamm.logit.cr<-gamm(y~s(a,bs="cr"),family=binomial(link="logit"))

#### FIGURE 8.4
dev.off()
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=2,mgp=c(2, 0.5, 0),mar=c(3.1,3.1,3.1,3))

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(-0.1,1))
lines(glmm.m1$age[glmm.m1$country=="be"],glmm.m1$mu[glmm.m1$country=="be"],lty=1,lwd=2)
lines(a,predict(fit.gamm.logit.tps$gam,type="response"),lty=2,lwd=2)
lines(a,predict(fit.gamm.logit.cr$gam,type="response"),lty=3,lwd=2)

glmm.foi2.country1<-foi.num(glmm.m1$age[glmm.m1$country=="be"],glmm.m1$mu[glmm.m1$country=="be"])
lines(glmm.foi2.country1$grid,glmm.foi2.country1$foi,lty=1,lwd=2)
h1<-foi.num(a,predict(fit.gamm.logit.tps$gam,type="response"))
lines(h1$grid,h1$foi,lty=2,lwd=2)
h2<-foi.num(a,predict(fit.gamm.logit.cr$gam,type="response"))
lines(h2$grid,h2$foi,lty=3,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5)




###########################################################
## Section 8.3: Covariate Effects                        ##
##								         ##
###########################################################

####  HYPOTHETICAL  EXAMPLE

####  FIGURE 8.5
par(mfrow=c(2,2),lwd=2,las=1,cex.axis=1,cex.lab=1.1,mgp=c(2,0.5,0),mar=c(3.1,3.1,3.1,2))

# Top left panel
a<-seq(0,70,1)
eta1<- -2+ 0.1*a + sin(0.1*a)
p1<-exp(eta1)/(1+exp(eta1))
eta2<- -3+ 0.1*a + sin(0.1*a)
p2<-exp(eta2)/(1+exp(eta2))
plot(a,p1,type="l",ylim=c(0,1),xlab="age",ylab="prevalence")
lines(a,p2,lty=2)
 
# Top right panel
a<-seq(0,70,1)
eta1<- -2+ 0.1*a + sin(0.1*a)
p1<-exp(eta1)/(1+exp(eta1))
eta2<- -3+ 0.08*a + sin(0.1*a)
p2<-exp(eta2)/(1+exp(eta2))
plot(a,p1,type="l",ylim=c(0,1),xlab="age",ylab="prevalence")
lines(a,p2,lty=2)

# Bottom left panel
a<-seq(0,70,1)
eta1<- -2+ 0.1*a + sin(0.1*a)
p1<-exp(eta1)/(1+exp(eta1))
eta2<- -3+ 0.08*a + sin(0.1*a+1.5)
p2<-exp(eta2)/(1+exp(eta2))
plot(a,p1,type="l",ylim=c(0,1),xlab="age",ylab="prevalence")
lines(a,p2,lty=2)

# Bottom right panel
a<-seq(0,70,1)
eta1<- -2+ 0.1*a + sin(0.1*a)
p1<-exp(eta1)/(1+exp(eta1))
eta2<- -3+ 0.08*a + 0.2*sin(0.1*a+1.5)
p2<-exp(eta2)/(1+exp(eta2))
plot(a,p1,type="l",ylim=c(0,1),xlab="age",ylab="prevalence")
lines(a,p2,lty=2)


####  B19 EXAMPLE

countrydata<-read.table("c:/book/chapter4/B19-countries.dat",header=T,sep="\t")
countrydata1<-countrydata[(countrydata$country=="be")|(countrydata$country=="it"),]
yc<-countrydata1$parvores
ac<-countrydata1$age
country<-as.numeric(countrydata1$country=="be")
n<-length(yc)

#### FIGURE 8.6
# Models fitted with SAS proc glimmix
glmm.m2<-read.table("glmm_model2.txt",header=T)
glmm.m2<-glmm.m2[order(glmm.m2$age),]
glmm.m3<-read.table("glmm_model3.txt",header=T)
glmm.m3<-glmm.m3[order(glmm.m3$age),]
glmm.m4<-read.table("glmm_model4.txt",header=T)
glmm.m4<-glmm.m4[order(glmm.m4$age),]
glmm.m5<-read.table("glmm_model5.txt",header=T)
glmm.m5<-glmm.m5[order(glmm.m5$age),]


par(mfrow=c(2,2),lwd=2,las=1,cex.axis=1,cex.lab=1.1,mgp=c(2,0.5, 0),mar=c(3.1,3.4,3.1,3))

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,80),ylim=c(-0.1,1),type="n")
lines(glmm.m2$age[glmm.m2$country=="be"],glmm.m2$mu[glmm.m2$country=="be"],lty=1,lwd=2)
lines(glmm.m2$age[glmm.m2$country=="it"],glmm.m2$mu[glmm.m2$country=="it"],lty=2,lwd=2)
glmm.foi2.country1<-foi.num(glmm.m2$age[glmm.m2$country=="be"],glmm.m2$mu[glmm.m2$country=="be"])
glmm.foi2.country2<-foi.num(glmm.m2$age[glmm.m2$country=="it"],glmm.m2$mu[glmm.m2$country=="it"])
lines(glmm.foi2.country1$grid,glmm.foi2.country1$foi,lty=1,lwd=2)
lines(glmm.foi2.country2$grid,glmm.foi2.country2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,80),ylim=c(-0.1,1),type="n")
lines(glmm.m3$age[glmm.m3$country=="be"],glmm.m3$mu[glmm.m3$country=="be"],lty=1,lwd=2)
lines(glmm.m3$age[glmm.m3$country=="it"],glmm.m3$mu[glmm.m3$country=="it"],lty=2,lwd=2)
glmm.foi3.country1<-foi.num(glmm.m3$age[glmm.m3$country=="be"],glmm.m3$mu[glmm.m3$country=="be"])
glmm.foi3.country2<-foi.num(glmm.m3$age[glmm.m3$country=="it"],glmm.m3$mu[glmm.m3$country=="it"])
lines(glmm.foi3.country1$grid,glmm.foi3.country1$foi,lty=1,lwd=2)
lines(glmm.foi3.country2$grid,glmm.foi3.country2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,80),ylim=c(-0.1,1),type="n")
lines(glmm.m4$age[glmm.m4$country=="be"],glmm.m4$mu[glmm.m4$country=="be"],lty=1,lwd=2)
lines(glmm.m4$age[glmm.m4$country=="it"],glmm.m4$mu[glmm.m4$country=="it"],lty=2,lwd=2)
glmm.foi4.country1<-foi.num(glmm.m4$age[glmm.m4$country=="be"],glmm.m4$mu[glmm.m4$country=="be"])
glmm.foi4.country2<-foi.num(glmm.m4$age[glmm.m4$country=="it"],glmm.m4$mu[glmm.m4$country=="it"])
lines(glmm.foi4.country1$grid,glmm.foi4.country1$foi,lty=1,lwd=2)
lines(glmm.foi4.country2$grid,glmm.foi4.country2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,80),ylim=c(-0.1,1),type="n")
lines(glmm.m5$age[glmm.m5$country=="be"],glmm.m5$mu[glmm.m5$country=="be"],lty=1,lwd=2)
lines(glmm.m5$age[glmm.m5$country=="it"],glmm.m5$mu[glmm.m5$country=="it"],lty=2,lwd=2)
glmm.foi5.country1<-foi.num(glmm.m5$age[glmm.m5$country=="be"],glmm.m5$mu[glmm.m5$country=="be"])
glmm.foi5.country2<-foi.num(glmm.m5$age[glmm.m5$country=="it"],glmm.m5$mu[glmm.m5$country=="it"])
lines(glmm.foi5.country1$grid,glmm.foi5.country1$foi,lty=1,lwd=2)
lines(glmm.foi5.country2$grid,glmm.foi5.country2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5,cex=0.9)

# R-code to fit the interaction of a spline of age with country
detach(package:gam)
library(mgcv)
# Model 1
fit.gam.cov1<-gam(yc~country+s(ac,bs="tp"),family=binomial(link="logit"),data=countrydata1)
# Model 2
fit.gam.cov2<-gam(yc~country+country*ac+s(ac,bs="tp"),family=binomial(link="logit"),data=countrydata1)
# Model 4
countrydata1$country1<-as.numeric(countrydata1$country=="it")
countrydata1$country2<-as.numeric(countrydata1$country=="be")
fit.gam.cov4<-gam(yc~s(ac,bs="tp",by=country1)+s(ac,bs="tp",by=country2),family=binomial(link="logit"),data=countrydata1)

n<-length(yc)
BICWood(fit.gam.cov1)
BICWood(fit.gam.cov2)
BICWood(fit.gam.cov4)

###########################################################
## Section 8.4: Adaptive spline smoothing                ##
##								         ##
###########################################################

### Data parvovirus B19
parvovirus<-read.table("c:/book/chapter4/VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]
s<-parvovirus$sex[order(parvovirus$age)]

grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

library(AdaptFit)
kn.mean<-default.knots(a,20)
kn.var<-default.knots(a,5)
fit.nonadapt<-asp(y~f(a,knots=kn.mean),adap=F,family="binomial",tol=1e-6,niter=1000,niter.var=100000,omit.missing=T,spar.method="REML")
fit.adapt<-asp(y~f(a,knots=kn.mean,var.knot=kn.var),family="binomial",tol=1e-3,niter=1000,niter.var=100000,omit.missing=T,spar.method="REML")
pi.adapt<-fit.adapt$fitted
pi.nonadapt<-fit.nonadapt$fitted

### FIGURE 8.7
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=2,mgp=c(2, 0.5, 0),mar=c(3.1,3.1,3.1,3))

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="sero-prevalence",xlim=c(0,72),ylim=c(-0.1,1))
lines(a,pi.adapt,lty=1,lwd=2)
lines(a,pi.nonadapt,lty=2,lwd=2)
h1<-foi.num(a,pi.adapt)
lines(h1$grid,h1$foi,lty=1,lwd=2)
h2<-foi.num(a,pi.nonadapt)
lines(h2$grid,h2$foi,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",line=1.4,las=3)


###################################################################
## Section 8.6: Non-, Semi- and Parametric Methods to            ##
##		    Estimate the prevalence and force of infection   ##
##		    a case study                                     ##
###################################################################

### Data: Rubella

rubella<-read.table("c:/book/chapter4/Rubella-UK.dat",header=T)
age<-c(rep(rubella$Age,rubella$Pos),rep(rubella$Age,rubella$Neg))
resp<-c(rep(1,sum(rubella$Pos)),rep(0,sum(rubella$Neg)))
y<-resp[order(age)]
a<-age[order(age)]
attach(rubella)
grid<-Age
pos<-Pos
tot<-Pos+Neg

### Fit of best models
### For full exercise see program: chapter8_casestudy.r

### Nonlinear 1
loglik1<-function(theta){
alpha<-abs(theta[1])
beta<-abs(theta[2])
gamma<-abs(theta[3])
p<-1-exp((alpha/beta)*a*exp(-beta*a)+1/beta*(alpha/beta-gamma)*(exp(-beta*a)-1)-gamma*a)
return(-sum(y*log(p)+(1-y)*(log(1-p))))
}
result<-abs(nlminb(c(0.1,0.1,0.1),loglik1)$par)
alphaopt<-result[1]
betaopt<-result[2]
gammaopt<-result[3]
p1<-function(a){1-exp((alphaopt/betaopt)*a*exp(-betaopt*a)+1/betaopt*(alphaopt/betaopt-gammaopt)*(exp(-betaopt*a)-1)-gammaopt*a)}
# BIC-value
2*nlminb(c(0.1,0.1,0.1),loglik1)$objective+log(length(y))*3

### Nonlinear 2
loglik2<-function(theta){
alpha<-abs(theta[1])
beta<-abs(theta[2])
p<-1-exp((alpha/beta)*a*exp(-beta*a)+1/beta*(alpha/beta)*(exp(-beta*a)-1))
return(-sum(y*log(p)+(1-y)*(log(1-p))))
}
result<-abs(nlminb(c(0.1,0.1),loglik2)$par)
alphaopt<-result[1]
betaopt<-result[2]
p2<-function(a){1-exp((alphaopt/betaopt)*a*exp(-betaopt*a)+1/betaopt*(alphaopt/betaopt)*(exp(-betaopt*a)-1))}
# BIC-value
2*nlminb(c(0.1,0.1),loglik2)$objective+log(length(y))*2


### The best fractional polynomial (-0.9,-0.9)
p<-c(-0.9,-0.9)
ifelse(p[1]!=0,a1<-a^p[1],a1<-log(a))
ifelse(p[2]==p[1],a2<-a1*log(a),ifelse(p[2]!=0,a2<-a^p[2],a2<-log(a)))
fit<-glm(y~a1+a2,family="binomial")
print("BIC of the best FP")
print(AIC(fit,k=log(length(y))))

### The best local polynomial fit of degree 2
library(locfit)
BIClf<-function(lpfit){return(-2*(sum(y*log(fitted(lpfit))+(1-y)*log(1-fitted(lpfit))))-log(length(a))*as.vector(aic(lpfit,pen=1)[4]-aic(lpfit,pen=2)[4]))}
alpha=0.8
lpfit<-locfit(y~a,family="binomial",alpha=alpha)
lpfitd1<-locfit(y~a,deriv=1,family="binomial",alpha=alpha)
lpfoi=fitted(lpfitd1)*fitted(lpfit)
BIClf(lpfit)

### The best spline (penalized likelihood framework)
library(gam)
source("GLM functions Eilers and Marx.R")
BICme<-function(fit){return(fit$dev+log((dim(fit$summary.predicted)[1]))*(fit$eff.df))}
me.fit.BIC.logit<-pspline.fit(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",family="binomial",lambda=80,x.predicted=a)
BICme(me.fit.BIC.logit)

### FIGURE 8.8
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=2,mgp=c(2, 0.5, 0),mar=c(3.1,3.1,3.1,3))

grid<-Age
pos<-Pos
tot<-Pos+Neg

plot(grid,pos/tot,cex=0.015*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

### Farrington MODEL
lines(a,p2(a),lty=4,lwd=2)#,col=3)
lines(foi.num(a,p2(a))$grid,foi.num(a,p2(a))$foi,lty=4,lwd=2)#,col=3)

### Fractional Polynomial fit
pi.fv<-fit$fitted.values
lines(a,pi.fv,lwd=2)
lines(foi.num(a,pi.fv)$grid,foi.num(a,pi.fv)$foi,lwd=2)

### Local fit
lines(a,fitted(lpfit),lty=2,lwd=2)
lines(a,lpfoi,lty=2,lwd=2)

### B-spline
lines(a,me.fit.BIC.logit$summary.predicted[,3],lty=3,lwd=2)
h1<-foi.num(a,me.fit.BIC.logit$summary.predicted[,3])
lines(h1$grid,h1$foi,lty=3,lwd=2)

# Extra layout
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.5)
