#################################################
#################################################
#						#
#                CHAPTER 13: 			#
#    Estimating Age-Time Dependent Prevalence	#
#   and Force of Infection from Serial 		#
#		Prevalence Data			#
#						#
# last update: 26/08/2012			#
#################################################
#################################################

rm(list=ls(all=TRUE))
setwd("c:/book/chapter13")

##### DEFINE FUNCTIONS
library(mgcv)

pavit<- function(pos=pos,tot=rep(1,length(pos)))
{
	gi<- pos/tot
	pai1 <- pai2 <- gi
	N <- length(pai1)
	ni<-tot
	for(i in 1:(N - 1)) {
		if(pai2[i] > pai2[i + 1]) {
			pool <- (ni[i]*pai1[i] + ni[i+1]*pai1[i + 1])/(ni[i]+ni[i+1])
			pai2[i:(i + 1)] <- pool
			k <- i + 1
			for(j in (k - 1):1) {
				if(pai2[j] > pai2[k]) {
				  pool.2 <- sum(ni[j:k]*pai1[j:k])/(sum(ni[j:k]))
				  pai2[j:k] <- pool.2
				}
			}
		}
	}
	return(list(pai1=pai1,pai2=pai2))
}


foi.num<-function(x,p)
{
grid<-sort(unique(x))
pgrid<-(p[order(x)])[duplicated(sort(x))==F]
dp<-diff(pgrid)/diff(grid)
foi<-approx((grid[-1]+grid[-length(grid)])/2,dp,grid[c(-1,-length(grid))])$y/(1-pgrid[c(-1,-length(grid))])
return(list(grid=grid[c(-1,-length(grid))],foi=foi))
}

##################################################
## Section 13.1: Introduction 		       #
##								 #
##################################################

### READ IN DATA
tbdata=read.table("c:/book/chapter4/tb.dat",header=T)
attach(tbdata)
a=AGE
x=BRTHYR-min(BRTHYR)
xf=as.factor(BRTHYR)
g=SEX
ts=a+BRTHYR
s=PPD
p=s/N
f=N-s

### FIGURE 13.1
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
plot(a,ts,pch=16,xlab="age",ylab="year")


##################################################
## Section 13.2: Proportional Hazards Model      #
##								 #
## 13.2.3 Application to Tuberculosis Data	 #
##################################################

y=cbind(s,f)
summary(f)
summary(s/f)

agrid=seq(range(unique(a))[1],range(unique(a))[2],length.out=100)

# Model 6:
gamfit6=gam(y~s(a)+x*g,family=binomial(link="cloglog"))

### FIGURE 13.2 (left panel)
### main="prevalence contours for model 6 (females)"
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
vis.gam(gamfit6,view=c("a","x"),cond=list(g=0),labcex=0.8,type="response",plot.type="contour",color="bw",too.far=0.15,xlab="age", ylab="(shifted) year of birth",main=" ") 
points(a[g==0],x[g==0],cex=100*p[g==0],lwd=2)

### FIGURE 13.2 (right panel)
### main="prevalence contours for model 6 (males)"
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
vis.gam(gamfit6,view=c("a","x"),cond=list(g=1),labcex=0.8,type="response",plot.type="contour",color="bw",too.far=0.15,xlab="age", ylab="(shifted) year of birth",main=" ") 
points(a[g==1],x[g==1],cex=100*p[g==1],lwd=2)

### Table 13.1
# More models are fitted here
# s(a,b) is used but te(a,b) is a better starting point (omitted here)
#----------------------------------------------------------------------
gamfit=gam(y~a+x,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~a+x+g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~a*x+g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),sum(gamfit$edf),sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+x,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+x+g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+x*g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+s(x),family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+s(x)+g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+s(x)+s(x,by=g)+g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a)+s(x)+s(a,by=g)+s(x,by=g)+g,family=binomial(link="cloglog"))
cbind(AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a,x),family=binomial(link="cloglog"))
c(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a) + s(x) + s(a,x),family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a,x)+g,family=binomial(link="cloglog"))
cbind(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))

gamfit=gam(y~s(a,x)+g+s(a,x,by=g),family=binomial(link="cloglog"))
c(sum(gamfit$edf),AIC(gamfit),AIC(gamfit, k = log(nrow(y))))


##################################################
## Section 13.4: The tuberculosis Data Revisited #
##								 #
################################################

# Model 12:
gamfit12=gam(y~s(a,x)+g+s(a,x,by=g),family=binomial(link="cloglog"))

### FIGURE 13.3 (left panel)
### main="prevalence contours for model 12 (females)"
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
vis.gam(gamfit12,view=c("a","x"),cond=list(g=0),labcex=0.8,type="response",plot.type="contour",color="bw",too.far=0.15,xlab="age", ylab="(shifted) year of birth",main=" ") 
points(a[g==0],x[g==0],cex=100*p[g==0],lwd=2)

### FIGURE 13.3 (right panel)
### main="prevalence contours for model 12 (males)"
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
vis.gam(gamfit12,view=c("a","x"),cond=list(g=1),labcex=0.8,type="response",plot.type="contour",color="bw",too.far=0.15,xlab="age", ylab="(shifted) year of birth",main=" ") 
points(a[g==1],x[g==1],cex=100*p[g==1],lwd=2)


#### FIGURE 13.4
# the fitted prevalence curves as shown in the book
gamfit=gam(y~s(a,x)+g+s(a,x,by=g),family=binomial(link="cloglog"))
par(mfrow=c(3,3),lwd=2,las=1,cex.axis=1.1,cex.lab=1.3,mar=c(4.1,4.1,2.1,0.5),mgp=c(2.8,0.5,0))
for (i in sort(unique(ts))){ 
year=1900+i
plot(a[(ts==i)&(g==0)],p[(ts==i)&(g==0)],xlim=c(6,18),cex=1.4,pch=16,xlab="age",ylab="prevalence",main=year,ylim=c(0,max(p[(ts==66)&(g==1)])))
points(a[(ts==i)&(g==1)],p[(ts==i)&(g==1)],xlim=c(6,18),cex=1.4,pch=1,lwd=2)
lines(a[(ts==i)&(g==0)],fitted(gamfit,type="response")[(ts==i)&(g==0)])
lines(a[(ts==i)&(g==1)],fitted(gamfit,type="response")[(ts==i)&(g==1)],lty=2)
abline(h=fitted(gamfit,type="response")[(ts==i)&(g==0)][a[(ts==i)&(g==0)]==12],col="grey")
abline(h=fitted(gamfit,type="response")[(ts==i)&(g==1)][a[(ts==i)&(g==1)]==12],col="grey",lty=2)
}


##### FIGURE 13.5 en 13.6
par(mfrow=c(3,3),lwd=2,las=1,cex.axis=1.1,cex.lab=1.3,mar=c(4.1,4.1,2.1,0.5),mgp=c(2.7,0.5,0))
teller=1
for (xind in sort(unique(x))){
newd <- data.frame(a=agrid,x=xind,g=0)
ypred=pavit(predict.gam(gamfit,newdata=newd,type="response"))
agridfoi=foi.num(agrid,ypred$pai2)$grid
ypredfoi=foi.num(agrid,ypred$pai2)$foi
plot(a[(x==xind)&(g==0)],p[(x==xind)&(g==0)],xlim=range(unique(a)),ylim=c(0,max(p)),xlab="age",ylab="prevalence",main=1900+min(BRTHYR)+xind,pch=16,cex=1.4)
lines(agrid,ypred$pai2)
lines(agridfoi,ypredfoi)
newd <- data.frame(a=agrid,x=xind,g=1)
ypred=pavit(predict.gam(gamfit,newdata=newd,type="response"))
agridfoi=foi.num(agrid,ypred$pai2)$grid
ypredfoi=foi.num(agrid,ypred$pai2)$foi
points(a[(x==xind)&(g==1)],p[(x==xind)&(g==1)],xlim=range(unique(a)),ylim=c(0,max(p)),cex=1.4,pch=1)
lines(agrid,ypred$pai2,lty=2)
lines(agridfoi,ypredfoi,lty=2)
teller=teller+1
if (teller==10) {
win.graph()
par(mfrow=c(3,3),lwd=2,las=1,cex.axis=1.1,cex.lab=1.3,mar=c(4.1,4.1,2.1,0.5),mgp=c(2.7,0.5,0))
}
}


##################################################
## Section 13.5: Another Example: Hepatitis A    #
##								 #
##################################################


# Reading in data from Flanders only
#------------------------------------
data<-read.table("c:/book/chapter4/hepatitis1993-2002.dat",header=T)
attach(data)
y<-status
a<-age
x<-birth.cohort
ts=a+x

# Seroprevalence plot
resp1<-y[ts==1993]
age1<-a[ts==1993]
pos1<-table(age1,resp1)[,2]
neg1<-table(age1,resp1)[,1]
tot1<-neg1+pos1
plot(unique(age1),pos1/tot1,cex=0.02*tot1,ylab="prevalence",xlab="age",xlim=c(0,100))
title("HAV 1993")

resp2<-y[ts==2002]
age2<-a[ts==2002]
pos2<-table(age2,resp2)[,2]
neg2<-table(age2,resp2)[,1]
tot2<-neg2+pos2
plot(unique(age2),pos2/tot2,cex=0.02*tot2,ylab="prevalence",xlab="age",xlim=c(0,100))
title("HAV 2002")



# Analyses
#----------
library(mgcv)

x=x-min(x)
xf=as.factor(x)

gamfit1=gam(y~a+x,family=binomial(link="cloglog"))
c(sum(gamfit1$edf),AIC(gamfit1),AIC(gamfit1, k = log(length(y))))

gamfit2=gam(y~s(a)+x,family=binomial(link="cloglog"))
c(sum(gamfit2$edf),AIC(gamfit2),AIC(gamfit2, k = log(length(y))))

gamfit3=gam(y~a+s(x),family=binomial(link="cloglog"))
c(sum(gamfit3$edf),AIC(gamfit3),AIC(gamfit3, k = log(length(y))))

# best fitting model
gamfit4=gam(y~s(a)+s(x),family=binomial(link="cloglog"))
c(sum(gamfit4$edf),AIC(gamfit4),AIC(gamfit4, k = log(length(y))))

gamfit5=gam(y~s(a,x),family=binomial(link="cloglog"))
c(sum(gamfit5$edf),AIC(gamfit5),AIC(gamfit5, k = log(length(y))))

# Testing for the proportional hazards assumption
anova(gamfit4,gamfit5,test="Chisq")

### FIGURE 13.7
par(mfrow=c(2,1),lwd=3,las=1,cex.axis=1.1,cex.lab=1.1,mgp=c(3, 0.5, 0))
for (i in unique(ts)){ 
x1<-a[ts==i]
pos1<-table(a[ts==i],y[ts==i])[,2]
neg1<-table(a[ts==i],y[ts==i])[,1]
tot1<-neg1+pos1
plot(unique(x1),pos1/tot1,cex=0.02*tot1,ylab="prevalence",xlab="age",xlim=c(0,80),main=i,lwd=2)
lines(a[(ts==i)],fitted(gamfit2,type="response")[(ts==i)],lty=2,lwd=3)
lines(a[(ts==i)],fitted(gamfit4,type="response")[(ts==i)],lwd=3)
lines(a[(ts==i)],fitted(gamfit5,type="response")[(ts==i)],lty=3,lwd=3)
legend(60,0.54,c("model 2","model 4","model 5" ),lty=c(2,1,3),
lwd=c(2,2,3))
}


### FIGURE 13.8 (left panel)
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
agrid=seq(range(unique(a))[1],range(unique(a))[2],length.out=100)

x1<-age1
pos1<-table(age1,resp1)[,2]
neg1<-table(age1,resp1)[,1]
tot1<-neg1+pos1
plot(unique(x1),pos1/tot1,cex=0.02*tot1,ylab="prevalence",xlab="age",xlim=c(0,80),type="n")
points(unique(x1),pos1/tot1,cex=0.02*tot1,ylab="prevalence",xlab="age",xlim=c(0,80))

x2<-age2
pos2<-table(age2,resp2)[,2]
neg2<-table(age2,resp2)[,1]
tot2<-neg2+pos2
points(unique(x2),pos2/tot2,cex=0.02*tot2,ylab="prevalence",xlab="age",xlim=c(0,100),col="darkgrey")

gamfitplot=gamfit4

for (xsel in c(10,20,30,40,50,60,70,80)){
agel=1993-1914-xsel
ager=2002-1914-xsel
agesel=(agrid>=agel)&(agrid<=ager)
newd <- data.frame(a=agrid,x=xsel)
ypred=predict.gam(gamfitplot,newdata=newd,type="terms")
con=ypred[1,2]+attributes(ypred)$constant
s1=ypred[,1]+con
pi0=1-exp(-exp(s1))
pi0m=pavit(1-exp(-exp(s1)))$pai2
lines(agrid[agesel],pi0m[agesel],lwd=2)
lines(agrid[!agesel],pi0m[!agesel],lty=3)
}

### FIGURE 13.8 (right panel)
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))

plot(c(min(agrid),max(agrid),min(agrid),max(agrid)),c(0,0.1,0,0.1),type="n",xlab="age",ylab="force of infection")
for (xsel in seq(0,80,by=10)){
agel=1993-1914-xsel
ager=2002-1914-xsel
agesel=(agrid>=agel)&(agrid<=ager)
newd <- data.frame(a=agrid,x=xsel)
ypred=predict.gam(gamfitplot,newdata=newd,type="terms")
con=ypred[1,2]+attributes(ypred)$constant
s1=ypred[,1]+con
pi0=1-exp(-exp(s1))
pi0m=pavit(1-exp(-exp(s1)))$pai2
agridfoi=foi.num(agrid,pi0m)$grid
ypredfoi=foi.num(agrid,pi0m)$foi
if (xsel==0) lines(agridfoi,ypredfoi,col="darkgrey",lwd=3)
if (xsel!=0) {
lines(agridfoi[agesel],ypredfoi[agesel],lwd=2)
lines(agridfoi[!agesel],ypredfoi[!agesel],lty=3)
}
}

### FIGURE 13.9
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))

plot(a,x,xlab="age",ylab="year of birth")

##################################################
## Section 13.6: Monotonicity                    #
##								 #
##################################################

# This section is based on an additive monotone pspline routine
# The code for the routine is available together with this program
# The code for Figure 13.10 is available from the authors

