#################################################
#################################################
#								#
#                CHAPTER 5: 				#
#    Estimating the force of infection from	#
#            incidence and prevalence		#
#								#
# last update: 25/08/2012				#
#################################################
#################################################

setwd("C:/book/chapter5.r")

#################################################
## Section 5.1: Serological data 		 	#
##								#
#################################################


### Discussing serological data and modelling issues on the Parvo Data
### Samples with antibody activity levels >24 U/ml were considered positive, 
### samples with antibody activity levels<20 were considered negative 
### and samples in between were considered equivocal


### Reading the parvo data, excluding the first 6 months
data<-read.table("C:/book/chapter4/VZV-b19-BE.dat",header=T)
subset<-(data$age>0.5)&(!is.na(data$age))&(!is.na(data$parvores))
data<-data[subset,]
z<-log(data$parvouml[order(data$age)])
y<-data$parvores[order(data$age)]
a<-data$age[order(data$age)]
s<-data$sex[order(data$age)]

### FIGURE 5.1 (left panel)
### PLOT OF ANTIBODY LEVELS AS FUNCTION OF AGE, WITH THRESHOLD LINES
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=2,mgp=c(3, 0.5, 0))

plot(a,z,main="",xlab="age",ylab="log(antibody levels)",xlim=c(0,70))
abline(h=log(20))
abline(h=log(24))


### FIGURE 5.1 (right panel)
### HISTOGRAM OF ANTIBODYLEVELS, WITH THRESHOLD VALUES
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=2,mgp=c(3, 0.5, 0))

hist(z,freq=F,nclass=sqrt(length(z)),main="",xlab="log(antibody levels)",ylab="relative frequency")
abline(v=log(20))
abline(v=log(24))

### FIGURE 5.2 (left panel)
### DICHOTOMIZED DATA, AS A FUNCTION OF AGE
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=2,mgp=c(3, 0.5, 0))

plot(a,jitter(y,factor=0.1),main="",xlab="age",ylab="seroprevalence",xlim=c(0,70))

### FIGURE 5.2 (right panel)
### GROUPED PROPORTIONS POSITIVE, AS A FUNCTION OF AGE
grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))

plot(grid,pos/tot,cex=0.02*tot,pch=19,main="",xlab="age",ylab="seroprevalence",xlim=c(0,70),ylim=c(-0.1,1))


#################################################
## Section 5.2: Modelling Issues 		 	#
##								#
#################################################


### Reading the parvo data including the first 6 months
data<-read.table("C:/book/chapter4/VZV-b19-BE.dat",header=T)
subset<-(!is.na(data$age))&(!is.na(data$parvores))
data<-data[subset,]
z<-log(data$parvouml[order(data$age)])
y<-data$parvores[order(data$age)]
a<-data$age[order(data$age)]
s<-data$sex[order(data$age)]

zs=z[a>0.5]
ys=y[a>0.5]
as=a[a>0.5]

### FIGURE 5.3
### Scatterplot using proportions over 1-year age-categories
grid<-sort(unique(round(as)))
neg<-table(ys,round(as))[1,]
pos<-table(ys,round(as))[2,]
tot<-neg+pos
#par(mfrow=c(1,1),cex.axis=1.2,cex.lab=1.2,lwd=2,las=1,mgp=c(3, 0.5, 0))
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))

plot(grid,pos/tot,cex=0.02*tot,pch=19,main="",xlab="age",ylab="seroprevalence",xlim=c(0,70),ylim=c(-0.1,1))

zm=z[a<=0.5]
ym=y[a<=0.5]
am=a[a<=0.5]

pos<-sum(ym)
tot<-length(ym)

points(0.25,pos/tot,cex=0.02*tot,pch=19)
points(0.25,pos/tot,cex=0.5*tot,pch=21)

library(locfit)

alpha=c(0,6)
lpfit<-locfit(y~a,family="binomial",alpha=alpha)
lpfit
lines(a,fitted(lpfit),lty=1,lwd=2)
# local fit force of infection based on the derivative (on the logit scale)
lpfitd1<-locfit(y~a,deriv=1,family="binomial",alpha=alpha)
lpfoi=fitted(lpfitd1)*fitted(lpfit)
lines(a,lpfoi,lty=1,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
abline(h=0)
mtext(side=4,"force of infection", las=3,line=1.6)

#################################################
## Section 5.3: Incidence data 		 	#
##								#
#################################################

data<-read.table("c:/book/chapter4/incidencedataHepB.txt",header=T)
data<-data[!data$year==1993,]
data$age[data$age<1]<-1
attach(data)


data1999<-read.table("data1999.txt",header=T)
pop<-read.table("population St Petersburg.txt",header=T)



# CONVERSION ASYMPTOMATIC CASES - INFECTED CASES
sympto.f<-function(age,infect)
{
return((4.4+10.1*log10(age))*infect/100)
}

infect.f<-function(age,symp)
{
return(100/(4.4+10.1*log10(age))*symp)
}


###  HEPATITIS B: AGE-SPECIFIC INCIDENCE IN TIME, PER AGE GROUP

library(mgcv)


inf<-infect.f(age,rep(1,length(age)))
t<-sort(unique(data$year))
date.year<-(as.numeric(format(as.Date(data$date,"%d/%m/%Y"), "%Y"))-1993)
date.month<-as.numeric(format(as.Date(data$date,"%d/%m/%Y"), "%m"))
time<-date.month+(date.year-1)*12

data$agegr[data$age<7]<-1
data$agegr[data$age>=7 & data$age<15]<-2
data$agegr[data$age>=15 & data$age<20]<-3
data$agegr[data$age>=20 & data$age<30]<-4
data$agegr[data$age>=30 & data$age<40]<-5
data$agegr[data$age>=40 & data$age<50]<-6
data$agegr[data$age>=50 & data$age<60]<-7
data$agegr[data$age>=60]<-8

pop$agegr[pop$age<7]<-1
pop$agegr[pop$age>=7 & pop$age<15]<-2
pop$agegr[pop$age>=15 & pop$age<20]<-3
pop$agegr[pop$age>=20 & pop$age<30]<-4
pop$agegr[pop$age>=30 & pop$age<40]<-5
pop$agegr[pop$age>=40 & pop$age<50]<-6
pop$agegr[pop$age>=50 & pop$age<60]<-7
pop$agegr[pop$age>=60]<-8

### FIGURE 5.4
layout(matrix(c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8), 4,4, byrow = TRUE))
par(lwd=2,las=1,cex.axis=1.1,cex.lab=1.1,mgp=c(2.2, 0.5, 0),mar=c(4.1,4.1,2.8,2.1))


for (i in 1:8)
{
freq<-rep(0,60)
denom<-rep(0,60)
for (j in 1:60) {
freq[j]<-sum(inf[data$agegr==i & time==j])
yr<-1994+ trunc((j-1)/12)
denom[j]<-sum(pop$WOMEN[pop$agegr==i & pop$year==yr])+sum(pop$MEN[pop$agegr==i & pop$year==yr])
}
plot(1:60,freq/denom*100000,type="n",cex=0.02,xlab="Time (in months from January 1999)",ylab="Incidence")
lines(1:60,freq/denom*100000,type="b",lty=2)

x<-1:60
y<-freq/denom
fit.gam<-gam(y~s(x,bs="cr"))
lines(x,predict(fit.gam)*100000,lty=1)

if (i==1)  {title("Age: 1-6")}
if (i==2)  {title("Age: 7-14")}
if (i==3)  {title("Age: 15-19")}
if (i==4)  {title("Age: 20-29")}
if (i==5)  {title("Age: 30-39")}
if (i==6)  {title("Age: 40-49")}
if (i==7)  {title("Age: 50-59")}
if (i==8)  {title("Age: 60+")}
}



###  CALCULATE THE FORCE OF INFECTION USING POPULATION PER YEAR
###  FIGURE 5.5

t<-sort(unique(data$year))

layout(matrix(c(1,1,2,2,3,3,0,4,4,5,5,0), 2,6, byrow = TRUE))
#par(cex.axis=1.2,cex.lab=1.2,lwd=2,las=1,mgp=c(3, 0.5, 0))
par(lwd=2,las=1,cex.axis=1.2,cex.lab=1.1,mgp=c(2.8, 0.5, 0))

for (j in 1:6)
{
datayear<-data[data$year==t[j],]
popyear<-pop[pop$year==t[j],]
inf<-infect.f(age,rep(1,length(age)))

a<-1:90
psusp<-c(0.8,0.85,0.9,0.95,1)
for (k in 1:5)
{
p0<-psusp[k]
Xa<-sapply(1:length(a),function(x) sum(inf[datayear$age<=x & datayear$age>(x-1)]))
Na<-(popyear[,2]+popyear[,3])[2:length(a)+1]

Sa<-NULL
Sa[1]<-p0*Na[1]

for (i in 2:length(a))
{
Sa[i]<-(Sa[i-1]-Xa[i-1])*Na[i]/Na[i-1]
}

y<-Xa/Sa
fit.gam<-gam(y~s(a,bs="cs"))

if (k==1) {
l<-length(psusp)+1-k
plot(a[!is.na(y)],predict(fit.gam,type="response"),type="l",xlab="age",ylab="force of infection",lty=5)
}
else {
lty.x=6-k
lines(a[!is.na(y)],predict(fit.gam,type="response"),lty=lty.x)
}
title(t[j])
}
}


##########################
##  SEROPREVALENCE DATA ##
##########################
rm(list=ls(all=TRUE))
library(mgcv)
data<-read.table("c:/book/chapter4/seroprevalencedataHepB.txt",header=T)
attach(data)


foi.num<-function(x,p)
{
grid<-sort(unique(x))
pgrid<-(p[order(x)])[duplicated(sort(x))==F]
dp<-diff(pgrid)/diff(grid)
foi<-approx((grid[-1]+grid[-length(grid)])/2,dp,grid[c(-1,-length(grid))])$y/(1-pgrid[c(-1,-length(grid))])
return(list(grid=grid[c(-1,-length(grid))],foi=foi))
}


a<-rep(age,tot)
y<-NULL
for (i in 1:length(age))
{
if ((pos[i]*(tot[i]-pos[i]))>0)
	y<-c(y,rep(1,pos[i]),rep(0,tot[i]-pos[i]))
if (tot[i]>0 & pos[i]==0)
	y<-c(y,rep(0,tot[i]))
if (tot[i]>0 & ((tot[i]-pos[i])==0))
	y<-c(y,rep(1,tot[i]))
}
sex<-c(rep(1,499),rep(2,501))
cbind(a,y,sex)

### FIGURE 5.6
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(5.1,3.1,4.1,3.5))


fit.gam.logit.cp<-gam(y~s(a,bs="tp"),family=binomial(link="logit"))
plot(age,pos/tot,cex=0.05*tot,pch=19,xlab="age",ylab="sero-prevalence",xlim=c(0,80),ylim=c(-0.1,1.0))
lines(sort(a),predict(fit.gam.logit.cp,type="response")[sort(a,index.return=TRUE)$ix],lty=1,lwd=2)
h1<-foi.num(a,predict(fit.gam.logit.cp,type="response"))
lines(h1$grid,h1$foi*20,lty=2,lwd=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4),labels=c(0.0,0.005,0.01,0.015,0.02))
mtext(side=4,"force of infection",las=3,line=2.5)
