#################################################
#################################################
#								#
#                CHAPTER 4: 				#
#        Data sources for modeling	            #
#		infectious diseases			#
#								#
# last update: 21/08/2012				#
#################################################
#################################################

setwd("C:/book/chapter4.r")

###########
# HAV-BE
###########
hav<-read.table("HAV-BE.dat",header=T)

grid<-hav$Age
pos<-hav$Neg
tot<-hav$Tot

### FIGURE 4.1 (left panel)
par(mfrow=c(1,1),cex.axis=1.2,cex.lab=1.2,lwd=3,las=1,mgp=c(3, 0.5, 0))
plot(grid,pos/tot,cex=0.022*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,86),ylim=c(0,1))


#################
# HAV - FLANDERS
#################
data<-read.table("hepatitis1993-2002.dat",header=T)
attach(data)
y<-status
a<-age
x<-birth.cohort
ts=a+x

### Seroprevalence plot 2002
resp2<-y[ts==2002]
age2<-a[ts==2002]
pos2<-table(age2,resp2)[,2]
neg2<-table(age2,resp2)[,1]
tot2<-neg2+pos2

### FIGURE 4.1 (right  panel)
plot(unique(age2),pos2/tot2,cex=0.02*tot2,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,86),ylim=c(0,1))

###########
# HAV -BUL
###########
hav<-read.table("HAV-BUL.dat",header=T)

grid<-hav$Age
pos<-hav$Pos
tot<-hav$Tot

### FIGURE 4.1 (lower panel)
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
plot(grid,pos/tot,cex=0.05*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,86),ylim=c(0,1))


#########################
# Hepatitis B in Russia
#########################
rm(list=ls(all=TRUE))
data<-read.table("seroprevalencedataHepB.txt",header=T)
attach(data)

agegrid<-trunc(age/1)*1
agegrid[age>40]<-trunc(age[age>40]/5)*5
grid<-sort(unique(agegrid))
posgrid<-NULL;totgrid<-NULL
for (i in 1:length(grid)) 
  { 
    posgrid[i]<-sum(pos[agegrid==grid[i]]) 
    totgrid[i]<-sum(tot[agegrid==grid[i]])
  }

### FIGURE 4.2
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))
plot(grid,posgrid/totgrid,cex=0.03*totgrid,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,72))



################
# Hepatitis C
################
 
hcv.dat<-read.table('hcvdat.txt', header=FALSE, 
                           na.strings=".", dec=".",  strip.white=TRUE)

dim(hcv.dat)


### FIGURE 4.3
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))

plot(hcv.dat$V1,hcv.dat$V4,cex=0.07*hcv.dat$V2,pch=16,xlab="age",ylab="seroprevalence",xlim=c(0,25),ylim=c(0,1))


###########
# Mumps
###########
mumps<-read.table("MUMPSUK.dat",header=T)

grid<-mumps$age
neg<-mumps$neg
pos<-mumps$pos
tot<-mumps$ntot

### FIGURE 4.4 (left panel)
windows(record=TRUE, width=7, height=3.5)
par(mfrow=c(1,3),cex.axis=1.3,cex.lab=1.5,lwd=3,las=1,mgp=c(3, 0.5, 0))

plot(grid,pos/tot,cex=0.005*tot,pch=16,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

###########
# Rubella
###########
rubella<-read.table("rubella-UK.dat",header=T)

grid<-rubella$Age
neg<-rubella$Neg
pos<-rubella$Pos
tot<-neg+pos

### FIGURE 4.4 (middle panel)
plot(grid,pos/tot,cex=0.01*tot,pch=16,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))


################
# Rubella-Mumps
################
rubmumps<-read.table("Rubella-Mumps-UK.dat",header=T)

grid<-rubmumps$Age
pos<-rubmumps$PP
tot<-rubmumps$PP+rubmumps$NP+rubmumps$PN+rubmumps$NN

### FIGURE 4.4 (right panel)
plot(grid,pos/tot,cex=0.01*tot,pch=16,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

#########################
# Parvo B19 - countries
#########################
b19<-read.table("B19-countries.dat",header=T)

b19<-b19[!is.na(b19$parvores),]
b19<-b19[!is.na(b19$age),]

country<-sort(unique(b19$country))
grid<-NULL
pos<-NULL
tot<-NULL
for (j in 1:length(country))
{
  datab19<-b19[b19$country==country[j],]
  grid[[j]]<-sort(unique(round(datab19$age)))
  pos[[j]]<-rep(0,length(grid[[j]]))
  tot[[j]]<-rep(0,length(grid[[j]]))
  for (i in 1:length(grid[[j]])) 
  { 
    pos[[j]][i]<-sum(datab19$parvores[round(datab19$age)==grid[[j]][i]]) 
    tot[[j]][i]<-length(datab19$parvores[round(datab19$age)==grid[[j]][i]]) 
  }
}

### FIGURE 4.5 BE
layout(matrix(c(0,1,1,2,2,0,3,3,4,4,5,5), 2,6, byrow = TRUE))
par(lwd=2,las=1,cex.axis=1.1,cex.lab=1.1,mgp=c(3, 0.5, 0))

plot(grid[[1]],pos[[1]]/tot[[1]],cex=0.02*tot[[1]],pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,82),ylim=c(0,1))

### FIGURE 4.5 EW
plot(grid[[2]],pos[[2]]/tot[[2]],cex=0.02*tot[[1]],pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,82),ylim=c(0,1))

### FIGURE 4.5 FI
plot(grid[[3]],pos[[3]]/tot[[3]],cex=0.02*tot[[1]],pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,82),ylim=c(0,1))  

### FIGURE 4.5 IT
plot(grid[[4]],pos[[4]]/tot[[4]],cex=0.02*tot[[1]],pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,82),ylim=c(0,1))

### FIGURE 4.5 PL
plot(grid[[5]],pos[[5]]/tot[[5]],cex=0.02*tot[[1]],pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,82),ylim=c(0,1))  


################
# Tuberculosis
################

TB<-read.table("tb.dat",header=T)

grid<-TB$AGE
pos<-TB$PPD
tot<-TB$N

### FIGURE 4.6 (left panel)
windows(record=TRUE, width=6.65, height=3.5)
par(mfrow=c(1,2),lwd=2,las=1,cex.axis=0.8,cex.lab=0.8,mgp=c(2, 0.45, 0),
mar=c(3.2, 2.8, 3.5, 1.5))

plot(grid,pos/tot,pch=19,cex=0.00002*tot,xlab="age",ylab="prevalence",xlim=c(6,18))

grid<-TB$BRTHYR+1900
pos<-TB$PPD
tot<-TB$N

### FIGURE 4.6 (right panel)
plot(grid,pos/tot,pch=19,cex=0.00002*tot,xlab="year",ylab="prevalence")


################
# VZV-Flanders
################

VZV<-read.table("VZV-Flanders.dat",header=T)

grid<-VZV$age
pos<-VZV$pos
tot<-VZV$ntot

### FIGURE 4.7 (left panel)
windows(record=TRUE, width=6.65, height=3.5)
par(mfrow=c(1,2),lwd=2,las=1,cex.axis=0.8,cex.lab=0.8,mgp=c(2, 0.45, 0),
mar=c(3.2, 2.8, 3.5, 1.5))

plot(grid,pos/tot,cex=0.015*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

###########
# VZV-BE
###########

parvovirus<-read.table("VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$VZVres)
parvovirus<-parvovirus[subset,]
y<-parvovirus$VZVres[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]

# Scatterplot using proportions over 1-year age-categories
grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

### FIGURE 4.7 (right panel)
plot(grid,pos/tot,cex=0.015*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))




############################
# VARICELLA AND PARVOVIRUS
############################

parvoB19<-read.table("VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$VZVres)
parvovirus<-parvovirus[subset,]
parvovirus<-parvovirus[!is.na(parvovirus$parvores),]
VZV<-parvovirus$VZVres[order(parvovirus$age)]
B19<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]

PP<-rep(0,length(VZV))
PP[VZV==1 & B19==1]<-1
NP<-rep(0,length(VZV))
NP[VZV==0 & B19==1]<-1
PN<-rep(0,length(VZV))
PN[VZV==1 & B19==0]<-1
NN<-rep(0,length(VZV))
NN[VZV==0 & B19==0]<-1


# Scatterplot using proportions over 1-year age-categories
grid<-sort(unique(round(a)))
PPgrid<-table(PP,round(a))[2,]
NPgrid<-table(NP,round(a))[2,]
PNgrid<-table(PN,round(a))[2,]
NNgrid<-table(NN,round(a))[2,]

tot<-PPgrid+NPgrid+PNgrid+NNgrid

### FIGURE 4.8 (top left panel)
layout(matrix(c(1,1,2,2,3,3,4,4), 2,4, byrow = TRUE))
par(lwd=2,las=1,cex.axis=1.1,cex.lab=1.1,mgp=c(3, 0.5, 0))

plot(grid,PPgrid/tot,cex=0.03*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

### FIGURE 4.8 (top right panel)
plot(grid,NPgrid/tot,cex=0.03*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

### FIGURE 4.8 (lower left panel)
plot(grid,PNgrid/tot,cex=0.03*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))

### FIGURE 4.8 (lower right panel)
plot(grid,NNgrid/tot,cex=0.03*tot,pch=19,xlab="age",ylab="seroprevalence",xlim=c(0,45),ylim=c(0,1))


#######################
##   INCIDENCE DATA  
#######################

data<-read.table("incidencedataHepB.txt",header=T)
data1999<-read.table("data1999.txt",header=T)
pop<-read.table("population St Petersburg.txt",header=T)

data<-data[!data$year==1993,]

data$age[data$age<1]<-1

attach(data)

# CONVERSION ASYMPTOMATIC CASES - INFECTED CASES
sympto.f<-function(age,infect)
{
return((4.4+10.1*log10(age))*infect/100)
}

infect.f<-function(age,symp)
{
return(100/(4.4+10.1*log10(age))*symp)
}

infect.f.old<-function(age,symp)
{
inf<-rep(0,length(age))
inf[age==1]<-23.52941
inf[age==2]<-12.34568
inf[age==3]<-8.064516
inf[age==4]<-6.17284
inf[age==5]<-5.076142
inf[age==6]<-4.504505
inf[age==7]<-4.048583
inf[age==8]<-3.770739
inf[age==9]<-3.521127
inf[age==10]<-3.311258
inf[age==11]<-3.144654
inf[age==12]<-3.030303
inf[age==13]<-2.941176
inf[age==14]<-2.881844
inf[age>=15]<-2.857143

return(inf*symp)
}


# PLOT CONVERSION ASYMPTOMATIC CASES - INFECTED CASES
# FIGURE 4.10
windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(3, 0.5, 0))

plot(1:100,infect.f(1:100,1),xlab="age",ylab="Proportion Infected / Susceptible",type="n")
lines(1:100,infect.f(1:100,1),type="l",lwd=2)

sympto.f(1:100,1)
infect.f.old(1:100,1)


# SUMMARY STATISTICS

t<-sort(unique(year))
inf<-infect.f(age,rep(1,length(age)))

for (i in 1:5)
{
print(c("year","cases","age male", "age female", "ratio"))

cases.symp<-nrow(data[year==t[i],])
agemale.symp<-mean(age[gender==1 & year==t[i]])
agefemale.symp<-mean(age[gender==2 & year==t[i]])
ratiomf<-nrow(data[year==t[i] &gender==1,])/nrow(data[year==t[i] &gender==2,])
print(c(t[i],cases.symp,agemale.symp,agefemale.symp,ratiomf))

cases.inf<-sum(inf[year==t[i]])
agemale.inf<-sum(inf[gender==1 & year==t[i]]*age[gender==1 & year==t[i]])/(sum(inf[gender==1 & year==t[i]]))
agefemale.inf<-sum(inf[gender==2 & year==t[i]]*age[gender==2 & year==t[i]])/(sum(inf[gender==2 & year==t[i]]))
ratiomf<-sum(inf[year==t[i] &gender==1])/sum(inf[year==t[i] &gender==2])
print(c(t[i],cases.inf,agemale.inf,agefemale.inf,ratiomf))
}

print(c("year","cases","age male", "age female", "ratio"))

cases.symp<-sum(data1999$men)+sum(data1999$women)
agemale.symp<-sum(data1999$age*data1999$men)/sum(data1999$men)
agefemale.symp<-sum(data1999$age*data1999$women)/sum(data1999$women)
ratiomf<-sum(data1999$men)/sum(data1999$women)
print(c(t[i],cases.symp,agemale.symp,agefemale.symp,ratiomf))

inf1999.men<-infect.f(data1999$age,data1999$men)
inf1999.women<-infect.f(data1999$age,data1999$women)
cases.inf<-sum(inf1999.men)+sum(inf1999.women)
agemale.inf<-sum(data1999$age*inf1999.men)/sum(inf1999.men)
agefemale.inf<-sum(data1999$age*inf1999.women)/sum(inf1999.women)
ratiomf<-sum(inf1999.men)/sum(inf1999.women)
print(c(t[i],cases.inf,agemale.inf,agefemale.inf,ratiomf))



### PLOT THE SYMPTOMATIC & INFECTED CASES (PER 1,000,000 POPULATION)
t<-sort(unique(data$year))

### FIGURE 4.9
layout(matrix(c(1,1,2,2,3,3,0,4,4,5,5,0), 2,6, byrow = TRUE))
par(lwd=2,las=1,cex.axis=1.2,cex.lab=1.2)
for (i in 1:5)
{
popyear<-pop[pop$year==t[i],]
datayear<-data[year==t[i],]

# asymptomatic cases
f1<-as.data.frame(table(datayear$age[datayear$gender==1]))
f1.age<-as.numeric(levels(f1$Var1)[as.integer(f1$Var1)])
plot(f1.age,f1$Freq/popyear[f1.age,3]*100000,type="n",cex=0.02,ylim=c(0,500),xlab="age",ylab="Incidence Symptomatic Cases")
lines(f1.age,f1$Freq/popyear[f1.age,3]*100000)

f1<-as.data.frame(table(datayear$age[datayear$gender==2]))
f1.age<-as.numeric(levels(f1$Var1)[as.integer(f1$Var1)])
lines(f1.age,f1$Freq/popyear[f1.age,2]*100000,lty=2)
title(t[i])
}


### FIGURE 4.11
layout(matrix(c(1,1,2,2,3,3,0,4,4,5,5,0), 2,6, byrow = TRUE))
par(las=1,cex.axis=1.2,cex.lab=1.2,lwd=2)
for (i in 1:5)
{
popyear<-pop[pop$year==t[i],]
datayear<-data[year==t[i],]

# infected cases
f1<-as.data.frame(table(datayear$age[datayear$gender==1]))
f1.age<-as.numeric(levels(f1$Var1)[as.integer(f1$Var1)])
freq<-infect.f(f1.age,f1$Freq)
plot(f1.age,freq/popyear[f1.age,3]*100000,type="n",cex=0.02,,ylim=c(0,3000),,xlab="age",ylab="Incidence Infected Cases")
lines(f1.age,freq/popyear[f1.age,3]*100000)

f1<-as.data.frame(table(datayear$age[datayear$gender==2]))
f1.age<-as.numeric(levels(f1$Var1)[as.integer(f1$Var1)])
freq<-infect.f(f1.age,f1$Freq)
lines(f1.age,freq/popyear[f1.age,2]*100000,lty=2)
title(t[i])
}
