#################################################
#################################################
#								#
#                CHAPTER 9: 				#
#    The constraint of monotonicity 		#
#								#
# last update: 25/08/2012				#
#################################################
#################################################
rm(list=ls(all=TRUE))

setwd("c:/book/chapter9")

### DEFINING SOME FUNCTIONS 

# Function expit
#----------------
expit<-function(x){return(exp(x)/(1+exp(x)))}

# Function pavit
#----------------
# The pool adjacent violator algorithm in R
# datai represents the ordered fitted values
pavit<- function(pos=pos,tot=rep(1,length(pos)))
{
	gi<- pos/tot
	pai1 <- pai2 <- gi
	N <- length(pai1)
	ni<-tot
	for(i in 1:(N - 1)) {
		if(pai2[i] > pai2[i + 1]) {
			pool <- (ni[i]*pai1[i] + ni[i+1]*pai1[i + 1])/(ni[i]+ni[i+1])
			pai2[i:(i + 1)] <- pool
			k <- i + 1
			for(j in (k - 1):1) {
				if(pai2[j] > pai2[k]) {
				  pool.2 <- sum(ni[j:k]*pai1[j:k])/(sum(ni[j:k]))
				  pai2[j:k] <- pool.2
				}
			}
		}
	}
	return(list(pai1=pai1,pai2=pai2))
}

# Numerical Approximation of the FOI
# Based on age and estimated seroprevalence
#-------------------------------------------
foi.num<-function(x,p)
{
grid<-sort(unique(x))
pgrid<-(p[order(x)])[duplicated(sort(x))==F]
dp<-diff(pgrid)/diff(grid)
foi<-approx((grid[-1]+grid[-length(grid)])/2,dp,grid[c(-1,-length(grid))])$y/(1-pgrid[c(-1,-length(grid))])
return(list(grid=grid[c(-1,-length(grid))],foi=foi))
}

# Piecewise constant FOI
#------------------------
pcwfoi<-function(response,x.var,breaks){
hulp1<-function(beta1)
{
		n<-length(response)
		int<-rep(0,length(x.var))
		integrand<-rep(0,length(x.var))
		for (i in 1:(length(breaks)-1))
		{
		int<-int+((breaks[i+1]-breaks[i])*beta1[i]*(x.var>=breaks[i+1])+(x.var-breaks[i])*beta1[i]*(x.var<breaks[i+1])*(x.var>breaks[i]))
		integrand<-integrand+beta1[i]*(x.var<breaks[i+1])*(x.var>=breaks[i])
		}
		lambda<-integrand
		clambda<-int
		pihat<-1-exp(-clambda)
		pli<-rep(0,n)
		pli[response==0]<-(1-pihat)[response==0]
		pli[response==1]<-pihat[response==1]
		lpli<-log(pli+1e-6)
	return(list(pli=-sum(lpli),x.var=x.var,prob=pihat,foi=lambda))
}
hulp2<-function(beta){return(hulp1(beta)$pli)}
result.nlm<-nlm(f=hulp2,p=rep(0.1,length(breaks)-1),iterlim=500,print.level=0)
prev<-hulp1(result.nlm$estimate)$prob
foi<-hulp1(result.nlm$estimate)$foi
dev<-2*hulp1(result.nlm$estimate)$pli
k<-length(breaks)-1
aic<-dev+2*k
bic<-dev+log(length(response))*k
return(list(prev=prev,foi=foi,lambda.vec=result.nlm$estimate,x.var=x.var,aic=aic,bic=bic,dev=dev))
}

##################################################
## Section 9.2: Piecewise constant Forces of     #
##		    Infection   				 #
##################################################

### HAV DATA 
################

HAV<-read.table("c:/book/chapter4/HAV-BUL.dat",header=T)
a<-c(rep(HAV$Age,HAV$Pos),rep(HAV$Age,HAV$Tot-HAV$Pos))
y<-c(rep(rep(1,length(HAV$Age)),HAV$Pos),rep(rep(0,length(HAV$Age)),HAV$Tot-HAV$Pos))
y<-y[order(a)]
a<-a[order(a)]
grid<-sort(HAV$Age)
neg<-HAV$Tot-HAV$Pos
pos<-HAV$Pos
tot<-neg+pos

### Piecewise constant FOI
### FIGURE 9.1 (left panel)

breaks1<-c(0,10,20,40,60,100)
pcwfit1<-pcwfoi(response=y,x.var=a,breaks=breaks1) # monotone curve
pcwfit1$lambda.vec
breaks2<-c(0,5,11,18,60,100)
pcwfit2<-pcwfoi(response=y,x.var=a,breaks=breaks2) # non-monotone curve
pcwfit2$lambda.vec

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))

plot(grid,pos/tot,cex=0.05*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(-0.1,1),lwd=2)
lines(pcwfit1$x.var,pcwfit1$prev)
lines(pcwfit2$x.var,pcwfit2$prev,lty=2)
for (i in 1:(length(breaks1)-1)){lines(c(breaks1[i],breaks1[i+1]),c(pcwfit1$lambda.vec[i],pcwfit1$lambda.vec[i]))}
for (i in 2:(length(breaks1)-1)){lines(c(breaks1[i],breaks1[i]),c(pcwfit1$lambda.vec[i-1],pcwfit1$lambda.vec[i]),lty=3)}
for (i in 1:(length(breaks2)-1)){lines(c(breaks2[i],breaks2[i+1]),c(pcwfit2$lambda.vec[i],pcwfit2$lambda.vec[i]),lty=2)}
for (i in 2:(length(breaks2)-1)){lines(c(breaks2[i],breaks2[i]),c(pcwfit2$lambda.vec[i-1],pcwfit2$lambda.vec[i]),lty=3)}
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)
pcwfit1$dev
pcwfit2$dev

### B19 DATA
################

parvovirus<-read.table("c:/book/chapter4/VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]
grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

### Piecewise constant FOI
### FIGURE 9.1 (right panel)

breaks1<-c(0,10,20,40,60,100)
pcwfit1<-pcwfoi(response=y,x.var=a,breaks=breaks1) # monotone curve
breaks2<-c(0,5,11,18,60,100)
pcwfit2<-pcwfoi(response=y,x.var=a,breaks=breaks2) # non-monotone curve

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))

plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(-0.1,1),lwd=2)
lines(pcwfit1$x.var,pcwfit1$prev,lwd=3)
lines(pcwfit2$x.var,pcwfit2$prev,lwd=3,lty=2)
pcwfit1$lambda.vec
pcwfit2$lambda.vec
for (i in 1:(length(breaks1)-1)){lines(c(breaks1[i],breaks1[i+1]),c(pcwfit1$lambda.vec[i],pcwfit1$lambda.vec[i]),lwd=3)}
for (i in 2:(length(breaks1)-1)){lines(c(breaks1[i],breaks1[i]),c(pcwfit1$lambda.vec[i-1],pcwfit1$lambda.vec[i]),lwd=3,lty=3)}
for (i in 1:(length(breaks2)-1)){lines(c(breaks2[i],breaks2[i+1]),c(pcwfit2$lambda.vec[i],pcwfit2$lambda.vec[i]),lwd=3,lty=2)}
for (i in 2:(length(breaks2)-1)){lines(c(breaks2[i],breaks2[i]),c(pcwfit2$lambda.vec[i-1],pcwfit2$lambda.vec[i]),lwd=3,lty=3)}
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
pcwfit1$dev
pcwfit2$dev
mtext(side=4,"force-of-infection",las=3,line=1.4)


##################################################
## Section 9.3: Isotonic Regression              #
##		    						 #
##    9.3.2 Keiding (1991)				 #
##################################################

### HAV DATA 
################

HAV<-read.table("c:/book/chapter4/HAV-BUL.dat",header=T)
a<-c(rep(HAV$Age,HAV$Pos),rep(HAV$Age,HAV$Tot-HAV$Pos))
y<-c(rep(rep(1,length(HAV$Age)),HAV$Pos),rep(rep(0,length(HAV$Age)),HAV$Tot-HAV$Pos))
y<-y[order(a)]
a<-a[order(a)]
grid<-sort(HAV$Age)
neg<-HAV$Tot-HAV$Pos
pos<-HAV$Pos
tot<-neg+pos

### Isotonic regression Monotone Kernel Estimation Keiding 1991
### FIGURE 9.2 (left panel)

xx<-pavit(pos=pos,tot=tot)
foi.k1<-foi.num(grid,xx$pai2)$foi
foi.k1[is.na(foi.k1)]<-0
foi.k1[foi.k1>10]<-0
age.k1<-foi.num(grid,xx$pai2)$grid
fit.k1<- ksmooth(age.k1,foi.k1,kernel="normal",bandwidth=30,n.points=length(age.k1))

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))
plot(grid,pos/tot,cex=0.05*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(-0.1,1),lwd=2)
lines(age.k1,1-exp(-cumsum(c(age.k1[1],diff(age.k1))*fit.k1$y)),lty=1)
lines(fit.k1$x,fit.k1$y,lty=1)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)

### B19 DATA
################

parvovirus<-read.table("c:/book/chapter4/VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]
grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

### Isotonic regression Monotone Kernel Estimation Keiding 1991
### FIGURE 9.2 (right panel)

xx<-pavit(pos=pos,tot=tot)
foi.k1<-foi.num(grid,xx$pai2)$foi
foi.k1[is.na(foi.k1)]<-0
foi.k1[foi.k1>10]<-0
age.k1<-foi.num(grid,xx$pai2)$grid
fit.k1<- ksmooth(age.k1,foi.k1,kernel="normal",bandwidth=10,n.points=length(age.k1))

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))
plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(0,1),lwd=2)
lines(age.k1,1-exp(-cumsum(c(age.k1[1],diff(age.k1))*fit.k1$y)),lty=1)
lines(fit.k1$x,fit.k1$y,lty=1)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)



##################################################
## Section 9.3: Isotonic Regression              #
##		    						 #
##    9.3.3 Smooth then Constraint			 #
##################################################

### HAV DATA 
################

HAV<-read.table("c:/book/chapter4/HAV-BUL.dat",header=T)
a<-c(rep(HAV$Age,HAV$Pos),rep(HAV$Age,HAV$Tot-HAV$Pos))
y<-c(rep(rep(1,length(HAV$Age)),HAV$Pos),rep(rep(0,length(HAV$Age)),HAV$Tot-HAV$Pos))
y<-y[order(a)]
a<-a[order(a)]
grid<-sort(HAV$Age)
neg<-HAV$Tot-HAV$Pos
pos<-HAV$Pos
tot<-neg+pos


### Monotone Local Polynomials
### FIGURE 9.3 (left panel)

library(locfit)
alphagrid<-seq(0.2,2, by=0.05)
gcvp<-gcvplot(y~a,family="binomial",alpha=alphagrid,deg=2)
alpha<-alphagrid[which.min(gcvp$values)]
lpfit1<-locfit(y~a,family="binomial",alpha=alpha)
lpfitd1<-locfit(y~a,deriv=1,family="binomial",alpha=alpha)
lpfoi1=fitted(lpfitd1)*fitted(lpfit1)
yhat.mlp<-pavit(pos=fitted(lpfit1))$pai2
lpfoi1<-fitted(lpfitd1)*fitted(lpfit1)
foihat.mlp<-apply(cbind(0,lpfoi1),1,max)
lpfoi2<-apply(cbind(0,fitted(lpfitd1)),1,max)*yhat.mlp
foihat.mlp<-apply(cbind(0,lpfoi2),1,max)

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))
plot(grid,pos/tot,cex=0.05*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(-0.1,1),lwd=2)
lines(a,yhat.mlp,lty=1)
lines(a,foihat.mlp,lty=1)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)

### GLMM and pavit

library(mgcv)
fit.gamm.logit.cr<-gamm(y~s(a,bs="cr"),family=binomial(link="logit"))
lines(a,pavit(predict(fit.gamm.logit.cr$gam,type="response"))$pai2,lty=2)
h1<-foi.num(a,predict(fit.gamm.logit.cr$gam,type="response"))
lines(h1$grid,apply(cbind(0,h1$foi),1,max),lty=2)


### B19 DATA
################

parvovirus<-read.table("VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]
grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

### Monotone Local Polynomials
### FIGURE 9.3 (right panel)

library(locfit)
alphagrid<-seq(0.2,2, by=0.05)
gcvp<-gcvplot(y~a,family="binomial",alpha=alphagrid,deg=2)
alpha<-alphagrid[which.min(gcvp$values)]
lpfit1<-locfit(y~a,family="binomial",alpha=alpha)
lpfitd1<-locfit(y~a,deriv=1,family="binomial",alpha=alpha)
lpfoi1=fitted(lpfitd1)*fitted(lpfit1)
yhat.mlp<-pavit(pos=fitted(lpfit1))$pai2
lpfoi1<-fitted(lpfitd1)*fitted(lpfit1)
foihat.mlp<-apply(cbind(0,lpfoi1),1,max)
lpfoi2<-apply(cbind(0,fitted(lpfitd1)),1,max)*yhat.mlp
foihat.mlp<-apply(cbind(0,lpfoi2),1,max)

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))
plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(0,1),lwd=2)
lines(a,yhat.mlp,lty=1)
lines(a,foihat.mlp,lty=1)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)

### GLMM and pavit

library(mgcv)
fit.gamm.logit.cr<-gamm(y~s(a,bs="cr"),family=binomial(link="logit"))
lines(a,pavit(predict(fit.gamm.logit.cr$gam,type="response"))$pai2,lty=2)
h1<-foi.num(a,predict(fit.gamm.logit.cr$gam,type="response"))
lines(h1$grid,apply(cbind(0,h1$foi),1,max),lty=2)


##################################################
## Section 9.4: P-spline regression with         #
##		    shape constraints 			 #
## 								 #
##################################################

### HAV DATA 
################

HAV<-read.table("c:/book/chapter4/HAV-BUL.dat",header=T)
a<-c(rep(HAV$Age,HAV$Pos),rep(HAV$Age,HAV$Tot-HAV$Pos))
y<-c(rep(rep(1,length(HAV$Age)),HAV$Pos),rep(rep(0,length(HAV$Age)),HAV$Tot-HAV$Pos))
y<-y[order(a)]
a<-a[order(a)]
grid<-sort(HAV$Age)
neg<-HAV$Tot-HAV$Pos
pos<-HAV$Pos
tot<-neg+pos

### P-spline smoothing with shape constraints (Bollaerts et al., 2006)
### FIGURE 9.4 (left panel) - final models: optimal alpha based on BIC

source("monotone psplinefit.R")
fit0<-mpspline.fitter(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",
family="binomial",alpha=54,kappa=0)
fit1<-mpspline.fitter(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",
family="binomial",alpha=66,kappa=1e8)

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))
plot(grid,pos/tot,cex=0.05*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(0,1),lwd=2)
lines(fit0$x,fit0$y,lwd=3,lty=1)
lines(fit1$x,fit1$y,lwd=3,lty=3)
lines(foi.num(fit1$x,fit1$y)$grid,foi.num(fit1$x,fit1$y)$foi,lwd=3,lty=1)
lines(foi.num(fit0$x,fit0$y)$grid,foi.num(fit0$x,fit0$y)$foi,lwd=3,lty=3)

### P-spline smoothing with pavit function

lines(fit0$x,pavit(fit0$y)$pai2,lwd=3,lty=2)
lines(foi.num(fit0$x,pavit(fit0$y)$pai2)$grid,foi.num(fit0$x,pavit(fit0$y)$pai2)$foi,lwd=3,lty=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)


### B19 DATA
################

parvovirus<-read.table("c:/book/chapter4/VZV-B19-BE.dat",header=T)
subset<-(parvovirus$age>0.5)&(parvovirus$age<76)&(!is.na(parvovirus$age))&!is.na(parvovirus$parvores)
parvovirus<-parvovirus[subset,]
y<-parvovirus$parvores[order(parvovirus$age)]
a<-parvovirus$age[order(parvovirus$age)]
grid<-sort(unique(round(a)))
neg<-table(y,round(a))[1,]
pos<-table(y,round(a))[2,]
tot<-neg+pos

### P-spline smoothing with shape constraints (Bollaerts et al., 2006)
### FIGURE 9.4 (right panel) - final models: optimal alpha using BIC

source("monotone psplinefit.R")
fit0<-mpspline.fitter(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",
family="binomial",alpha=54,kappa=0)
fit1<-mpspline.fitter(response=y,x.var=a,ps.intervals=20,degree=3,order=2,link="logit",
family="binomial",alpha=66,kappa=1e8)

windows(record=TRUE, width=5, height=5)
par(las=1,cex.axis=1.1,cex.lab=1.1,lwd=3,mgp=c(2, 0.5, 0),mar=c(4.1,4.1,4.1,3))
plot(grid,pos/tot,cex=0.02*tot,pch=1,xlab="age",ylab="seroprevalence",xlim=c(0,72),ylim=c(0,1),lwd=2)
lines(fit0$x,fit0$y,lwd=3,lty=1)
lines(fit1$x,fit1$y,lwd=3,lty=3)
lines(foi.num(fit1$x,fit1$y)$grid,foi.num(fit1$x,fit1$y)$foi,lwd=3,lty=1)
lines(foi.num(fit0$x,fit0$y)$grid,foi.num(fit0$x,fit0$y)$foi,lwd=3,lty=3)

### P-spline smoothing with pavit function

lines(fit0$x,pavit(fit0$y)$pai2,lwd=3,lty=2)
lines(foi.num(fit0$x,pavit(fit0$y)$pai2)$grid,foi.num(fit0$x,pavit(fit0$y)$pai2)$foi,lwd=3,lty=2)
axis(side=4,at=c(0.0,0.1,0.2,0.3,0.4))
mtext(side=4,"force-of-infection",las=3,line=1.4)
