# -------------------------------------------------------------------------------------------------------
# EBook        XFG
# -------------------------------------------------------------------------------------------------------
# Macro        XFGexKF
# -------------------------------------------------------------------------------------------------------
# Description  Computes estimates of the SPD for a European call option from JAN-02-2003
#              using the extended Kalman filter 
# -------------------------------------------------------------------------------------------------------
# Usage        function(OptionPricesC,StrikePricesK,BetaInitial,NumberOfObservations,Runs)
# -------------------------------------------------------------------------------------------------------
# Inputs:        
# OptionPricesC         (1 x T) standardized vector of the European CALL option prices  
# StrikePricesK         (1 x T) vector with various strike prices K
# BetaInitial           (1 x k) the initial value [distribution] of beta_{0|0} 
# NumberOfObservations  for how many observations the estimates beta_t are computed 
# Runs                  how many times the extended Kalman filter runs
#
# Output                (1 x k) G.t vector with computed point estimates of the SPD, plot of estimates
# -------------------------------------------------------------------------------------------------------
# Author       M. Svojik, 20080122
# -------------------------------------------------------------------------------------------------------
# R code

DataComplete <- read.table(file=".../eurex1.dat", sep = "")  # complete data set
DataCALL <- DataComplete[DataComplete[ ,2] == 1, ]    # just CALL options [marked with 1 in the 2nd column]

MinimumOfTimeToExpiry <- min(DataCALL[ ,3])           # the shortest time to expiry

# we choose CALL options with the shortest time to expiry

DataCALLMiniumTimeExpiry <- DataCALL[DataCALL[ ,3] == MinimumOfTimeToExpiry, ]

TimeToTrade <- DataCALLMiniumTimeExpiry[ ,6]            # vector of time of trade
Data <- DataCALLMiniumTimeExpiry[order(TimeToTrade), ]	# sorting data


StrikePricesK <- Data[ ,1]                              # column of strike prices
SortedStrikePricesK <- sort(unique(StrikePricesK))      # vector of sorted "unique" strike prices K
p <- length(SortedStrikePricesK)                        # how many various strike prices K we have

OptionPricesC <- Data[ ,4]*exp(Data[ ,5]*Data[ ,3])   # CALL option prices divided by the discount factor exp(-r(T-t))
T <- length(OptionPricesC)                            # number of observations

# vector of means of CALL options with corresponding strike price k_j 

h <- array(0,p)
ExpectedValueC <- array(0,p)

for (k in 1:p)
{                                                  
 for (t in 1:T)
 {
   if (Data[t,1] == SortedStrikePricesK[k])
    {
      h[k] <- h[k] + 1
      ExpectedValueC[k] <- ExpectedValueC[k] + OptionPricesC[t]
    }
 }
 ExpectedValueC[k] <- ExpectedValueC[k]/h[k]
}

BetaInitial <- ExpectedValueC[p]

  # Display CALL option prices againts strike prices K, JAN-02-2003

  axis.y <- max(OptionPricesC)
  plot(StrikePricesK, OptionPricesC, main = "CALL Option Prices", xlab = "Strike Price K", ylab = "CALL Option Price C(K)", ylim = c(0, axis.y + 10))


  X <- matrix(0, nrow = p, ncol = p, byrow = TRUE)   # construction of the design matrix X
  X[,1] <- 1

  for (i in 1:(p-1))
   {
    for (j in 2:p)
    {
     if (p - i + 1 >= j) X[i,j] <- SortedStrikePricesK[p - j + 2] - SortedStrikePricesK[i]
    }
   }

  G <- matrix(0, nrow = T, ncol = p, byrow = TRUE)
  SIGMA <- matrix(0, nrow = p*T, ncol = p, byrow = TRUE)

  # G = matrix of all estimates g_t in every time step t
  # SIGMA = matrix of all estimates sigma_t in every time step t

################################################################################################

# 0. step: Setting of variances of eps_t and eta_t in the considered model:

        # C_t[k] = X*beta_t + eps_t, eps_t ~ N[0, var.eps]
        # beta_t = beta_{t-1} + eta_t, eta_t ~ N[0, var.eta]

  VarEps <- var(OptionPricesC)  # initialization of variance of eps_t
  VarEta <- diag(p)             # initialization of variance of eta_t


# 1. step: Initialization step [t = 0]; setting beta_{0|0}, sigma_{0|0}


  sigma.t <- diag(p)        # initialization of sigma_{0|0}
  beta.t <- rep(0,p)        # initialization of state vector beta_{0|0}

  beta.t[1] <- BetaInitial  # 1st component of beta_{0|0}
  beta.t[2:p] <- 1/(p-1)    # the second (p-1) components of beta_{0|0} are uniform distributed

  B.t <- matrix(0, nrow = p, ncol=p, byrow=TRUE)	 # Jacobian matix B_{t|t-1}
  g.t <- rep(0,p)				                           # transformation of beta_t


  Runs <- 2                 # Runs = how many times the extended Kalman filter runs (tested for Runs = 1, 2)
  NumberOfObservations <- round(c(1,T/8,T/4,3*T/8,T/2,5*T/8,3*T/4,7*T/8,T))

  for (k in 1:Runs)
   {
    VarEpsSecond <- 0       # variance of eps_t in the 2nd run
    VarEtaSecond <- 0       # variance of eta_t in the 2nd run
    
    # state vector beta_t is transformed
 		# S = sum_{j = 1}^{p-1} [exp(beta_t[j])], beta_t = (beta_t[0], ... beta_t[p-1])

    S <- sum(exp(beta.t[2:p]))
    g.t[1] <- exp(beta.t[1])
    g.t[2:p] <- exp(beta.t[2:p])/S

      
    for (t in 1:NumberOfObservations[9])   # the main recursion, t -> t + 1, until we reach t = T
     {
      for (i in 1:p)        # corresponding row of X is selected
       {
        if (StrikePricesK[t] == SortedStrikePricesK[i]) X.SelectedRow <- X[i,]
       }

      gPrevious <- g.t      # saving the previous value of g_t


# 2. step: Prediction step, a priori estimates; beta_{t|t-1}, sigma_{t|t-1}


      # beta_{t|t-1} = beta_{t-1|t-1}
      # Sigma_{t|t-1} = Sigma_{t-1|t-1} + var(eta_t)

      P.t <- sigma.t + VarEta   # notation: P.t = Sigma_{t|t-1},  sigma.t = Sigma_{t-1|t-1}

      # Computing of the Jacobian matrix B_{t|t-1}
      
      B.t[1,1] <- (S^2)*exp(beta.t[1])
      
      for (i in 2:p)
       {
        for (j in 2:p)
         {
          B.t[i,j] <- -exp(beta.t[i] + beta.t[j])
          if (i == j) B.t[i,i] <- exp(beta.t[i])*(S - exp(beta.t[i]))
         }
       }

      B.t <- (1/S^2)*B.t


# 3. step: Prediction error, its MSE; I_t, F_t


        # I_t = C_t[k] - X*g(beta_{t|t-1})
        # F_t = X*B_{t|t-1}*Sigma_{t|t-1}*t(B_{t|t-1})*t(X) + var(eps_t)

      Innovations <- OptionPricesC[t] - X.SelectedRow%*%g.t
      F.t <- X.SelectedRow%*%B.t%*%P.t%*%t(B.t)%*%X.SelectedRow + VarEps


# 4. step: Updating step; beta_{t|t}, Sigma_{t|t}, K_t


        # K_t = Sigma_{t|t-1}*t(B_{t|t-1})*t(X)*(F_t)^{-1}
        # beta_{t|t} = beta_{t|t-1} + K_t*I_t
        # Sigma_{t|t} = [I - K_t*X*]*B_{t|t-1}*Sigma_{t|t-1}


      KalmanGain <- P.t%*%t(B.t)%*%X.SelectedRow%*%(F.t)^{-1} # the MMSE version
      beta.t <- beta.t + KalmanGain%*%Innovations
      sigma.t <- (diag(p) - KalmanGain%*%X.SelectedRow%*%B.t)%*%P.t

        # actual state vector beta_t is transformed to the actual estimate g_t

      S <- sum(exp(beta.t[2:p]))
      g.t[1] <- exp(beta.t[1])
      g.t[2:p] <- exp(beta.t[2:p])/S

      G[t,] <- g.t              # all estimates g_t
      SIGMA[(p*(t-1)+1):(p*t),] <- sigma.t

      VarEpsSecond <- VarEpsSecond + Innovations^2
      VarEtaSecond <- VarEtaSecond + (sum(g.t) - sum(gPrevious))^2

     }   # end of the main recursion for t = 1, ..., T


   VarEps <- (1/T)*VarEpsSecond                # new, more accurately variance of eps_t
   VarEta <- ((1/(T*p))*VarEtaSecond)*diag(p)  # new, more accurately variance of eta_t

   if (k == 1) gPreviousRun <- g.t             # saving g_t from the previous (1st) run

   }     # end of the cycle for k = 1, 2, ..., Runs

    g.t               # estimate g.t we are looking for
    sum(g.t[2:p])     # checking the constraint

################################################################################

    # Graphic outputs

    # 1ST RUN: estimates g_t t = 1, 119, 238, 356, 475, 594, 712, 831, 950, against strike prices K

if (k == 1)
{
    # estimates g_t for t = 1, 119, 238 observations, against strike price K, t = 1, 119, 238, 356, 475, 594, 712, 831, 950

    par(mfrow = c(3,1))

    plot(G[1,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 1 observation", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[1,p:2]~SortedStrikePricesK[1:(p-1)])

    plot(G[119,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 119 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[119,p:2]~SortedStrikePricesK[1:(p-1)])

    plot(G[238,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 238 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[238,p:2]~SortedStrikePricesK[1:(p-1)])

    # estimates g_t for t = 356, 475, 594 observations, against strike price K, t = 1, 119, 238, 356, 475, 594, 712, 831, 950

    par(mfrow = c(3,1))

    plot(G[356,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 356 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[356,p:2]~SortedStrikePricesK[1:(p-1)])

    plot(G[475,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 475 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[475,p:2]~SortedStrikePricesK[1:(p-1)])

    plot(G[594,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 594 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[594,p:2]~SortedStrikePricesK[1:(p-1)])

    # estimates g_t for t = 712, 831, 950 observations, against strike price K, t = 1, 119, 238, 356, 475, 594, 712, 831, 950

    par(mfrow = c(3,1))

    plot(G[712,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 712 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[712,p:2]~SortedStrikePricesK[1:(p-1)])

    plot(G[831,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 831 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[831,p:2]~SortedStrikePricesK[1:(p-1)])

    plot(G[950,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on T = 950 observations", sub = "(extended Kalman filter, 1st run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[950,p:2]~SortedStrikePricesK[1:(p-1)])
}

    # 2ND RUN: estimates g_t t = 1, 475, 950, against strike prices K [is what in the discussion paper is]

if (k == 2)
{
    # estimates g_t for t = 1, 475, 950 observations, against strike price K, t = 1, 475, 950

    par(mfrow = c(3,1))

    plot(G[1,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 1 observation", sub = "(extended Kalman filter, 2nd run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[1,p:2]~SortedStrikePricesK[1:(p-1)], lty = 1, lwd = 2)

    plot(G[475,p:2]~SortedStrikePricesK[1:(p-1)], type = "n", main = "Estimate of the SPD based on t = 475 observations", sub = "(extended Kalman filter, 2nd run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[475,p:2]~SortedStrikePricesK[1:(p-1)], lty = 1, lwd = 2)

    plot(G[950,p:2]~SortedStrikePricesK[1:(p-1)], type = "n" , main = "Estimate of the SPD based on T = 950 observations", sub = "(extended Kalman filter, 2nd run of the algorithm, JAN-02-2003)", xlab = "Strike price K", ylab = "Point estimation")
    lines(G[950,p:2]~SortedStrikePricesK[1:(p-1)], lty = 1, lwd = 2)
}                               