7 Seasonality forcing model

Code

library(tidyverse)
library(reshape2)
library(readxl)
library(lubridate)
library(deSolve)
library(bbmle)
library(stringi)
invisible(Sys.setlocale("LC_TIME", "English"))

Code

df1 <- read_excel("D:/OUCRU/hfmd/data/TCM_full.xlsx",
                  col_types = c("date", "numeric", "text",
                                "text", "text", "date", "date", "date",
                                "text", "text", "text"))
colnames(df1) <- c("dob", "age", "gender", "commune", "district",
                   "reported_date", "onset_date","adm_date",
                   "medi_cen","inout","severity")
df1$dob <- df1$dob %>% as_date()
df1$adm_date <- df1$adm_date %>% as_date()

df1$age1 <- interval(df1$dob, df1$adm_date) / years(1)
df1$adm_week <- as.Date(floor_date(df1$adm_date, "week"))
df1$district <- df1$district %>% str_replace_all(
  c( "Quận Gò vấp"  = "Quận Gò Vấp"))
df1$district <- df1$district %>%
  str_remove("Quận|Huyện|Thành phố") %>%
  trimws(which = "both")

case <- df1 %>% filter(year(adm_date) == 2023 ) %>%
        group_by(adm_week) %>%
        count()
case$t <- 1:nrow(case)

7.1 SEIR model for CH1 HFMD admission simulation in 2023

Code

ch1_adm <- df1 %>%
  filter(year(adm_date) == 2023 &
           medi_cen %in% c("Bệnh viện Nhi đồng 1",
                           "Bênh viện Nhi Đồng 1",
                           "Bệnh viện Nhi Đồng 1")) %>%
  mutate(district2 = district %>%
           str_replace_all(
             c("Quận 2" = "Thủ Đức",
               "Quận 9" = "Thủ Đức")) %>%
           str_remove("Quận|Huyện") %>%
           trimws(which = "both") %>%
           stri_trans_general("latin-ascii") %>%
           tolower())

ch1_adm_time_series <- ch1_adm %>%
  group_by(adm_week) %>%
  count() %>%
  ungroup()

\[\begin{align} \frac{dS}{dt} &= \mu N - \beta_t \frac{SI}{N} - \mu S, \\ \frac{dE}{dt} &= \beta_t \frac{S I}{N} - \sigma E - \mu E, \\ \frac{dI}{dt} &= \sigma E - \gamma I - \mu I, \\ \frac{dR}{dt} &= \gamma I - \mu R. \end{align}\]

Where

\[ \beta_t = \beta_0 \left(1 - \beta_1 \cos(\omega t)\right) \]

Assumed:

S0 = 13000, E0 = 1, I0 = 1, R0 = 0
\(\gamma\) = 0.0192308, \(\mu = \frac{1}{80 \times 52}\)

I found best fitting \(\beta_0, \beta_t, \omega\) by minimizing the negative log likelihood, where the log of the observed incidence is assumed to follow a normal distribution that has a mean of log of the model prediction, scaled by \(\rho\), and a standard deviation that equals the residual standard deviation:

\[ log \ observed\ incidence\ at\ time\ t \sim Normal(log(\rho \times i(t)),\sigma) \]

Here \(\rho\) and \(\sigma\) are estimated by performing an intercept only linear regression of the logged observed incidence against the logged prediction as an offset term, where the estimated intercept corresponds to \(log(\rho)\) and the standard deviation of the linear regression corresponds to \(\sigma\)

Code

library(odin2)
library(dust2)
library(bbmle)


seir_seasonality <- odin2::odin({
  N <- S + E + I + R
  deriv(S) <- mu * N - beta_t * S * I / N - mu * S
  deriv(E) <- beta_t * S * I / N - sigma * E - mu * E
  deriv(I) <- sigma * E - gamma * I - mu * I
  deriv(R) <- gamma * I - mu * R
  deriv(CInc) <- sigma * E
  
  # seasonality forcing
  beta_0 <- parameter(0.4)
  beta_1 <- parameter(0)
  omega <- parameter(2*3.14/52) # use week as time unit by default
  sigma <- parameter(0.2)
  beta_t <- beta_0*(1 + beta_1*cos(omega*time))
  
  # initialize starting population
  init_S <- parameter(9500)
  init_I <- parameter(500)
  init_E <- parameter(500)
  gamma <- parameter(0.05)
  mu <- parameter(0.05)
  
  initial(S) <- init_S
  initial(E) <- init_E
  initial(I) <- init_I
  initial(R) <- 0
  initial(CInc) <- 0
  
})

run_mod <- function(mod, pars, duration=100, timestep=1){
  # --- initialize simulation time ---- 
  times <- seq(0, duration, timestep)
  
  sys <- dust_system_create(mod, pars)
  dust_system_set_state_initial(sys)
  out <- dust_system_simulate(sys, times)
  out <- dust_unpack_state(sys, out)
  
  out <- out %>% 
    as.data.frame() %>% 
    mutate(
      t = times
    )
  
  out$Inc <- c(1, diff(out$CInc))
  out
}

mll_seir <- function(beta_0, beta_1,omega,sigma) {
  # Make sure that parameters are positive
  beta_0 <- exp(beta_0)
  beta_1 <- exp(beta_1)
  omega <- exp(omega)
  sigma <- exp(sigma)
  
  pars <- list(
    beta_1 = beta_1,
    beta_0 = beta_0,
    omega = omega,
    sigma = sigma,
    gamma = 1/52,
    mu = 1/(80*52),
    init_S = 13000,
    init_E = 1,
    init_I = 1
  )
  
  pred <- run_mod(seir_seasonality, pars, duration=53, timestep=1)
  # Return the negative log-likelihood
  pred <- pred %>%  filter(t != 0) %>% pull(Inc)
  llh <- lm(log(ch1_adm_time_series$n) ~ 1 + offset(log(pred)))
  p <- coef(llh) %>% as.numeric()
  sigma2 <- summary(llh)$sigma
  
  # Return the negative log-likelihood
  - sum(dnorm(x = log(ch1_adm_time_series$n), mean = log(pred) + log(p), sd = sigma2))
}

starting_param_val <- list(beta_1 = .5,
                           beta_0 = 0.3,
                           # gamma = 1/52,
                           # mu = 1/(80*52),
                           sigma = .3,
                           omega = 2*3.14/(52/2.2))

estimates <- mle2(minuslogl = mll_seir, start = lapply(starting_param_val, log), method = "Nelder-Mead")
params <- exp(coef(estimates))

run_mod(seir_seasonality, 
        pars = list(
          beta_0 = params[1],
          beta_1 = params[2],
          sigma = params[4],
          gamma = 1/52,
          mu = 1/(80*52),
          omega = params[3],
          init_S = 13000,
          init_E = 1,
          init_I = 1
        ), 
        duration=53, timestep=1)%>% 
  filter(t != 0) %>% 
  cbind(ch1_adm_time_series) %>%
  mutate(beta_t = params[1]*(1 + params[2]*cos(params[3]*t)),
         N = S+E+I+R,
         foi = beta_t*(I/N)) %>% 
  ggplot(aes(x = adm_week))+
  geom_line(aes(y = Inc))+
  geom_line(aes(y=beta_t*1000),linetype = "dashed")+
  geom_line(aes(y=S/10),color = "blue",alpha = .3)+
  geom_point(aes(x = adm_week,y=n),
             data = ch1_adm_time_series)+
  geom_vline(xintercept = as.Date("2023-05-21"))+
    geom_vline(xintercept = as.Date("2023-09-10"))+
    annotate(
      geom = "text", x = as.Date("2023-06-12"), y = 1300,
      label = "Summer break", hjust = 0, vjust = 1, size = 7
    )+
    theme_minimal()

The points show the observed incidence in CH1 in 2023, while the solid line is the incidence simulated by the SEIR model. The dashed line represents \(\beta_t\), and the blue line shows the number of susceptible individuals.

7.2 Gathering data on timing children go to school

Code

case2 <- df1 %>%
  group_by(adm_week) %>%
  count()

case2 %>% ggplot(aes(x = adm_week,y = n )) +
  geom_line()+
  geom_vline(xintercept = as.Date("2014-05-31"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2014-08-01"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2015-05-31"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2015-08-01"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2016-05-27"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2016-08-15"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2017-05-31"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2017-08-14"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2018-05-31"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2018-08-20"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2019-05-25"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2019-09-05"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2020-07-31"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2020-09-01"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2021-05-31"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2021-09-05"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2022-06-30"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2022-09-05"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2023-05-26"),
             alpha = 0.4)+
  geom_vline(xintercept = as.Date("2023-08-21"),
             alpha = 0.4)+
  scale_x_date(breaks = "1 year",date_labels = "%Y")+
  labs(x = "Admission week",y = "Number of cases")+
  theme_minimal()