5  2023 HCMC HMFD outbreak

data_file <- "D:/OUCRU/hfmd/data/hfmd_sero.rds"
library(dplyr)
library(stringr)
library(purrr)
library(tidyr)
library(magrittr)
library(mgcv)
library(scam)
library(ggplot2)
library(lubridate)
predict2 <- function(...) predict(..., type = "response") |> as.vector()
hfmd <- data_file |>
  readRDS() |> 
  as_tibble() |> 
  mutate(collection = id |>
           str_remove(".*-") |> 
           as.numeric() |> 
           divide_by(1e4) |> 
           round(),
         col_date2 = as.numeric(col_date),
         across(pos, ~ .x > 0))

age_profile <- function(data, age_values = seq(0, 15, le = 512), ci = .95) {
  model <- gam(pos ~ s(age), binomial, data)
  link_inv <- family(model)$linkinv
  df <- nrow(data) - length(coef(model))
  p <- (1 - ci) / 2
  model |> 
    predict(list(age = age_values), se.fit = TRUE) %>%
    c(list(age = age_values), .) |> 
    as_tibble() |> 
    mutate(lwr = link_inv(fit + qt(    p, df) * se.fit),
           upr = link_inv(fit + qt(1 - p, df) * se.fit),
           fit = link_inv(fit)) |> 
    select(- se.fit)
}


age_profile_unconstrained <- function(data, age_values = seq(0, 15, le = 512),
                                      ci = .95) {
  data |> 
    group_by(collection) |> 
    group_map(~ age_profile(.x, age_values, ci))
}

shift_right <- function(n, x) {
  if (n < 1) return(x)
  c(rep(NA, n), head(x, -n))
}

age_profile_constrained_cohort2 <- function(data, age_values = seq(0, 15, le = 512),
                                           ci = .95, n = 100) {
  dpy <- 365 # number of days per year
  
  mean_collection_times <- data |> 
    group_by(collection) |> 
    summarise(mean_col_date = mean(col_date2)) |> 
    with(setNames(mean_col_date, collection))
  
  cohorts <- cumsum(c(0, diff(mean_collection_times))) |>
    divide_by(dpy * mean(diff(age_values))) |> 
    round() |> 
    map(shift_right, age_values)
  
  age_time <- map2(mean_collection_times, cohorts,
                   ~ tibble(collection_time = .x, cohort = .y))
  
  age_time_inv <- age_time |> 
    map(~ cbind(.x, age = age_values)) |> 
    bind_rows() |> 
    na.exclude()

  data |> 
# Step 1:
    group_by(collection) |> 
    group_modify(~ .x |>
                   age_profile(age_values, ci) |> 
                   mutate(across(c(fit, lwr, upr), ~ map(.x, ~ rbinom(n, 1, .x))))) |> 
    group_split() |> 
    map2(age_time, bind_cols) |> 
    bind_rows() |> 
    unnest(c(fit, lwr, upr)) |>
    pivot_longer(c(fit, lwr, upr), names_to = "line", values_to = "seropositvty") |> 
# Step 2a:
    filter(cohort < max(age) - diff(range(mean_collection_times)) / dpy) |>
    group_by(cohort, line) |> 
    group_modify(~ .x %>%
                   scam(seropositvty ~ s(collection_time, bs = "mpi"), binomial, .) |> 
                   predict2(list(collection_time = mean_collection_times)) %>%
                   tibble(collection_time = mean_collection_times,
                          seroprevalence  = .)) |> 
    ungroup() |> 
# Step 2b:
    left_join(age_time_inv, c("cohort", "collection_time")) |> 
    group_by(collection_time, line) |> 
    group_modify(~ .x |>
                   right_join(tibble(age = age_values), "age") |>          ### added
                   arrange(age) |>                                         ### added
                   mutate(across(seroprevalence,
                                 ~ gam(.x ~ s(age), betar) |>
                                   predict2(list(age = age_values))))) |>  ### modified
    ungroup() |> 
    pivot_wider(names_from = line, values_from = seroprevalence) |> 
    group_by(collection_time) |> 
    group_split()
}
constrained_age_profiles_cohort2 <- age_profile_constrained_cohort2(hfmd)

5.1 Attack rate

attack_rates <- map2(head(constrained_age_profiles_cohort2, -1),
                     constrained_age_profiles_cohort2[-1],
                     ~ left_join(na.exclude(.x), na.exclude(.y), "cohort") |> 
                       mutate(attack = (fit.y - fit.x) / (1 - fit.x)))
census2019 <- readRDS("D:/OUCRU/hfmd/data/census2019.rds")

age_structure <- census2019 |> 
  filter(province == "Thành phố Hồ Chí Minh") |> 
  group_by(age) |> 
  summarise(n = sum(n)) |> 
  mutate(across(age, ~ stringr::str_remove(.x, " tuổi| \\+") |> as.integer())) |> 
  arrange(age) |> 
  filter(age < 17)

with(age_structure,
     plot(age - 1, n, type = "s", ylim = c(0, 110000), col = 4, lwd = 3,
          xlab = "age classes (years)", ylab = "number of children"))
abline(v = 0:15, col = "grey")

5.2 Expected incidence from seroprevalence

mod <- lm(n ~ age, age_structure)

incidences <- map(attack_rates,
                  ~ mutate(.x, incidence = (1 - fit.x) * attack *
                                             predict(mod, list(age = age.x))))

incidences %>% 
  bind_rows(.id = "id") %>% 
  ggplot(aes(x = age.x, y = incidence,color = id))+
  geom_line()+
  coord_cartesian(ylim = c(0,20000))+
  theme(legend.position = "hide")+
  theme_minimal()+
  theme(legend.position = "hide")

5.3 Incidence of hospitalization

hfmd_incidence <- readRDS("D:/OUCRU/hfmd/data/hfmd_incidence.rds")

incidence1 <- hfmd_incidence %>% 
  filter(year(adm_date) == 2023) %>% 
  mutate(adm_date2 = as.numeric(adm_date),
         cohort = interval(dob, "2023-01-01") / years(1)) 


mean_collection_times <- hfmd |> 
  group_by(collection) |> 
  summarise(mean_col_date = mean(col_date2)) |> 
  with(setNames(mean_col_date, collection))

ouut <- list()

for (i in 1:3){
  ouut[[i]] <- incidence1 %>% 
    filter(adm_date2 >= as.numeric(mean_collection_times[i]) & 
             adm_date2 <= as.numeric(mean_collection_times[i+1])) %>% 
    mutate(age_gr = cut(cohort, breaks = seq(0,16),right = T)) %>% 
    na.omit(age_gr) %>%
    group_by(age_gr) %>% 
    count() %>% 
    mutate(age_gr2 = as.numeric(age_gr)) %>% 
    gam(n ~ s(age_gr2),data = .) %>% 
    predict(list(age_gr2 = incidences[[i]]$age.x))%>%
    tibble(age = incidences[[i]]$age.x,
           incidence  = .)
}

ouut %>% 
  bind_rows(.id = "id") %>% 
  ggplot(aes(x = age, y = incidence,color = id))+
  geom_line()+
  coord_cartesian(ylim = c(0,10000))+
  theme_minimal()+
  theme(legend.position = "hide")

5.4 Probability of hospitalization

map2(ouut, incidences, ~inner_join(.x, .y, by = join_by(age == age.x)) %>% 
       mutate(prob = (incidence.x/incidence.y)*100)) %>% 
  bind_rows(.id = "id") %>% 
  ggplot(aes(x = age, y = prob,color = id))+
  geom_line()+
  theme_minimal()+
  theme(legend.position = "hide")