9 Reframe ms figure

9.1 Import data

Code

## file path
path2data <- paste0("D:/OUCRU-Onedrive/OneDrive - Oxford University Clinical Research Unit/Data/HFMD/cleaned/")

# Name of cleaned HFMD data file
cases_file <- "hfmd_full.rds"

# Name of cleaned sero file
sero_file <- "sero_EV-A71_1222-1223.rds"

# Name of cleaned viro file

viro_file <- "virology_cleaned.rds"

## import data

df_cases <- paste0(path2data, cases_file) |>
  readRDS()

df_sero <- paste0(path2data, sero_file) |>
  readRDS()

df_viro <- paste0(path2data, viro_file) |>
  readRDS()

9.2 Library

Code

library(readxl)
library(tidyverse)
library(gtsummary)
library(patchwork)
library(lubridate)
library(stringr)
library(mgcv)
library(janitor)
library(predtools)
library(magrittr)
library(slider)
library(scam)
library(epitools)
library(sf)
library(stringi)
library(gtsummary)
library(zoo)
invisible(Sys.setlocale("LC_TIME", "English"))

Code

## filter cases in 2023

df23 <- df_cases  |>  filter(year(adm_date) == 2023)
df23 <- df23[!duplicated(df23), ]

df23 <- df23 |>
  mutate(
    wave = case_when(
      adm_date <= as.Date("2023-09-03") ~ "1st",
      adm_date > as.Date("2023-09-03") ~ "2nd"
    ),
    severity2 = case_when(
      is.na(severity) ~ "Undefined",
      severity %in% c("3","4") ~ "3 or 4",
      !severity %in% c("3","4",NA) ~ severity
    ) |> as.factor()
  )

9.3 Using k-means to find the time point that separate two peaks

Code

library(cluster)
library(factoextra)

set.seed(123)

df23 <- df23 |>
  # select(age1,adm_date) |>
  mutate(time = as.numeric(adm_date)) |>
  filter(!is.na(age1))


kmeans_result_1 <- kmeans(df23[,c("age1","time")], centers = 2, nstart = 25)

df23$wave <- factor(kmeans_result_1$cluster,labels = c("1st","2nd"),levels = c(2,1))

## show the time point that seperate two peaks

time_cut_1st <- df23 |> 
  filter(wave == "1st") |> 
  pull(time) |> 
  max()

time_cut_2nd <- df23 |> 
  filter(wave == "2nd") |> 
  pull(time) |> 
  min()

time_cut <- mean(time_cut_1st,time_cut_2nd) |> as.numeric()
time_cut

[1] 19605

So I selected time points at 1.9605^{4} to seperate two peaks

Code

df23 |> 
  filter(age1 <= 15) |> 
  ggplot(aes(x = adm_date, y = age1, color = wave)) +
  geom_point(size = .5) + 
  scale_x_datetime(date_breaks = "1 month" , date_labels = "%b")+
  theme_minimal()+
  geom_vline(xintercept = as.Date(time_cut),linewidth = .2)+
  labs(x = "Admission date", y = "Age (years)")+
  theme(legend.position = "bottom")

9.4 Table 1

Code

### table1

tab_age <-
  df23 |>
  tbl_summary(by = wave,
              label = list(age1 ~ "Age (Years)",
                           gender ~ "Gender"),
              digits = list(all_continuous() ~ c(2,2),
                            all_categorical() ~ c(0,2)),
              include = c(age1,gender)) |>
  remove_row_type(type = "missing") |>
  add_p()


calculate_prop_test <- function(data, variable, by, ...) {
  # subset to non-missing
  data <- tidyr::drop_na(data, dplyr::all_of(c(variable, by)))

  prop.trend.test(
    x = table(data[[variable]], data[[by]])[2, ], # get the second row (the positive row)
    n = table(data[[by]])
  ) |>
    broom::tidy()
}

tab_sev <-
  df23 |> 
  filter(severity2 != "Undefined") |> 
  select(wave, severity2)  |> 
  mutate(rn = row_number()) |> 
  tidyr::spread(severity2, severity2) |> 
  mutate(across(-c(wave, rn), ~ ifelse(is.na(.), 0, 1))) |>
  select(-rn) |>
  tbl_summary(by = wave,
              statistic = list(
                all_categorical() ~ "{n} / {N} ({p}%)"
              ),
              digits = all_categorical() ~ c(0,0,2)) |>
  add_p() |>
  modify_table_body(
    ~ .x |>
      mutate(row_type = "level",
             variable = "wave") %>%
      {bind_rows(
        tibble(variable = "wave", row_type = "label", label = "Severity"),
        .
      )}
  )

tab_inout <-
  df23 |>
  filter(inout %in% c("Inpatient","Outpatient")) |> 
  select(wave, inout) |>
  mutate(rn = row_number()) |>
  tidyr::spread(inout, inout) |>
  mutate(across(-c(wave, rn), ~ ifelse(is.na(.), 0, 1))) |>
  select(-rn) |>
  tbl_summary(by = wave,
              statistic = list(
                all_categorical() ~ "{n} / {N} ({p}%)"
              ),
              digits = all_categorical() ~ c(0,0,2)) |>
  add_p() |>
  modify_table_body(
    ~ .x |>
      mutate(row_type = "level",
             variable = "wave") %>%
      {bind_rows(
        tibble(variable = "wave", row_type = "label", label = "Treatment"),
        .
      )}
  )

final_table <- tbl_stack(list(tab_age, tab_sev, tab_inout),quiet = TRUE) 

final_table

Characteristic	1st N = 23,285¹	2nd N = 20,011¹	p-value²
Age (Years)	2.66 (1.78, 3.75)	2.35 (1.51, 3.68)	<0.001
Gender			0.040
Female	10,016 (43.01%)	8,412 (42.04%)
Male	13,269 (56.99%)	11,599 (57.96%)
Severity
1	5,214 / 8,212 (63.49%)	4,181 / 5,801 (72.07%)	<0.001
2A	2,451 / 8,212 (29.85%)	1,455 / 5,801 (25.08%)	<0.001
2B	415 / 8,212 (5.05%)	128 / 5,801 (2.21%)	<0.001
3 or 4	132 / 8,212 (1.61%)	37 / 5,801 (0.64%)	<0.001
Treatment
Inpatient	3,160 / 22,493 (14.05%)	2,432 / 19,563 (12.43%)	<0.001
Outpatient	19,333 / 22,493 (85.95%)	17,131 / 19,563 (87.57%)	<0.001
¹ Median (Q1, Q3); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test

Table 1: Demographic and clinical characteristics of reported HFMD cases in Ho Chi Minh City in 2023.

9.5 Figure 1

Code

cohort_lines <- function(cohort_ages = seq(0, 5, by = 1),viro = FALSE){
  start_date <- as.Date("2023-01-01")
  end_date   <- as.Date("2023-12-31")

  ## generate the cohort line
  tseq <- seq(start_date, end_date, by = "day")
  cohort_lines <- lapply(cohort_ages, function(a0){
    data.frame(
      date = tseq,
      age  = a0 + as.numeric(tseq - start_date) / 365,
      cohort = as.factor(a0)
    )
  }) |> dplyr::bind_rows()
  
  color_cohort <- ifelse(viro == TRUE,"black","#80FFFFFF")

  ## create mid-opacity white of cohort lines before June
  cohort_lines <- cohort_lines |>
    mutate(trend = case_when(
      date < as.Date("2023-06-01") ~ color_cohort,
      date >= as.Date("2023-06-01") ~ "#FFFFFFFF",
    ))
  cohort_lines$group <- consecutive_id(cohort_lines$trend)
  cohort_lines <- head(do.call(rbind, by(cohort_lines, cohort_lines$group, rbind, NA)), -1)
  cohort_lines[, c("trend", "group")] <- lapply(cohort_lines[, c("trend", "group")], na.locf)
  cohort_lines[] <- lapply(cohort_lines, na.locf, fromLast = T)

  cohort_lines$trend <- factor(cohort_lines$trend,
                               levels = c(color_cohort,"#FFFFFFFF"))
  cohort_lines
}

## figure 1a: time series cases

f1a <- df23 |>
  group_by(adm_week,wave) |> 
  count() |> 
  ungroup() |> 
  ggplot(aes(x = adm_week, y = n,fill = wave))+
  geom_col(alpha = 0.6,color="black")+
  scale_x_date(date_breaks = "1 month",
               date_labels = "%b",
               name = "Admission date (2023)",
               limits = c(as.Date("2023-01-01"),as.Date("2023-12-31")))+
  # geom_vline(xintercept = as.Date(time_cut))+
  scale_fill_manual(values = c("#582C83FF","#FFC72CFF"))+
  theme_minimal()+
  labs(y = "Cases",tag = "A")+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill = guide_colourbar(barwidth=20),
         color = "none")


## figure 1b
f1b <- df23 |>
  select(adm_week,age1) |>
  ggplot(aes(x = adm_week, y = age1)) +
  stat_density_2d(
    aes(fill = after_stat(count)),
    geom = "raster",
    contour = FALSE,
    interpolate = TRUE
  ) +
  geom_line(
    data = cohort_lines(seq(0,5,by=1)),
    aes(x = date, y = age, group = cohort,color = trend),
    linewidth = 0.25,
    alpha = 0.3
  )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_fill_viridis_c(option = "inferno", n.breaks = 10) +
  theme_minimal() +
  labs(
    x = "Admission week (2023)",
    y = "Age (years)",
    fill = "Number of\nadmissions",
    tag = "B"
  )+
  theme_minimal()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(0,6)),breaks = seq(0,6))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_text(size = 18),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=25),
         color = "none")

f1c <- ggplot(data=df23, aes(x=age1, group=wave, fill=wave)) +
  geom_density(alpha = 0.6) +
  scale_fill_manual(values = c("#582C83FF","#FFC72CFF")) +
  scale_x_reverse(limit = c(6,0),
                  breaks = seq(0,6,by=1),
                  minor_breaks = NULL)+
  scale_y_continuous(minor_breaks = NULL)+
  coord_flip()+
  theme_minimal()+
  labs(x = "Age", 
       y = "Density",
       fill = "Wave",
       tag = "C")+
  theme(axis.title.y = element_blank(),
        axis.ticks.x = element_blank(),
        # legend.position = "top",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_blank(),
        legend.text = element_text(size = 18),
        legend.title = element_text(size = 18))


((f1a/f1b)|(plot_spacer()/f1c))+
  plot_layout(widths = c(2.5,1))

Code

ggsave("./fig_manuscript/fig1_new.svg",
       width = 15,height = 9,dpi = 500)

Explain how to generate figure 1B

I just used the admission week and age (float form), and use geom_density_2d function to plot it. The function performed a 2D kernel density estimation and display results with contours

9.5.1 Trying to using gam to smooth the raw data

In this step, I just used the admission week and age (integer form), i.e I counted the number of admission at each age in each week and plot it by heatmap below.

Code

df_smooth <- df23 |>
  mutate(age3 = floor(age1)) |>
  group_by(adm_date, age3) |>
  summarize(n = n(), .groups = "drop") |>
  complete(age3, adm_date, fill = list(n = 0)) |>
  mutate(
    age = as.numeric(age3),
    time = as.numeric(adm_date)   # convert to numeric scale
  )

df_smooth |>
  filter(age < 7) |>
  ggplot() +
  geom_tile(aes(x = as.Date(time), y = age, fill = n))+
  labs(x = "Week", y = "Age", fill = "Smoothed cases")+
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_fill_viridis_c(option = "inferno", n.breaks = 10) +
  theme_minimal() +
  labs(
    x = "Admission date",
    y = "Age (years)",
    fill = "Number of admissions"
  )+
  theme_minimal()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(-0.5,6.5)),breaks = seq(-1,7))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=25),
         color = "none")

The total number admission from the raw data

In this step, I tried to use GAM to model the raw data, I tried to used gaussian family with log link function, and bs smoothing terms, with tensor-product

Code

## data for GAM fitting, using n ~ s(age) + s(time)
df_smooth |> 
  select(-age3) |> 
  head()

# A tibble: 6 × 4
  adm_date       n   age  time
  <date>     <int> <dbl> <dbl>
1 2023-01-01     0     0 19358
2 2023-01-02     3     0 19359
3 2023-01-03     0     0 19360
4 2023-01-04     3     0 19361
5 2023-01-05     0     0 19362
6 2023-01-06     1     0 19363

Code

## create data for predict
age_val <- seq(0,6,le = 365)

collection_date_val <- seq(min(df23$adm_week),
                           max(df23$adm_week), le = 365)

new_data <- expand.grid(age = age_val,
                        time = as.numeric(collection_date_val))

Code

## gam formular with tensor-product
fit_gau_bs <- gam(n ~ s(age,bs = "bs") + 
                      s(time,bs = "bs") + 
                      ti(age, time, bs = c("bs", "bs")) , data = df_smooth,
                  family = gaussian(link = "log"),method = "REML")

new_data$pred_gau_bs <- predict(fit_gau_bs, newdata = new_data, type = "response")

f1b_new <- ggplot(new_data) +
  geom_raster(aes(x = as.Date(time), y = age, fill = pred_gau_bs),interpolate = TRUE) +
  geom_line(
    data = cohort_lines(),
    aes(x = date, y = age, group = cohort,color = trend),
    linewidth = 0.3,
    alpha = 0.4
  )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_fill_viridis_c(option = "turbo", n.breaks = 10) +
  theme_minimal() +
  labs(
    x = "Admission date",
    y = "Age (years)",
    fill = "Number of\nadmissions",
    tag = "B"
  )+
  theme_minimal()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(-0.5,6.5)),breaks = seq(-1,7))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=25),
         color = "none")

f1b_new

Code

library(plotly)

plot_ly(new_data, x = ~sort(unique(as.Date(time))),
        y = ~sort(unique(age)),
        z = ~matrix(pred_gau_bs, 365),
        showscale = F) |> 
  add_surface() |> 
  layout(autosize = F, width = 700, height = 700)

9.5.2 tensor-product plot

Code

ggplot(new_data, aes(x = as.Date(time), y = age, z = pred_gau_bs)) +
  geom_raster(aes(fill = pred_gau_bs), interpolate = TRUE) +
  geom_contour(color = "white", alpha = 0.7) +
  scale_fill_viridis_c(option = "turbo",n.breaks = 10) +  
  labs(x = "Week", y = "Age", fill = "Smoothed cases")+
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  labs(
    title = "Tensor-product smooth of age × time",
    x = "Age",
    y = "Time",
    fill = "Smooth (fit)"
  ) +
  theme_minimal()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(-0.5,6.5)),breaks = seq(-1,7))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=25),
         color = "none")

In this plot, I fitted log normal distribution to age distribution of 2 wave, the histograms are the raw data and the lines are the fitted lognormal distribution

Code

library(fitdistrplus)
fit1 <- fitdist(df23 %>% filter(age1 >0 & wave == "1st") %>% pull(age1),"lnorm")
fit2 <- fitdist(df23 %>% filter(age1 >0 & wave == "2nd") %>% pull(age1),"lnorm")

x_vals <- seq(0, 7, le = 512)


fit_1p <- data.frame(wave = "1st",age = x_vals, density = dlnorm(x_vals,
                                                                 meanlog = fit1$estimate[1],
                                                                 sdlog = fit1$estimate[2]))

fit_2p <- data.frame(wave = "2nd",age = x_vals, density = dlnorm(x_vals,
                                                                 meanlog = fit2$estimate[1],
                                                                 sdlog = fit2$estimate[2]))

f1c_new <- ggplot(data=df23) +
  geom_histogram(aes(x=age1, fill = wave),
                 color = "white",
                 alpha = .5) +
  scale_fill_manual(values = c("#582C83FF","#FFC72CFF"))+
  geom_line(data = rbind(fit_1p,fit_2p),aes(x = age,y = density*5000))+
  facet_wrap(~wave)+
  scale_x_reverse(limit = c(6,0),
                  breaks = seq(0,6,by=1),
                  minor_breaks = NULL)+
  scale_y_continuous(minor_breaks = NULL,
                     name = "Cases count",
                     sec.axis = sec_axis( trans=~./5000, name="Density"))+
  coord_flip()+
  theme_bw()+
  labs(x = "Age",
       y = "Density",
       tag = "C")+
  theme(axis.title.y = element_blank(),
        axis.ticks.x = element_blank(),
        # legend.position = "top",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.x = element_text(size = 18),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size = 18),
        legend.text = element_text(size = 18),
        strip.text = element_text(size = 18),
        legend.title = element_text(size = 18))


((f1a|plot_spacer())/
    (f1b_new|wrap_elements(full = f1c_new  + 
                             theme(plot.margin = margin(-40,0,67,0)))))+
  plot_layout(widths = c(1,1))

Code

ggsave("./fig_manuscript/fig1_new2.svg",
       width = 15,height = 9,dpi = 500)

9.6 Figure 2

Code

age_profile_constrained_cohort2 <- function(data, age_values = seq(0, 15, le = 512),
                                            ci = .95, n = 100) {
  
  predict2 <- function(...) predict(..., type = "response") |> as.vector()
  
  age_profile <- function(data, age_values = seq(0, 15, le = 512), ci = .95) {
    model <- gam(pos ~ s(age), binomial, data)
    link_inv <- family(model)$linkinv
    df <- nrow(data) - length(coef(model))
    p <- (1 - ci) / 2
    model |>
      predict(list(age = age_values), se.fit = TRUE) %>%
      c(list(age = age_values), .) |>
      as_tibble() |>
      mutate(lwr = link_inv(fit + qt(    p, df) * se.fit),
             upr = link_inv(fit + qt(1 - p, df) * se.fit),
             fit = link_inv(fit)) |>
      dplyr::select(- se.fit)
  }
  
  
  age_profile_unconstrained <- function(data, age_values = seq(0, 15, le = 512),
                                        ci = .95) {
    data |>
      group_by(collection) |>
      group_map(~ age_profile(.x, age_values, ci))
  }
  
  shift_right <- function(n, x) {
    if (n < 1) return(x)
    c(rep(NA, n), head(x, -n))
  }
  
  dpy <- 365 # number of days per year

  mean_collection_times <<- data |>
    group_by(collection) |>
    summarise(mean_col_date = mean(col_date2)) |>
    with(setNames(mean_col_date, collection))

  cohorts <- cumsum(c(0, diff(mean_collection_times))) |>
    divide_by(dpy * mean(diff(age_values))) |>
    round() |>
    map(shift_right, age_values)

  age_time <- map2(mean_collection_times, cohorts,
                   ~ tibble(collection_time = .x, cohort = .y))

  age_time_inv <- age_time |>
    map(~ cbind(.x, age = age_values)) |>
    bind_rows() |>
    na.exclude()

  data |>
    # Step 1:
    group_by(collection) |>
    group_modify(~ .x |>
                   age_profile(age_values, ci) |>
                   mutate(across(c(fit, lwr, upr), ~ map(.x, ~ rbinom(n, 1, .x))))) |>
    group_split() |>
    map2(age_time, bind_cols) |>
    bind_rows() |>
    unnest(c(fit, lwr, upr)) |>
    pivot_longer(c(fit, lwr, upr), names_to = "line", values_to = "seropositvty") |>
    # Step 2a:
    filter(cohort < max(age) - diff(range(mean_collection_times)) / dpy) |>
    group_by(cohort, line) |>
    group_modify(~ .x %>%
                   scam(seropositvty ~ s(collection_time, bs = "mpi"), binomial, .) |>
                   predict2(list(collection_time = seq(mean_collection_times[1],19722,le=52))) %>%
                   tibble(collection_time = seq(mean_collection_times[1],19722,le=52),
                          seroprevalence  = .)) |>
    ungroup() |>
    group_by(collection_time, line) |>
    group_modify(~ .x |>
                   mutate(across(seroprevalence, ~ gam(.x ~ s(cohort), betar) |>
                                   predict2()))) |>  ### modified
    ungroup() |>
    pivot_wider(names_from = line, values_from = seroprevalence) |>
    group_by(collection_time) |>
    group_split()
}

First I show how I calculate the expected admission of CH1. I used outpatient cases from CH1: group number of outpatient admitted to CH1 by age and district. I used the 2019 census as the denominator, also group number of populatiion by age and district. Then the probability of CH1 admission is calculated by divided \(\frac{CH1 \ outpatients_{[a,d]}}{HCMC \ population \ in \ 2019_{[a,d]}}\). After that at each age group, I weighted the probability of CH1 admission, then multiply it again to hcmc population at age and district d to get the expected CH1 admission.

Code

ch1_outpatient <- paste0(path2data, "CH1_outpatient_HCM_cleaned.rds") |>
  readRDS()

census2019 <- paste0(path2data, "census2019.rds") |>
  readRDS() 

## generate denominator

N_ad <- census2019 |>
  filter(province == "Thành phố Hồ Chí Minh") |>
  mutate(district = district %>%
           str_replace_all(
             c("Quận 2" = "Thủ Đức",
               "Quận 9" = "Thủ Đức")) %>%
           str_remove("Quận|Huyện") %>%
           trimws(which = "both") %>%
           stri_trans_general("latin-ascii") %>%
           tolower()) %>%
  group_by(district,age) |>
  summarise(n = sum(n),.groups = "drop") |>
  mutate(across(age, ~ stringr::str_remove(.x, " tuổi| \\+") |> as.integer())) |>
  arrange(age) |>
  filter(age < 17)

## probability of CH1 admission
expected_catchment_CH1 <- left_join(ch1_outpatient,
          N_ad,
          by = join_by(district,age_gr == age)) %>% 
  na.omit(n.y) |> 
  mutate(p_hos = n.x/n.y) %>%  
  arrange(age_gr) %>% 
  group_by(age_gr) %>% 
  mutate(weight = p_hos/sum(p_hos),
         expected_cm = n.y*weight) %>% 
  ungroup() |> 
  group_by(period,age_gr) |> 
  summarise(e_admission = sum(expected_cm),.groups = "drop")  

## age profile of expected CH1 catchment
expected_age_cm <- expected_catchment_CH1 |> 
  group_by(period) |> 
  group_modify(~ gam(e_admission~s(age_gr),method = "REML",data = .) %>% 
                 predict(list(age_gr = seq(0,15,le = 512)),type="response") %>%
                 as.tibble() %>% 
                 mutate(age = seq(0,15,le = 512)))

expected_age_cm |> 
  ggplot(aes(x = age,y = value))+
  geom_line()+
  # geom_ribbon(aes(ymin = lwr,ymax=upr),fill = "blue",alpha = .3)+
  geom_point(aes(x = age_gr, y = e_admission),
             data = expected_catchment_CH1)+
  theme_bw()+
  facet_wrap(~period)+
  labs(y = "Expected admission",x = "Age (years)")

Point is the data and line is the fitted GAM model

Code

df_cases_ch1_23 <- df23 |> 
  filter(medi_cen %in% c("Bệnh viện Nhi đồng 1",
                         "Bênh viện Nhi Đồng 1",
                         "Bệnh viện Nhi Đồng 1")) 

## fig 2C
ts_ch1 <- df_cases_ch1_23 %>%
  group_by(adm_week,wave) %>%
  count() %>%
  ggplot(aes(x = adm_week, y = n,fill = wave))+
  geom_col(alpha = 0.6,color="black")+
  scale_x_date(date_breaks = "1 month",
               date_labels = "%b %Y",
               name = "Collection date",
               limits = c(as.Date("2023-01-01"),as.Date("2023-12-31")))+
  scale_y_continuous(limits = c(0,900),breaks = seq(0,900,le = 4))+
  scale_fill_manual(values = c("#582C83FF","#FFC72CFF"))+
  theme_minimal()+
  annotate("rect", fill = "grey",
           xmin = as.Date(c("2023-01-01","2023-04-05","2023-08-02","2023-12-06")),
           xmax = as.Date(c("2023-01-15","2023-04-26","2023-08-30","2023-12-27")),
           ymin = 0, ymax = Inf, alpha = .5)+
  labs(y = "Cases")+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=20),
         color = "none")

## fig 2D

df_cases_count_ch1_23 <- df_cases_ch1_23 |> 
  group_by(age,time) |> 
  count() |> 
  ungroup()
  
fit_gau_bs_ch1 <- gam(n ~ s(age,bs = "bs") + 
                    s(time,bs = "bs") + 
                    ti(age, time, bs = c("bs", "bs")) , data = df_cases_count_ch1_23,
                  family = gaussian(link = "log"),method = "REML")

age_val <- seq(0,6,le = 365)

collection_date_val <- seq(min(df23$adm_week),
                           max(df23$adm_week), le = 365)

new_data2 <- expand.grid(age = age_val,
                        time = as.numeric(collection_date_val))

new_data2$pred_gau_bs_ch1 <- predict(fit_gau_bs_ch1, newdata = new_data2, type = "response")

hm_ch1 <- ggplot(new_data2) +
  geom_raster(aes(x = as.Date(time), y = age, fill = pred_gau_bs_ch1),interpolate = TRUE) +
  geom_line(
    data = cohort_lines(),
    aes(x = date, y = age, group = cohort,color = trend),
    linewidth = 0.3,
    alpha = 0.4
  )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_fill_viridis_c(option = "turbo", n.breaks = 10) +
  theme_minimal() +
  labs(
    x = "Admission date",
    y = "Age (years)",
    fill = "Number of\nadmissions",
    tag = "D"
  )+
  theme_minimal()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(-0.5,6.5)),breaks = seq(-1,7))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=25),
         color = "none")

## calculate atk rate as a function of time
df_sero <- df_sero |>
  as_tibble() |>
  mutate(collection = id |>
           str_remove(".*-") |>
           as.numeric() |>
           divide_by(1e4) |>
           round(),
         col_date2 = as.numeric(col_date),
         across(pos, ~ .x > 0))

outttt <- age_profile_constrained_cohort2(df_sero)


## fig 2E
atk_sero <- map2(head(outttt, -15),
     tail(outttt,-15),
     ~ left_join(na.exclude(.x), na.exclude(.y), "cohort")|>
       mutate(attack = (fit.y - fit.x) / (1 - fit.x),
              date = as.Date(collection_time.y))) |> 
  bind_rows() |> 
  ggplot() +
  geom_raster(aes(x=date, y=cohort,fill = attack),interpolate = TRUE)+
  geom_line(
    data = cohort_lines(seq(0,15,by=1)),
    aes(x = date, y = age, group = cohort,color = trend),
    linewidth = 0.25,
    alpha = 0.3
  )+
  scale_fill_viridis_c(option = "turbo",name = "Attack rate")+
  theme_void()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(0,14)),breaks = seq(0,14,by=2))+
  scale_x_date(date_breaks = "1 month",
               date_labels = "%b",
               name = "Collection date in 2023",
               limits = c(as.Date("2023-01-01"),as.Date("2023-12-31")))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_text(size = 18),
        axis.ticks.x = element_blank(),
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(color = "none")


## fig 2B
age_profile_sp_cm <- outttt |> 
  bind_rows() |> 
  mutate(col_date = as.Date(collection_time)) |> 
  filter(col_date %in% as.Date(c("2022-12-29","2023-04-11","2023-08-13","2023-12-31"))) |> 
  mutate(col_lab = format(col_date, format = "%b %Y"),
         col_lab = factor(col_lab,levels = c("Dec 2022","Apr 2023","Aug 2023","Dec 2023"))) |> 
  ggplot(aes(x = cohort,y = fit))+
  geom_line()+
  geom_ribbon(aes(ymin = lwr,ymax = upr),fill = "blue", alpha = .3)+
  geom_point(data = df_sero |> 
               mutate(col_lab = factor(col_time,levels = c("Dec 2022","Apr 2023",
                                                          "Aug 2023","Dec 2023"))),
             aes(x = age, y = pos),shape = "|")+
  facet_wrap(~ col_lab,nrow = 1)+
  labs(y = "Seroprevalence (%)",x = "Age (years)",tag="B")+
  scale_y_continuous(labels = scales::label_percent(scale = 100),limits = c(0,1))+
  theme_bw()+
  theme(axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 18),
        legend.position = "none",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        strip.text.x = element_text(size = 18))


## fig 1A 

ouut <- list()
dat <- list()

for (i in 1:3){
  dat[[i]] <- df_cases_ch1_23 |> 
    filter(adm_date >= as.Date(mean_collection_times[i]) &
             adm_date <= as.Date(mean_collection_times[i+1]))
}


expected_incidence <- map2(head(outttt, -15),
                           tail(outttt,-15),
                           ~ left_join(na.exclude(.x), na.exclude(.y), "cohort")|>
                             mutate(attack = (fit.y - fit.x) / (1 - fit.x),
                                    date = as.Date(collection_time.y))) |> 
  bind_rows() |> 
  filter(date %in% as.Date(c("2023-04-11","2023-08-13","2023-12-31"))) |> 
  mutate(period = case_when(
    date <= as.Date("2023-04-30") ~ "12/2022 - 4/2023",
    date > as.Date("2023-04-30") & date <= as.Date("2023-08-31") ~ "4/2023 - 8/2023",
    date > as.Date("2023-08-31") ~ "8/2023 - 12/2023"
  )) |> 
  group_by(period) |> 
  group_split() |> 
  map(~ left_join(.x,expected_age_cm, by = join_by(cohort == age,period))) %>% 
  bind_rows(.id = "id") |> 
  mutate(sp_gap = fit.y-fit.x,
         exp_inc = sp_gap*value)

exp_in_ch1 <- expected_incidence |> 
  ggplot()+
  geom_line(aes(x = cohort,y = exp_inc))+
  geom_histogram(data = dat %>% bind_rows(.id = "id") %>% filter(age1 >0),
                 aes(age1),binwidth = 0.5,
                 color = "white",fill = "black",alpha = 0.2)+
  facet_wrap(~factor(id,labels = c("12/2022 - 4/2023",
                                   "4/2023 - 8/2023",
                                   "8/2023 - 12/2023")))+
  labs(y = "Expected incidence",x = "Age (years)",tag = "A")+
  coord_cartesian(ylim = c(0,1200))+
  theme_bw()+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 18),
    legend.text = element_text(size = 18),
    plot.tag = element_text(face = "bold", size = 18),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    legend.title = element_text(size = 18),
    strip.text = element_text(size = 18),
    panel.grid.minor.x = element_blank())

Code

fig2 <- exp_in_ch1/
  age_profile_sp_cm/
  (ts_ch1+labs(tag="C"))/
  (hm_ch1+labs(tag="D"))/
  (atk_sero+labs(tag="E"))

ggsave("./fig_manuscript/fig2_new.svg",plot = fig2, 
       width = 15,height = 20,dpi = 500)

Explain how I generated figure 2E. I used constrainted algorithm. I mentioned the algorithm overhere: Using GAMs to model seroprevalence as a function of both age and time using a 2-step algorithm.

-* Step 1 (age profile): for each time point (i.e. samples collection):

fit an unconstrained binomial GAM to seropositivity as a function of age
convert population seroprevalence into individual seropositivity realizations: for each value of a large vector of age values:
- generate the prediction + confidence interval
- from each of the rate values of the prediction and confidence interaval lower and upper bounds, generate random realizations of a Bernoulli process to convert population seroprevalence into individual seropositivity
Step 2a (epidemiological time): for each value of age:
- fit a monotonically increasing binomial GAM to the Bernoulli realizations as a function of time
- for each time point: generate the prediction + confidence interval
Step 2b (smoothing out the stochasticity introduced by the seroprevalence to seropositivity conversion): for each time point fit an unconstrained beta GAMs to the predictions and confidence interval bounds of step 2a as a function of age

But in the step 2b, instead of fitting 4 collection time points, I fitted 52 colection time point as 52 week, then I exclude the age profile of seroprevalence before April 2023. And I calculated attack rate followed \(attack \ racte = \frac{seroprevalence_{a,t}-seroprevalence_{a,t-1}}{1 - seroprevalence_{a,t-1}}\). And plot it by ggplot

9.7 Figure 3

Try to model virology as a function of age and time

In this section, I used the GAM fitted as a function of age and time of CH1 reported cases (used to generate heatmap in section 9.5.1 but for CH1 cases only) (1), combined with GAM fitted as a function of age and time of virological data (used to generate figure 3C)(2). Then I multiplied \(reported \ cases (1) \times EV-A71 \ percentage (2)\) at each age and time then I had number of ev-a71 of each age and time. Then I group by each time and sum ev-a71 cases at all age group to generate the plot below.

Code

time_f <- df_cases_ch1_23 %>%
  group_by(adm_week) %>%
  count() %>%
  ungroup()

df_viro <- df_viro %>%
  mutate(pos = case_when(
    sero_gr == "EV-A71" ~ TRUE,
    sero_gr != "EV-A71" ~ FALSE),
    pos_cv = case_when(
      sero_gr == "CV-A6" ~ TRUE,
      sero_gr != "CV-A6" ~ FALSE),
    peak = case_when(
      month(admission_date) >= 6 & month(admission_date) <= 8 ~ "6/2023 - 8/2023",
      month(admission_date) >= 9 ~ "9/2023 - 12/2023"
    ),
    adm_week = as.Date(floor_date(admission_date, "week")),
    adm_month = as.Date(floor_date(admission_date, "month")),
    time = as.numeric(admission_date)
  )

# dt_vr <- df_viro %>%
#   group_by(adm_month) %>%
#   count(pos) %>%
#   ungroup() %>%
#   pivot_wider(names_from = pos,values_from = n,names_prefix = "pos") %>%
#   replace(is.na(.),0) %>%
#   mutate(total = posTRUE+posFALSE,
#          per = posTRUE/total,
#          time = adm_month %>% as.numeric()
#          )%>%
#   rowwise() %>%                          # needed for prop.test()
#   mutate(
#     lwr = prop.test(posTRUE, total)$conf.int[1],   # 95% CI lower bound
#     upr = prop.test(posTRUE, total)$conf.int[2]    # optional upper bound
#   )


# modelll <- gam(pos~s(time),family = binomial,method = "REML",data = df_viro)
# 
# link_inv <- modelll$family$linkinv
# 
# recon_epicurve_dt <- modelll %>%
#   predict(list(time = time_f %>%
#                  filter(adm_week >= min(df_viro$admission_date)) %>%
#                  pull(adm_week) %>%
#                  as.numeric()
#   ),se.fit = TRUE) %>%
#   as_tibble() %>%
#   mutate(time = time_f %>%
#            filter(adm_week >= min(df_viro$admission_date)) %>%
#            pull(adm_week),
#          lwr = link_inv(fit - 1.96*se.fit),
#          upr = link_inv(fit + 1.96*se.fit),
#          fit = link_inv(fit)
#          ) %>%
#   dplyr::select(-se.fit) %>%
#   left_join(time_f,., by = join_by(adm_week == time)) %>%
#   mutate(e_ev71 = fit*n,
#          e_ev71_lwr = lwr*n,
#          e_ev71_upr = upr*n) %>%
#   replace(is.na(.),-1) 
# 
# recon_epicurve <- recon_epicurve_dt %>%
#   ggplot(aes(x = adm_week))+
#   geom_line(aes(y = fit*1000),linetype = "dashed")+
#   geom_ribbon(aes(y = fit*1000,ymin = lwr*1000,ymax = upr*1000),fill = "blue",alpha = .3)+
#   geom_line(aes(y = e_ev71))+
#   geom_ribbon(aes(y = e_ev71_upr,ymin = e_ev71_lwr,ymax = e_ev71_upr),fill = "red",alpha = .5)+
#   scale_y_continuous(
#     name = "CH1 admission",
#     sec.axis = sec_axis(~ . /1000, name = "EV-A71 percentage"),
#     limits = c(0,1000)
#   ) +
#   geom_col(aes(y=n),alpha = 0.3)+
#   geom_point(data = df_viro, aes(x = admission_date, y = pos*1000),shape = "|")+
#   scale_x_date(date_breaks = "1 month",
#                date_labels = "%b",
#                limits = c(as.Date("2023-01-01"),as.Date("2023-12-31")))+
#   theme_minimal()+
#   theme(axis.title.y = element_text(size = 18),
#         axis.title.x = element_blank(),
#         axis.ticks.x = element_blank(),
#         legend.position = "bottom",
#         plot.tag = element_text(face = "bold", size = 18),
#         axis.text.x = element_blank(),
#         axis.text.y = element_text(size = 18),
#         legend.text = element_text(size = 15),
#         legend.title = element_text(size = 18))

per_serotype <- df_viro %>%
  group_by(adm_month) %>%
  count(sero_gr) %>%
  mutate(total = sum(n),
         per = n/total) %>%
  ggplot(aes(x = adm_month,y=per,fill = sero_gr))+
  geom_bar(position="fill", stat="identity")+
  scale_x_date(date_breaks = "1 month", date_labels = "%b",
               limits = c(as.Date("2023-01-01"),as.Date("2023-12-31")))+
  scale_y_continuous(labels = scales::label_percent())+
  labs(fill = "Serotype group",y = "Percentage (%)",tag = "B")+
  theme_minimal()+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))

## model ev-a71 percentage as a function of time and age

model_viro_age_time <- gam(pos ~ s(age_adm) + s(time) + ti(age_adm,time),
                           family = binomial,method = "REML",data = df_viro)

collection_date_val <- seq(min(df_viro$time),
                           max(df_viro$time), le = 365)

new_data2 <- expand.grid(age_adm = age_val,
                         time = as.numeric(collection_date_val))

new_data2$pred <- predict(model_viro_age_time, newdata = new_data2, type = "response")


ev_a71_age_time_p <- ggplot() +
  geom_raster(data = new_data2, 
              aes(x = as.Date(time), y = age_adm, fill = pred), interpolate = TRUE) +
  scale_fill_viridis_c(option = "turbo",n.breaks = 10) +
  geom_line(
    data = cohort_lines(viro = TRUE),
    aes(x = date, y = age, group = cohort,color = trend),
    linewidth = 0.25,
    alpha = 0.3
  ) +
  labs(x = "Week", y = "Age", fill = "Smoothed EV-A71 percentage")+
  scale_x_date(date_breaks = "1 month", date_labels = "%b",
               limits = c(as.Date("2023-01-01"),as.Date("2023-12-31")))+
  theme_void()+
  scale_y_reverse(name = "Age (years)",lim= rev(c(-0.5,6.5)),breaks = seq(-1,7))+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=25),
         color = "none")

# fig3 <- (recon_epicurve+labs(tag="A"))/
#   (per_serotype+labs(tag="B"))/
#   (ev_a71_age_time_p+labs(tag="C"))
# 
# ggsave("./fig_manuscript/fig3_new.svg", plot = fig3,
#        width = 13,height = 12,dpi = 500)

Code

# df_cases_count_ch1_23 <- df_cases_ch1_23 |> 
#   group_by(age,time) |> 
#   count() |> 
#   ungroup()
#   
# fit_gau_bs_ch1 <- gam(n ~ s(age,bs = "bs") + 
#                     s(time,bs = "bs") + 
#                     ti(age, time, bs = c("bs", "bs")) , data = df_cases_count_ch1_23,
#                   family = gaussian(link = "log"),method = "REML")

cases_fit <- fit_gau_bs_ch1 |>  
    predict(newdata = df_cases_count_ch1_23, se.fit = TRUE) |> 
    as.tibble() |> 
    cbind(df_cases_count_ch1_23) |>
    mutate(
      # c_lwr = fit_gau_bs_ch1$family$linkinv(fit - 1.96 * se.fit),
      # c_upr = fit_gau_bs_ch1$family$linkinv(fit + 1.96 * se.fit),
      c_fit = fit_gau_bs_ch1$family$linkinv(fit)
           ) |> 
    dplyr::select(-c(se.fit,fit))
    
recontructed_df <- model_viro_age_time |>
  predict(newdata = df_cases_count_ch1_23 |> rename(age_adm = age), se.fit = TRUE) |>
  as.tibble() |>
  cbind(df_cases_count_ch1_23) |>
  mutate(v_lwr = model_viro_age_time$family$linkinv(fit - 1.96 * se.fit),
         v_upr = model_viro_age_time$family$linkinv(fit + 1.96 * se.fit),
         v_fit = model_viro_age_time$family$linkinv(fit)) |>
  dplyr::select(-c(se.fit,fit)) |>
  full_join(cases_fit, by = join_by(age, time,n)) |>
  mutate(ev71_c = c_fit*v_fit,
         ev71_u = c_fit*v_upr,
         ev71_l = c_fit*v_lwr)

recontructed_cases_count_df <- recontructed_df |>
  mutate(day = as.Date(time),
         week = as.Date(floor_date(day, "week"))) |> 
  group_by(week) |>
  summarise(cases = sum(c_fit),
            # cases_u = sum(c_upr),
            # cases_l = sum(c_lwr),
            ev71_cases = sum(ev71_c),
            ev71_cases_u = sum(ev71_u),
            ev71_cases_l = sum(ev71_l)) 

f3a <- recontructed_cases_count_df |> 
  ggplot(aes(x = week))+
  geom_line(aes(y = cases),linetype = "dashed")+
  # geom_ribbon(aes(ymin = cases_u,ymax = cases_l),fill = "blue",alpha = .3)+
  geom_line(aes(y = ev71_cases))+
  geom_ribbon(aes(ymin = ev71_cases_u,ymax = ev71_cases_l),fill = "red",alpha = .3)+
  geom_col(data = df_cases_count_ch1_23 |>
             mutate(day = as.Date(time),
                    week = as.Date(floor_date(day, "week")))|>
             group_by(week) |>
             summarise(cases = sum(n)),
           aes(x = week, y  = cases),alpha = .3)+
  geom_point(data = df_viro, aes(x = admission_date, y = pos*1000),shape = "|",size = 2)+
  scale_x_date(date_breaks = "1 month",
               date_labels = "%b",
               name = "Collection date",
               limits = c(as.Date("2023-01-01"),as.Date("2023-12-31"))) +
  labs(x = "Admission day (2023)",y = "Total number of cases",tag = "A") +
  theme_minimal()+
  theme(axis.title.y = element_text(size = 18),
        axis.title.x = element_text(size = 18),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+
  guides(fill=guide_colourbar(barwidth=20),
         color = "none")

f3a

The bar show the raw data of reported cases admitted to CH1 at each date. The dased line is the fitted GAM model. The solid line is the reconstructed EV-A71 cases.

And compare with the expected infectious estimated from serological data

Code

compare_ev71 <- recontructed_df |> 
  mutate(day = as.Date(time),
         week = as.Date(floor_date(day, "week")),
         period = case_when(
           week <= as.Date("2023-04-30") ~ "12/2022 - 4/2023",
           week > as.Date("2023-04-30") & week <= as.Date("2023-08-31") ~ "4/2023 - 8/2023",
           week > as.Date("2023-08-31") ~ "8/2023 - 12/2023"
         )) |> 
  group_by(period,age) |> 
  summarise(ev_a71_cases = sum(ev71_c),
            raw_n = sum(n),
            .groups = "drop") |> 
  ggplot(aes(x = age, y = ev_a71_cases))+
  geom_col(alpha = 0.3)+
  geom_line(data = expected_incidence,
            aes(x = cohort,y=exp_inc))+
  scale_x_continuous(limits = c(0,15))+
  facet_wrap(~period)+
  labs(y = "Expected incidence",x = "Age (years)",tag = "A")+
  theme_bw()+
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 18),
    legend.text = element_text(size = 18),
    plot.tag = element_text(face = "bold", size = 18),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    legend.title = element_text(size = 18),
    strip.text = element_text(size = 18),
    panel.grid.minor.x = element_blank())


compare_ev71

Combined all analysis in this section

Code

fig3_new <- (compare_ev71+labs(tag="A"))/
  (f3a+labs(tag="B"))/
  (per_serotype+labs(tag="C"))/
  (ev_a71_age_time_p+labs(tag="D"))

ggsave("./fig_manuscript/fig3_new2.svg", plot = fig3_new,
       width = 13,height = 15,dpi = 500)

9.8 Supplementary plot

Code

df23 %>%
  group_by(adm_week, district) %>%
  count() %>%
  ggplot(aes(x = adm_week, y = n)) +
  geom_col() +
  scale_x_date(
    date_breaks = "2 months",
    date_labels = "%b",
    name = "Admission week",
    limits = c(as.Date("2023-01-01"), as.Date("2023-12-31"))
  ) +
  theme_bw() +
  labs(y = "Number of admission") +
  facet_wrap(~district, ncol = 4) +
  theme(
    axis.title.x = element_text(size = 15),
    axis.text.x = element_text(size = 15),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    plot.tag = element_text(face = "bold", size = 18),
    axis.title.y = element_text(size = 18),
    axis.text.y = element_text(size = 18),
    strip.text.x = element_text(size = 18)
  )

Code

ggsave("./fig_manuscript/sup1_new.svg",
       width = 12,height = 12,dpi = 500)

9.8.1 compare the age distributions in the hospital data with the age distribution in the districts where the hospital patients are coming from

Code

df_cases_ch1_23 |> 
  mutate(age3 = ceiling(age1)) |> 
  group_by(age3,district2) |> 
  count() |> 
  left_join(ch1_outpatient |> 
              group_by(district,age_gr) |> 
              summarise(n = sum(n)),
            by = join_by(district2 == district,age3 == age_gr)) |> 
  ungroup() |> 
  set_colnames(c("age","district","hfmd_cases","outpatient")) |> 
  ggplot(aes(x = age))+
  geom_line(aes(y = outpatient))+
  geom_point(aes(y = hfmd_cases*6))+
  scale_y_continuous(
    name = "Outpatients admissions",
    # Define the secondary axis
    sec.axis = sec_axis(
      trans = ~ . / 6, 
      name = "HFMD cases")
  )+
  facet_wrap(~district)

The lines show age distribution of outpatients cases admitted to CH1 in 2023. The points show age distribution of HFMD cases admitted to CH1 in 2023