4  Manuscript figure

Code
library(readxl)
library(tidyverse)
library(gtsummary)
library(patchwork)
library(lubridate)
library(stringr)
library(mgcv)
library(janitor)
library(predtools)
library(magrittr)
library(slider)
invisible(Sys.setlocale("LC_TIME", "English"))

## data import

## case notification
df1 <- read_excel("D:/OUCRU/hfmd/data/TCM_full.xlsx",
                  col_types = c("date", "numeric", "text",
                                "text", "text", "date", "date", "date",
                                "text", "text", "text"))
colnames(df1) <- c("dob", "age", "gender", "commune", "district",
                   "reported_date", "onset_date","adm_date",
                   "medi_cen","inout","severity")
df1$dob <- df1$dob %>% as_date()
df1$adm_date <- df1$adm_date %>% as_date()

df1$age1 <- interval(df1$dob, df1$adm_date) / years(1)
df1$adm_week <- as.Date(floor_date(df1$adm_date, "week"))
df1$district <- df1$district %>% str_replace_all(
  c( "Quận Gò vấp"  = "Quận Gò Vấp"))
df1$district <- df1$district %>%
  str_remove("Quận|Huyện|Thành phố") %>%
  trimws(which = "both")

## serological data

apr_2023 <- read_excel("D:/OUCRU/hfmd/data/4_2023.xlsx")
aug_2023 <- read_excel("D:/OUCRU/hfmd/data/08_2023.xlsx")
dec_2022 <- read_excel("D:/OUCRU/hfmd/data/12_2022.xls")
dec_2023 <- read_excel("D:/OUCRU/hfmd/data/12_2023.xlsx")

t423 <- data.frame(apr_2023[-c(1,2),c(6,8,10:14)])
t423$pos <- replace(t423$...14,is.na(t423$...14),0) %>%
  str_detect(regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t423) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t423$age <- as.numeric(t423$age)
t423$col_time <- rep("Apr 2023",nrow(t423))


t823 <- data.frame(aug_2023[-c(1,2),c(6,8,9,14:17)])
t823$pos <- str_detect(t823$...17,regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t823) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t823$age <- as.numeric(t823$age)
t823$col_time <- rep("Aug 2023",nrow(t823))

t1222 <- data.frame(dec_2022[-c(1,2),c(6,8,10:14)])
t1222$pos <- replace(t1222$...14,is.na(t1222$...14),0) %>%
  str_detect(regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t1222) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t1222$age <- as.numeric(t1222$age)
t1222$col_time <- rep("Dec 2022",nrow(t1222))


t1223 <- data.frame(dec_2023[-c(1,2),c(6,8,9,14:17)])
t1223$pos <- replace(t1223$...17,is.na(t1223$...17),0) %>%
  str_detect(regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t1223) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t1223$age <- as.numeric(t1223$age)
t1223$col_time <- rep("Dec 2023",nrow(t1223))

## census data

census2019 <- readRDS("D:/OUCRU/hfmd/data/census2019.rds")
Code
# function

slide_age <- function(time1,age1,w1,s1){
  df1 <- data.frame(time1,age1) %>%                   ## line listing data frame
    filter(!is.na(time1) & !is.na(age1)) %>%
    arrange(time1)

  a_df1 <- df1 %>% count(time1)                      ## aggregate data frame

  total1 <- nrow(a_df1)
  spots1 <- seq(from = 1, to = (total1 - w1 + 1), by = s1)

  out_total <- data.frame()

  for (i in 1:length(spots1)){
    range1 <- data.frame(a_df1[spots1[i]:(spots1[i] + w1 - 1),1])
    result1 <- df1$age1[df1$time1 >= range1[1,] & df1$time1 <= range1[w1,]]
    out <- data.frame(date = rep(as.character(range1[ceiling(w1/2),]),length(result1)),
                      age = result1)
    out <- out[order(out$age),]
    out_total <- rbind(out_total,out)
  }
  output <- list()
  output$wdat <- out_total
  output$adat <- out_total %>% count(date)
  output$parms <- data.frame(w = w1,s =s1)
  return(output)
}


# case_noti <- function(timee, agee,lab = FALSE){
# 
#   dat <- data.frame(timee,agee) %>%
#     filter(!is.na(timee) & !is.na(agee)) %>%
#     count(timee)
# 
#   ts <- ggplot(dat, aes(x =timee, y = n)) +
#     geom_bar(stat = "identity") +
#     labs(y = "Cases") +
#     theme_minimal()
# 
#   if(lab == TRUE){
#     ts
#   } else {
#     ts + theme(axis.title.x = element_blank(),
#                axis.text.x = element_blank(),
#                axis.ticks.x = element_blank())
#   }
# 
# }

4.1 Figure 1: heat map

Code
library(paletteer)
df23 <- df1 %>% filter(year(adm_week) == 2023)

co <- data.frame()

for (i in 0:6){
  gen <- seq(0,1,le=52) + i
  co <- rbind(co,gen)
}


df23 <- df1 %>% filter(year(adm_date) == 2023)
wwww <- slide_age(time1 = df23$adm_date,
                  age1 =  df23$age1,
                  w1 = 7, s1=7)

ch <- data.frame(date = wwww$adat$date,
                 c0 = as.numeric(co[1,]),
                 c1 = as.numeric(co[2,]),
                 c2 = as.numeric(co[3,]),
                 c3 = as.numeric(co[4,]),
                 c4 = as.numeric(co[5,]),
                 c5 = as.numeric(co[6,]))

leb_month <- c("Jan",rep("",3),"Feb",rep("",3),"Mar",rep("",4),"Apr",rep("",3),
               "May",rep("",4),"Jun",rep("",3),"Jul",rep("",3),"Aug",rep("",4),
               "Sep",rep("",3),"Oct",rep("",3),"Nov",rep("",4),"Dec",rep("",3))

ts <- data.frame(wwww$wdat$date,wwww$wdat$age) %>%
  filter(!is.na(wwww$wdat$date) & !is.na(wwww$wdat$age)) %>%
  count(wwww$wdat$date) %>%
  set_colnames(c("time","n")) %>% 
  mutate(peak = c(rep("1st",36),rep("2nd",nrow(.)-36))) %>% 
  ggplot(aes(x = time, y = n,fill = peak)) +
  geom_col(alpha = 0.6,color="black") +
  geom_vline(xintercept = 36.5) +
  theme_minimal()+
  scale_y_continuous(name = "Cases",
                     breaks = seq(0,3000,by = 1000),
                     limit =c(0,3000), 
                     minor_breaks = NULL)+
  scale_fill_manual(values = c("#582C83FF","#FFC72CFF"))+
  labs(tag = "A")+ 
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "none",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18))

hm <- ggplot(data=wwww$wdat, aes(x=date, y=age)) +
      stat_density(
        aes(fill = after_stat(density)),
        geom = "raster",
        position = "identity"
      )+
      scale_fill_paletteer_c("grDevices::Inferno")+
      # scale_fill_gradient(low="#040404FF", high= "#FFFE9EFF")+
      # scale_fill_distiller(palette = "Blues")+
      theme_minimal()+
      scale_y_reverse(name = "Age (years)",lim= rev(c(0,6)),breaks = seq(0,6))+
      scale_x_discrete(name = "Admission week",labels = leb_month)+
      labs(tag = "B",fill = "Density")+
      # theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1,size = 8))+
      geom_line(data = ch,aes(x = date,y = c0),col = "white",
                group = 1,lwd = 0.25,alpha = 0.8)+
      geom_line(data = ch,aes(x = date,y = c1),col = "white",
                group = 1,lwd = 0.25,alpha = 0.8)+
      geom_line(data = ch,aes(x = date,y = c2),col = "white",
                group = 1,lwd = 0.25,alpha = 0.8)+
      geom_line(data = ch,aes(x = date,y = c3),col = "white",
                group = 1,lwd = 0.25,alpha = 0.8)+
      geom_line(data = ch,aes(x = date,y = c4),col = "white",
                group = 1,lwd = 0.25,alpha = 0.8)+
      geom_line(data = ch,aes(x = date,y = c5),col = "white",
                group = 1,lwd = 0.25,alpha = 0.8)+
      theme(axis.title.y = element_text(size = 18),
            axis.ticks.x = element_blank(),
            legend.position = "bottom",
            plot.tag = element_text(face = "bold", size = 18),
            axis.title.x = element_text(size = 18),
            axis.text.x = element_text(size = 18),
            axis.text.y = element_text(size = 18),
            legend.text = element_text(size = 15),
            legend.title = element_text(size = 18))+ 
  guides(fill=guide_colourbar(barwidth=20,label.position="bottom")) 

fi_peak <- df1 %>% filter(year(adm_date) == "2023") %>%
  filter((adm_date <= as.Date("2023-09-03")&
            !is.na(adm_date) & !is.na(age1)))

se_peak <- df1 %>% filter(year(adm_date) == "2023") %>%
  filter((adm_date > as.Date("2023-09-03")) &
           !is.na(adm_date) & !is.na(age1))

data <- data.frame(
  peak = c( rep("1st wave",nrow(data.frame(se_peak$age1))),
            rep("2nd wave",nrow(data.frame(fi_peak$age1)))),
  age = c( fi_peak$age1, se_peak$age1 )
)

## density plot

ad <- ggplot(data=data, aes(x=age, group=peak, fill=peak)) +
      geom_density(alpha = 0.6) +
      scale_fill_manual(values = c("#582C83FF","#FFC72CFF")) +
      scale_x_reverse(limit = c(6,0),breaks = seq(0,6,by=1), 
                      minor_breaks = NULL)+
      scale_y_continuous(minor_breaks = NULL)+
      coord_flip()+
      theme_minimal()+
      labs(x = "Age", y ="Density",tag = "C",fill = "Outbreak")+
      theme(axis.title.y = element_blank(),
            axis.ticks.x = element_blank(),
            legend.position = "bottom",
            plot.tag = element_text(face = "bold", size = 18),
            axis.title.x = element_text(size = 18),
            axis.text.x = element_text(size = 18),
            axis.text.y = element_text(size = 18),
            legend.text = element_text(size = 18),
            legend.title = element_text(size = 18))
Code
((ts/hm)|(plot_spacer()/ad))+
  plot_layout(widths = c(2.5,1))

Code
ggsave("./fig_manuscript/fig1.svg",
       width = 15,height = 8,dpi = 500)
Code
data %>% 
  tbl_summary(by = peak,
              digits = list(all_continuous() ~ 3)) %>% 
  add_overall() %>% 
  add_p()
Characteristic Overall
N = 43,372
1
1st wave
N = 20,358
1
2nd wave
N = 23,014
1
p-value2
age 2.536 (1.679, 3.723) 2.649 (1.784, 3.726) 2.393 (1.548, 3.721) <0.001
1 Median (Q1, Q3)
2 Wilcoxon rank sum test

4.2 Figure 2: Compare attack rate and seroprevalence per age group over time

Code
df23$agr <- cut(df23$age1, breaks = c(0, 1, 2, 3, 4,5,6,Inf), right = F)
df23$agr <- factor(df23$agr, labels = c("[0-1)", "[1-2)", "[2-3)",
                                        "[3-4)","[4-5)","[5-6)","6+"))

df23$age_gr2 <- cut(df23$age1+0.00000001, breaks = c(seq(0, 15, by = 3),82),
                       labels = c("<0 & ≤3 year-old",
                                  "<3 & ≤6 year-old",
                                  "<6 & ≤9 year-old",
                                  "<9 & ≤12 year-old",
                                  "<12 & ≤15 year-old",
                                  "16+ "))

hcm19 <- census2019 %>% filter(province == "Thành phố Hồ Chí Minh")

hcm19$district <- hcm19$district %>%
  str_remove_all("Quận|Huyện") %>%
  str_replace_all(
    c("\\b2\\b|\\b9\\b"  = "Thủ Đức")) %>%
  trimws(which = "both")




hcm19$age2 <- word(hcm19$age,end = 1) |> as.numeric()

hcm19$age3 <- cut(hcm19$age2,breaks = c(seq(0, 15, by = 3),82),
                     labels = c("<0 & ≤3 year-old",
                                "<3 & ≤6 year-old",
                                "<6 & ≤9 year-old",
                                "<9 & ≤12 year-old",
                                "<12 & ≤15 year-old",
                                "16+"))

pop_agegr <- hcm19 %>%
  group_by(age3) %>%
  summarise(n = sum(n))

# atk_plot_df <- df23 %>%
#   select(adm_week,age_gr2)%>%
#   count(age_gr2,adm_week) %>%
#   left_join(pop_agegr,by = join_by(age_gr2 == age3)) %>%
#   group_by(age_gr2) %>%
#   mutate(pop = n.y - cumsum(lag(n.x, default = 0)),
#          atr = n.x/pop)  %>%
#   select(-n.y)
# 
# 
# atk_rate <- atk_plot_df %>% na.omit() %>% filter(age_gr2 != "16+") %>%
#   ggplot(aes(x = adm_week,y=log(atr*100000))) +
#   # geom_bar(stat = "identity")+
#   geom_line()+
#   facet_wrap(vars(age_gr2),ncol = 5)+
#   # scale_x_date(limits = as.Date(c("2023-01-01","2023-12-31")))+
#   coord_cartesian(xlim = as.Date(c("2023-01-01","2023-12-31")),
#                   ylim = c(-2.5,7.5))+
#   scale_x_date(date_breaks = "3 months",date_labels = "%b %Y")+
#   theme_bw()+
#   labs(x = "Admission week",y= "Log attack rate",tag = "A")+
#   theme(axis.text.x = element_blank(),
#         axis.ticks.x = element_blank())

atk_rate <- df23 %>%
  select(adm_week,age_gr2)%>%
  count(age_gr2,adm_week) %>%
  left_join(pop_agegr,by = join_by(age_gr2 == age3)) %>%
  group_by(age_gr2) %>%
  mutate(pop = n.y - cumsum(lag(n.x, default = 0)),
         atr = cumsum(n.x)/pop)  %>%
  select(-n.y) %>% 
  mutate(
    attack_rate_ma = slide_dbl(
      .x      = atr,
      .f      = mean,         # function to compute
      .before = 4,            # 4 weeks before = 5-week window total
      .after  = 0,
      .step = 1,
      .complete = TRUE        # only compute for full windows
    )
  ) %>%
  ungroup() %>% na.omit() %>% filter(age_gr2 != "16+") %>% 
  ggplot(aes(x = adm_week,y=log10(attack_rate_ma))) +
  # geom_bar(stat = "identity")+
  geom_line()+
  facet_wrap(vars(age_gr2),ncol = 5)+
  scale_x_date(breaks = seq(as.Date("2022-12-01"),as.Date("2023-12-31"), le = 4),
               date_labels = "%b %Y", minor_breaks = NULL)+
  scale_y_continuous(minor_breaks = NULL)+
  # scale_x_date(date_breaks = "4 months",date_labels = "%b %Y")+
  coord_cartesian(xlim = as.Date(c("2022-12-01","2023-12-31")))+
  theme_bw()+
  labs(x = "Admission week",y= "Log 10 cumulative attack rate",tag = "A")+ 
  theme(axis.title.x = element_text(size = 18),
        axis.text.x = element_blank(),
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        strip.text.x = element_text(size = 15))


## sero_prevalence by age group

sero <- rbind(t1222,t1223,t423,t823)

sero$age_gr2 <- cut(sero$age+0.00000001, breaks = seq(0, 15, by = 3),
                       labels = c("<0 & ≤3 year-old",
                                  "<3 & ≤6 year-old",
                                  "<6 & ≤9 year-old",
                                  "<9 & ≤12 year-old",
                                  "<12 & ≤15 year-old"))

sero$col_date <- make_date(sero$col_year,sero$col_month,sero$col_day)

sero$age_gr2 <- factor(sero$age_gr2,
                         levels = c("<0 & ≤3 year-old",
                                    "<3 & ≤6 year-old",
                                    "<6 & ≤9 year-old",
                                    "<9 & ≤12 year-old",
                                    "<12 & ≤15 year-old"))

## observation seroprevalence
te_sp <- sero %>% 
  group_by(age_gr2,col_time) %>% 
  count(pos) %>% 
  pivot_wider(names_from = pos, values_from = n) %>%
  ungroup() %>% 
  mutate(tot = `0`+`1`,
         sp = `1`/(`0`+`1`)) %>% 
  as.data.frame()

for (i in 1:nrow(te_sp)) {
  te_sp$lwr[i] <- prop.test(te_sp$`1`[i],te_sp$tot[i],correct=TRUE)$conf.int[1]
  te_sp$upr[i] <- prop.test(te_sp$`1`[i],te_sp$tot[i],correct=TRUE)$conf.int[2]
}


col_ddd<- sero %>% 
  group_by(age_gr2,col_time) %>% 
  summarise(mean = mean(col_date)) %>%
  ungroup()

te_sp$col_date <- col_ddd$mean

sp_agr <- ggplot(sero,
       aes(x = col_date, y = pos)) +
  # geom_jitter(height = 0.05)+
  geom_point(aes(x = col_date, pos),
             shape = "|")+
  geom_smooth(fill = "blue",alpha = 1/10,
              method = mgcv::gam,formula = y ~ s(x, bs = "bs"),
              method.args = list(method = "REML",link = "logit",
                                 family = "binomial"))+
  geom_point(data = te_sp,
             aes(x  = col_date,y = sp))+
  geom_errorbar(data = te_sp,
                aes(x  = col_date,y = sp,ymin = lwr, ymax = upr),alpha = .5)+
  facet_wrap(~age_gr2,
             ncol = 5)+
  labs(x = "Collection date", y  = "Seroprevalence (%)",tag = "B")+
  scale_x_date(breaks = seq(as.Date("2022-12-01"),as.Date("2023-12-31"), le = 4),
               date_labels = "%b %y", minor_breaks = NULL)+
  scale_y_continuous(labels = scales::label_percent(), minor_breaks = NULL)+
  # scale_x_date(date_breaks = "4 months",date_labels = "%b %Y")+
  coord_cartesian(ylim = c(0, 1),xlim = as.Date(c("2022-12-01","2023-12-31")))+
  theme_bw()+
  theme(axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 12,vjust = 0.5,
                                   hjust = 0.5,angle = 45),
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        strip.text.x = element_text(size = 15))
Code
atk_rate/
  sp_agr

Code
ggsave("./fig_manuscript/fig3.svg",
       width = 12,height = 7,dpi = 500)

4.3 Figure 3: seroepidemiology

4.3.1 GLM

Code
predict2 <- function(x, ci = .95, le = 512, m = 100) {
  p <- (1 - ci) / 2

  link_inv <- x$family$linkinv
  dataset <- x$data
  n <- nrow(dataset) - length(x$coefficients)
  age_range <- range(dataset$age)

  ages <- seq(age_range[1], age_range[2], le = le)

  x |>
    predict(data.frame(age = ages), se.fit = TRUE) |>
    extract(c("fit", "se.fit")) %>%
    c(age = list(ages), .) |>
    as_tibble() |>
    mutate(lwr = m * link_inv(fit + qt(    p, n) * se.fit),
           upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
           fit = m * link_inv(fit)) |>
    select(- se.fit)
}
Code
m <- 100
eps <- 1
plot1222 <-  glm(pos ~ age, binomial, data = t1222) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2022"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#0808cf")+
  ylim(0,101)+
  theme_classic()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2022" = "#0808cf"))+
  labs(y = "Seroprevalence (%)",x = "Age")+
  geom_point(data= t1222, aes(x = age,y = m * pos + eps),shape = "|",
             col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Dec 2022"),size = 6)

plot0423 <-  glm(pos ~ age + I(age ^2), binomial, data = t423) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Apr 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#ed097b")+
  ylim(0,101)+
  theme_classic()+
  scale_color_manual(name = "Y series",
                     values = c("Apr 2023" = "#ed097b"))+
  labs(y = "Seroprevalence (%)",x = "Age")+
  geom_point(data= t423, aes(x = age, m * pos + eps),
             shape = "|",
             col = "#ed097b")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Apr 2023"),size = 6)

plot0823 <-  glm(pos ~ age + I(age^2) + I(age^3), binomial, data = t823) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Aug 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#ed6b00")+
  ylim(0,101)+
  theme_classic()+
  scale_color_manual(name = "Y series",
                     values = c("Aug 2023" = "#ed6b00"))+
  labs(y = "Seroprevalence (%)",x = "Age")+
  geom_point(data= t823, aes(x = age, m * pos + eps),
             shape = "|",
             col = "#ed6b00")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Aug 2023"),size = 6)

plot1223 <-  glm(pos ~ age + I(age^2) + I(age^3), binomial, data = t1223) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#33516b")+
  ylim(0,101)+
  theme_classic()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2023" = "#33516b"))+
  labs(y = "Seroprevalence (%)",x = "Age")+
  geom_point(data= t1223, aes(x = age, m * pos + eps),
             shape = "|",
             col = "#33516b")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Dec 2023"),size = 6)
Code
df_plot <- df1 %>% filter(year(adm_week) == "2023") %>%
  filter(!is.na(adm_week) ) %>%
  count(adm_week) %>% as.data.frame()

atdf <- rbind(t1222,t423,t823,t1223) %>%
  mutate(col_date = make_date(year = col_year,
                              month = col_month,
                              day = col_day))

ts2 <- ggplot()+
  geom_bar(data = df_plot, aes(x = as.Date(adm_week), y = n),stat = "identity",
           alpha = 0.5) +
  geom_point(aes(x = as.Date(col_date), y =  pos*3000),
            shape = "|",data = atdf)+
  labs(x = "Admission week",y = "Cases",tag = "B")+
  annotate("rect", fill = "blue",
           xmin = as.Date(c("2022-12-01","2023-04-01","2023-08-01","2023-12-01")), 
           xmax = as.Date(c("2022-12-31","2023-04-30","2023-08-30","2023-12-30")), 
           ymin = 0, ymax = Inf, alpha = .2)+
  theme_classic()+
  scale_y_continuous(breaks = seq(0,3000,by = 1000),
                     limit =c(0-1,3000+1))+
  scale_x_date(
    breaks = as.Date(c("2022-12-15", "2023-04-15", "2023-08-15", "2023-12-15")),
    labels = c("Dec 2022", "Apr 2023", "Aug 2023", "Dec 2023"),
    limits = c(as.Date("2022-11-24"),as.Date("2024-01-01")))+
  theme(
    axis.text.x = element_text(size = 18),
    axis.text.y = element_text(size = 18),
    legend.text = element_text(size = 18),
    plot.tag = element_text(face = "bold", size = 18),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18))
Code
glmfit <- (plot1222 | plot0423 | plot0823 | plot1223) 


fig3 <- glmfit/
  ts2

fig3+ 
  plot_annotation(tag_levels = c('A'))

Code
# ggsave("./fig_manuscript/fig2.svg",
#        width = 15,height = 7,dpi = 500)

4.3.2 GAM

Compare GAM fitting manually and geom_smooth

Code
predictg <- function(x, ci = .95, le = 512, m = 100) {
  p <- (1 - ci) / 2
  
  link_inv <- x$family$linkinv
  dataset <- x$model
  n <- nrow(dataset) - length(x$coefficients)
  age_range <- range(dataset$age)
  
  ages <- seq(age_range[1], age_range[2], le = le)
  
  x |>
    predict(data.frame(age = ages), se.fit = TRUE) |>
    extract(c("fit", "se.fit")) %>%
    c(age = list(ages), .) |>
    as_tibble() |>
    mutate(lwr = m * link_inv(fit + qt(    p, n) * se.fit),
           upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
           fit = m * link_inv(fit)) |>
    select(- se.fit)
}

df.list <- list(t1222,t423,t823,t1223)

## fit manually

fig2m <- lapply(df.list, function(x) 
            gam(pos~s(age,bs = "bs"),method = "REML",
                family = "binomial",data = x) |>
            predictg() %>% as.data.frame() 
            ) %>%
  bind_rows(.id = "label") %>% 
  mutate(col_date = case_when(
    label == 1 ~ "Dec 2022",
    label == 2 ~ "Apr 2023",
    label == 3 ~ "Aug 2023",
    label == 4 ~ "Dec 2023"
  )) %>% 
  ggplot(aes(x = age,y = fit, 
             fill = factor(col_date,levels = c("Dec 2022","Apr 2023",
                                               "Aug 2023","Dec 2023"))))+
  geom_line()+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5)+
  scale_fill_paletteer_d("nbapalettes::thunder")+
  ylim(0,101)+
  labs(y = "Seroprevalence (%)",x = "Age",tag = "Manually")+
  scale_x_continuous(breaks = seq(0,15,by = 3))+
  theme_bw()+
  facet_wrap(~factor(col_date,levels = c("Dec 2022","Apr 2023",
                                         "Aug 2023","Dec 2023")),
              ncol = 4)+ 
  theme(axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 18),
        legend.position = "none",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        strip.text.x = element_text(size = 18))

## geom_smooth

fig2a <- df.list %>% bind_rows(.id = "label") %>% 
        ggplot(aes(x = age, y = pos)) +
          # geom_jitter(height = 0.05)+
          geom_point(aes(x = age, pos),
                     shape = "|")+
          facet_wrap(~factor(col_time,levels = c("Dec 2022","Apr 2023",
                                                 "Aug 2023","Dec 2023")),
                     ncol = 4)+
          geom_smooth(fill = "blue",alpha = 1/10,
                      method = mgcv::gam,formula = y ~ s(x, bs = "bs"),
                      method.args = list(method = "REML",link = "logit",
                                         family = "binomial"))+
          labs(y = "Seroprevalence (%)",x = "Age",tag = "A")+
          scale_x_continuous(breaks = seq(0,15,by = 3), minor_breaks = NULL)+
          scale_y_continuous(labels = scales::label_percent(), minor_breaks = NULL)+
          coord_cartesian(ylim = c(0, 1))+
          theme_bw()+ 
          theme(axis.title.x = element_text(size = 18),
                axis.text.x = element_text(size = 18),
                legend.position = "none",
                plot.tag = element_text(face = "bold", size = 18),
                axis.title.y = element_text(size = 18),
                axis.text.y = element_text(size = 18),
                strip.text.x = element_text(size = 18))
Code
fig2m/
  fig2a

Code
fig2a/
  ts2

Code
ggsave("./fig_manuscript/fig2.svg",
       width = 13,height = 8,dpi = 500)

4.4 GLM with PAVA algorithm

Code
library(Iso)

mod1 <- glm(pos ~ age * col_date +
              I(age ^ 2) * col_date, binomial,
            mutate(atdf, across(col_date, as.numeric)))
age_val <- c(.1, seq(1,15,0.5))
collection_date_val <- seq(min(atdf$col_date),
                           max(atdf$col_date))

new_data <- expand.grid(age = age_val, col_date = as.numeric(collection_date_val))

prdcts <- cbind(new_data, fit = 100 * predict(mod1, new_data, "response")) |> 
  as_tibble() |> 
  arrange(col_date) |> 
  mutate(across(col_date, as_date))

out_pava <- prdcts %>% 
  mutate(time_numeric = as.numeric(col_date)) %>% 
  group_by(age) %>% 
  arrange(time_numeric) %>%
  mutate(
    seroprev_monotonic = pava(fit, time_numeric, decreasing = FALSE)
  )

## boostrap 

bootstrap <- function(data, new_data) {
  # Step 1: resample
  boot_data <- data %>% slice_sample(n = nrow(data), replace = TRUE)
  
  # Step 2: refit model
  mod <- glm(pos ~ age * col_date + I(age^2) * col_date, 
             family = binomial, 
             data = mutate(boot_data, col_date = as.numeric(col_date)))
  
  # Step 3: predict
  preds <- predict(mod, new_data, type = "response") * 100  # seroprev %
  preds_df <- bind_cols(new_data, fit = preds) |> 
    mutate(col_date = as_date(col_date))
  
  # Step 4: apply PAVA per age
  preds_df %>%
    mutate(time_numeric = as.numeric(col_date)) %>%
    group_by(age) %>%
    arrange(time_numeric) %>%
    summarise(
      col_date = col_date,
      seroprev_monotonic = pava(fit, time_numeric, decreasing = FALSE),
      .groups = "keep"
    )
}


B <- 10000  # number of bootstrap iterations

set.seed(42)  # for reproducibility
boot_results <- map_dfr(1:B, ~bootstrap(atdf, new_data) %>% 
                          mutate(bootstrap = .x))

point_est <- out_pava %>% 
  mutate(col_time = case_when(
    month(col_date) == 12 & year(col_date) == 2022 ~ "Dec 2022",
    month(col_date) == 04 & year(col_date) == 2023 ~ "Apr 2023",
    month(col_date) == 08 & year(col_date) == 2023 ~ "Aug 2023",
    month(col_date) == 12 & year(col_date) == 2023 ~ "Dec 2023"
  )) %>% na.omit() %>% 
  group_by(col_time,age) %>%
  summarise(
    sp = mean(seroprev_monotonic)
  )

sp_ci <- boot_results %>% 
  mutate(col_time = case_when(
    month(col_date) == 12 & year(col_date) == 2022 ~ "Dec 2022",
    month(col_date) == 04 & year(col_date) == 2023 ~ "Apr 2023",
    month(col_date) == 08 & year(col_date) == 2023 ~ "Aug 2023",
    month(col_date) == 12 & year(col_date) == 2023 ~ "Dec 2023"
  )) %>% na.omit() %>% 
  group_by(col_time,age) %>%
  summarise(
    lower = quantile(seroprev_monotonic, 0.025),
    upper = quantile(seroprev_monotonic, 0.975),
    .groups = "drop"
  )  

# df.list <- list(t1222,t423,t823,t1223)

sp_pava <- ggplot() +
  geom_line(data = point_est,aes(x = age, y = sp), color = "blue") +
  geom_ribbon(data = sp_ci, aes(x = age,ymin = lower, ymax = upper), fill = "blue", alpha = 0.3) +
  geom_point(aes(x = age, pos*100),
             shape = "|",data = atdf)+
  labs(y = "Seroprevalence (%)", x = "Age (years)") +
  facet_wrap(~factor(col_time,levels = c("Dec 2022","Apr 2023","Aug 2023","Dec 2023")),
             nrow = 1) +
  scale_x_continuous(breaks = seq(0,15,by = 3), minor_breaks = NULL)+
  scale_y_continuous(limits = c(0,100),breaks = seq(0,100,by = 25),
                     labels = scales::label_percent(scale = 1), minor_breaks = NULL)+
  theme_bw()+ 
  theme(axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 18),
        legend.position = "none",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        strip.text.x = element_text(size = 18))
Code
sp_pava/
  ts2

Code
ggsave("./fig_manuscript/glm_pava.svg",
       width = 15,height = 8,dpi = 500)

4.5 Supplementary plot

4.5.1 Attack rate

Code
pop <- readRDS("D:/OUCRU/hfmd/data/count_dangky.rds")
pop_a <- pop %>% group_by(birth_month, birth_year) %>%
  summarise(n=sum(n)) %>% arrange(birth_year)

colnames(pop_a) <- c("m","y","n")

pop_a$dob <- str_c(pop_a$y,pop_a$m,sep = "-") %>% ym()

time1 = df1$adm_date
age1 =  df1$age1
dob = pop_a$dob
n = pop_a$n

dft <- data.frame(time1,age1) %>%
  filter(!is.na(time1) & !is.na(age1)) %>%
  arrange(time1)


dft$agr=as.factor(cut(dft$age1, c(0,0.5,
                                  1,1.5,
                                  2,2.5,
                                  3,3.5,
                                  4,4.5,5,5.5,6,100), right=TRUE ))
levels (dft$agr) = c("0-0.5", "0.5-1", "1.0-1.5",
                     "1.5-2","2-2.5","2.5-3",
                     "3-3.5","3.5-4","4-4.5",
                     "4.5-5","5-5.5","5.5-6","6+")

sus_pop <- data.frame(dob = dob, n = n)

out_total <- data.frame()

hfmd23 <- df1 %>% filter(year(adm_week) == "2023") %>%
  filter(!is.na(adm_week) ) %>%
  count(adm_week)

hfmd23$week <- 1:length(hfmd23$adm_week)
hfmd23$week <- ifelse(hfmd23$week == 53,52,hfmd23$week)

hfmd23$n2 <- ifelse(hfmd23$week == 52, sum(hfmd23$n[hfmd23$week==52]), hfmd23$n)

hfmd23 <- hfmd23[-53,]

dateaa <- hfmd23$adm_week+3

sus_pop <- data.frame(dob = dob, n = n)

for (i in 1:52){
  sus_pop$age <- interval(sus_pop$dob, dateaa[i]) / years(1)
  
  sus_pop$agr=as.factor(cut(sus_pop$age, c(0,0.5,
                                           1,1.5,
                                           2,2.5,
                                           3,3.5,
                                           4,4.5,5,5.5,6,100), right=TRUE ))
  levels (sus_pop$agr) = c("0-0.5", "0.5-1", "1.0-1.5",
                           "1.5-2","2-2.5","2.5-3",
                           "3-3.5","3.5-4","4-4.5",
                           "4.5-5","5-5.5","5.5-6","6+")
  outcum <- sus_pop %>% group_by(agr) %>%
    summarise(n = sum(n)) %>%
    as.data.frame()
  outcum$date <- rep(dateaa[i],nrow(outcum))
  out_total <- rbind(out_total,outcum)
}

deno <- out_total %>%
  pivot_wider(names_from = agr, values_from = n) %>% as.data.frame()

casss <- wwww$wdat

casss$agr=as.factor(cut(casss$age, c(0,0.5,1,1.5,2,2.5,
                                     3,3.5,4,4.5,5,5.5,6,100), right=TRUE ))
levels (casss$agr) = c("0-0.5", "0.5-1", "1.0-1.5",
                       "1.5-2","2-2.5","2.5-3",
                       "3-3.5","3.5-4","4-4.5",
                       "4.5-5","5-5.5","5.5-6","6+")

casss <- casss %>% group_by(date,agr) %>%
  count() %>% pivot_wider(names_from = agr, values_from = n) %>% as.data.frame()


casss <- casss[,-ncol(casss)]
casss <- replace(casss,is.na(casss), 0)
casss <- casss[,c(1:10,13,11,14,12)]

atkr <- data.frame()
atkr <- rbind(atkr,as.numeric(casss[1,-1])/as.numeric(deno[1,-1]))
for (i in 1:51){
  new <- as.numeric(casss[i+1,-1])/(as.numeric(deno[i+1,-1]) - as.numeric(casss[i,-1]))
  atkr <- rbind(atkr,new)
}
atkr <- cbind(deno$date,atkr)

colnames(atkr) <- colnames(deno)
atkr <- replace(atkr,is.na(atkr), 0)



atk_plot <- atkr %>% pivot_longer(cols=colnames(atkr)[-1],
                                  names_to= 'agr',
                                  values_to='atk') %>% as.data.frame()

atk <- ggplot(atk_plot, aes(x=as.character(date), y=agr, fill = atk)) +
  geom_raster()+
  scale_fill_paletteer_c("grDevices::Inferno",
                         breaks = c(0.01, 0.02, 0.03, 0.04, 0.05))+
  scale_y_discrete(limits=rev)+
  scale_x_discrete(name = "Admission week",labels = leb_month)+
  theme_minimal()+
  labs(tag = "B",fill = "Attack rate",y = "Age group (years)")+
  theme(axis.title.y = element_text(size = 18),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 18))+ 
  guides(fill=guide_colourbar(barwidth=20,label.position="bottom")) 
Code
ts/
  atk

Code
ggsave("./fig_manuscript/sup1.svg",
       width = 15,height = 8,dpi = 500)

4.6 Figure 4: Calibration plot

Code
# correct_lv <- c("0≤ & <1","1≤ & <2","2≤ & <3","3≤ & <4","4≤ & <5",
#                 "5≤ & <6","6≤ & <7","7≤ & <8","8≤ & <9","9≤ & <10",
#                 "10≤ & <11","11≤ & <12","12≤ & <13","13≤ & <14","14≤ & <15")
# 
# cali_sero <- sero %>% group_by(age_gr) %>%
#             count(pos) %>%
#             pivot_wider(names_from = pos,values_from = n) %>%
#             clean_names() %>%
#             mutate(sp = x1/sum(x0,x1)) %>%
#             select(-c(x0,x1)) %>%
#             ungroup()
# 
# cali_sero$age_gr <- factor(cali_sero$age_gr,levels = correct_lv)
# 
# ## case noti
# df23$age_cali <- cut(df23$age1+0.00000001, breaks = c(seq(0, 15, by = 1),82),
#                     labels = c(correct_lv,
#                                "16+"))
# 
# ## population
# hcm19$age_cali <- cut(hcm19$age2,breaks = c(seq(0, 15, by = 1),82),
#                       labels = c(correct_lv,
#                                  "16+"))
# 
# pop_agegr_cali <- hcm19 %>%
#                   group_by(age_cali) %>%
#                   summarise(n = sum(n))
# 
# cali_plot_df <- df23 %>%
#                 select(age_cali) %>%
#                 group_by(age_cali) %>%
#                 count() %>% na.omit() %>%
#                 left_join(pop_agegr_cali,by = join_by(age_cali == age_cali)) %>%
#                 # group_by(age_gr2) %>%
#                 mutate(atr = n.x/n.y) %>%
#                 filter(age_cali != "16+")  %>%
#                 left_join(cali_sero,by = join_by(age_cali == age_gr))
# 
# model_clb <- glm(atr*100000 ~ sp,data = cali_plot_df)
# 
# model_clb$pred <- predict.glm(model_clb, type = 'response')
Code
# calibration_plot(data = data.frame(y = model_clb$y,pred = model_clb$pred),
#                  obs = "y", pred = "pred")