2  HFMD Seroprevalence

Code
library(readxl)
library(dplyr)
library(stringr)
library(purrr)
library(tidyr)
library(lubridate)
library(magrittr)
library(mgcv)
library(tidyverse)
library(patchwork)
library(sf)
library(janitor)
library(ggsci)
library(cowplot)
invisible(Sys.setlocale("LC_TIME", "English"))

3 Import data

3.1 Serology data

Code
apr_2023 <- read_excel("D:/OUCRU/hfmd/data/4_2023.xlsx")
aug_2023 <- read_excel("D:/OUCRU/hfmd/data/08_2023.xlsx")
dec_2022 <- read_excel("D:/OUCRU/hfmd/data/12_2022.xls")
dec_2023 <- read_excel("D:/OUCRU/hfmd/data/12_2023.xlsx")

t423 <- data.frame(apr_2023[-c(1,2),c(6,8,10:14)])
t423$pos <- replace(t423$...14,is.na(t423$...14),0) %>%
  str_detect(regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t423) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t423$age <- as.numeric(t423$age)
t423$col_time <- rep("Apr 2023",nrow(t423))


t823 <- data.frame(aug_2023[-c(1,2),c(6,8,9,14:17)])
t823$pos <- str_detect(t823$...17,regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t823) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t823$age <- as.numeric(t823$age)
t823$col_time <- rep("Aug 2023",nrow(t823))

t1222 <- data.frame(dec_2022[-c(1,2),c(6,8,10:14)])
t1222$pos <- replace(t1222$...14,is.na(t1222$...14),0) %>%
  str_detect(regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t1222) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t1222$age <- as.numeric(t1222$age)
t1222$col_time <- rep("Dec 2022",nrow(t1222))


t1223 <- data.frame(dec_2023[-c(1,2),c(6,8,9,14:17)])
t1223$pos <- replace(t1223$...17,is.na(t1223$...17),0) %>%
  str_detect(regex(paste(2^(4:10), collapse = "|"))) %>%
  as.integer(as.logical())
colnames(t1223) <- c("id","age_gr","age","col_day","col_month","col_year","neutralization","pos")
t1223$age <- as.numeric(t1223$age)
t1223$col_time <- rep("Dec 2023",nrow(t1223))

3.2 Case

Code
df1 <- read_excel("D:/OUCRU/hfmd/data/TCM_full.xlsx", 
                  col_types = c("date", "numeric", "text", 
                                "text", "text", "date", "date", "date", 
                                "text", "text", "text"))
colnames(df1) <- c("dob", "age", "gender", "commune", "district",
                   "reported_date", "onset_date","adm_date",
                   "medi_cen","inout","severity")
df1$dob <- df1$dob %>% as_date()
df1$adm_date <- df1$adm_date %>% as_date()

df1$age1 <- interval(df1$dob, df1$adm_date) / years(1)
df1$adm_week <- as.Date(floor_date(df1$adm_date, "week"))
df1$district <- df1$district %>% str_replace_all(
  c( "Quận Gò vấp"  = "Quận Gò Vấp"))
df1$district <- df1$district %>% 
  str_remove("Quận|Huyện|Thành phố") %>%
  trimws(which = "both") 

df_plot <- df1 %>% filter(year(adm_week) == "2023") %>%
  filter(!is.na(adm_week) ) %>%
  count(adm_week) %>% as.data.frame()

ts <- ggplot()+
  geom_bar(data = df_plot, aes(x = as.Date(adm_week), y = n),stat = "identity",
           alpha = 0.5) +
  labs(x = "Admission week","Cases")+
  geom_vline(xintercept = as.Date("2022-12-01"),
             alpha = 0.4,col = "#0808cf")+
  geom_vline(xintercept = as.Date("2022-12-30"),
             alpha = 0.4,col = "#0808cf")+
  geom_vline(xintercept = as.Date("2023-04-01"),
             alpha = 0.4,col = "#ed097b")+
  geom_vline(xintercept = as.Date("2023-04-30"),
             alpha = 0.4,col = "#ed097b")+
  geom_vline(xintercept = as.Date("2023-08-01"),
             alpha = 0.4,col = "#ed6b00")+
  geom_vline(xintercept = as.Date("2023-08-30"),
             alpha = 0.4,col = "#ed6b00")+
  geom_vline(xintercept = as.Date("2023-12-01"),
             alpha = 0.4,col = "#33516b")+
  geom_vline(xintercept = as.Date("2023-12-30"),
             alpha = 0.4,col = "#33516b")+
  xlim(as.Date("2022-11-24"),as.Date("2024-01-01"))+theme_classic()

4 Function

Code
predict2 <- function(x, ci = .95, le = 512, m = 100) {
  p <- (1 - ci) / 2

  link_inv <- x$family$linkinv
  dataset <- x$data
  n <- nrow(dataset) - length(x$coefficients)
  age_range <- range(dataset$age)

  ages <- seq(age_range[1], age_range[2], le = le)

  x |>
    predict(data.frame(age = ages), se.fit = TRUE) |>
    extract(c("fit", "se.fit")) %>%
    c(age = list(ages), .) |>
    as_tibble() |>
    mutate(lwr = m * link_inv(fit + qt(    p, n) * se.fit),
           upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
           fit = m * link_inv(fit)) |>
    select(- se.fit)
}

5 Seroprevalence by age

5.1 Fit by GLM

Code
m <- 100
eps <- 1

plot1222 <-  glm(pos ~ age, binomial, data = t1222) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2022"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5)+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2022" = "#0808cf"))+
  labs(y = "Seroprevalence(%)")+
  geom_point(data= t1222, aes(x = age,y = m * pos + eps),shape = "|",size = 3,
             col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.title= element_blank(),
    legend.position = "inside",
    legend.position.inside =  c(0.15,0.80),
    legend.text = element_text(size = 15))
Code
plot1222 <-  glm(pos ~ age, binomial, data = t1222) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2022"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#0808cf")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2022" = "#0808cf"))+
  labs(y = "Seroprevalence (%)")+
  geom_point(data= t1222, aes(x = age,y = m * pos + eps),shape = "|",
             col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Dec 2022"),size = 6)

plot0423 <-  glm(pos ~ age + I(age ^2), binomial, data = t423) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Apr 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#ed097b")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Apr 2023" = "#ed097b"))+
  labs(y = "Seroprevalence (%)")+
  geom_point(data= t423, aes(x = age, m * pos + eps),
             shape = "|",
             col = "#ed097b")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Apr 2023"),size = 6)

plot0823 <-  glm(pos ~ age + I(age^2) + I(age^3), binomial, data = t823) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Aug 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#ed6b00")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Aug 2023" = "#ed6b00"))+
  labs(y = "Seroprevalence (%)")+
  geom_point(data= t823, aes(x = age, m * pos + eps),
             shape = "|",
             col = "#ed6b00")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Aug 2023"),size = 6)

plot1223 <-  glm(pos ~ age + I(age^2) + I(age^3), binomial, data = t1223) |>
  predict2() %>% as.data.frame() %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#33516b")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2023" = "#33516b"))+
  labs(y = "Seroprevalence (%)")+
  geom_point(data= t1223, aes(x = age, m * pos + eps),
             shape = "|",
             col = "#33516b")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Dec 2023"),size = 6)
Code
plot1222 + plot0423 + plot0823 + plot1223

5.2 Fit by GAM

Code
## 
predictg <- function(x, ci = .95, le = 512, m = 100) {
  p <- (1 - ci) / 2
  
  link_inv <- x$family$linkinv
  dataset <- x$model
  n <- nrow(dataset) - length(x$coefficients)
  age_range <- range(dataset$age)
  
  ages <- seq(age_range[1], age_range[2], le = le)
  
  x |>
    predict(data.frame(age = ages), se.fit = TRUE) |>
    extract(c("fit", "se.fit")) %>%
    c(age = list(ages), .) |>
    as_tibble() |>
    mutate(lwr = m * link_inv(fit + qt(    p, n) * se.fit),
           upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
           fit = m * link_inv(fit)) |>
    select(- se.fit)
}

## plot

plot_gam <- function(x,date){
  
  clo <- case_when(date == 1222 ~ "Dec 2022",
                   date == 423 ~ "Apr 2023",
                   date == 823 ~ "Aug 2023",
                   date == 1223 ~ "Dec 2023")
  clo2 <- case_when(date == 1222 ~ "#0808cf",
                    date == 423 ~ "#ed097b",
                    date == 823 ~ "#ed6b00",
                    date == 1223 ~ "#33516b")
  
  dtaa <- case_when(date == 1222 ~ t1222,
                    date == 423 ~ t423,
                    date == 823 ~ t823,
                    date == 1223 ~ t1223)
  
  x %>% as.data.frame() %>%
    ggplot(aes(x = age,y = fit))+
    geom_line(aes(col = clo))+
    geom_ribbon(aes(x = age,y = fit,
                    ymin=lwr, ymax=upr),alpha = 0.5,fill = clo2)+
    ylim(0,101)+
    theme_minimal() +
  scale_color_manual(name = "Y series",
                     values = c(clo = clo2))+
    labs(y = "Seroprevalence (%)")+
    geom_point(data= dtaa, aes(x = age, m * pos + 1),
               shape = "|",
               col = clo2)+
    theme(
      axis.text.x = element_text(size = 15),
      axis.text.y = element_text(size = 15),
      legend.position = "hide",
      legend.text = element_text(size = 15),
      axis.title.x = element_text(size = 15),
      axis.title.y = element_text(size = 15))+
    annotate("text", x = 3, y = 90, label = c(clo),size = 6) 
}  

g1222 <- gam(pos~s(age,bs = "bs"),method = "REML",
    family = "binomial",data = t1222) %>% 
  predictg() %>% plot_gam(date = 1222)  

g423 <- gam(pos~s(age,bs = "bs"),method = "REML",
    family = "binomial",data = t423) %>% 
  predictg() %>% plot_gam(date = 423)  

g823 <-gam(pos~s(age,bs = "bs"),method = "REML",
    family = "binomial",data = t823) %>% 
  predictg() %>% plot_gam(date = 823)  

g1223 <-gam(pos~s(age,bs = "bs"),method = "REML",
    family = "binomial",data = t1223) %>% 
  predictg() %>% plot_gam(date = 1223)  
Code
g1222 | g423 | g823 | g1223

5.3 Constrain in each time point

Code
## Constrain function
contrain <- function(data1,data2){
  new_data <- data.frame(age = data2$age,
                         fit = rep(0,nrow(data2)),
                         lwr = rep(0,nrow(data2)),
                         up = rep(0,nrow(data2)))
  for (i in 1:512){
    if(data2$fit[i] < data1$fit[i]){
      new_data$fit[i] <- data1$fit[i]
      new_data$lwr[i] <- data1$lwr[i]
      new_data$upr[i] <- data1$upr[i]
    } else{
      new_data$fit[i] <- data2$fit[i]
      new_data$lwr[i] <- data2$lwr[i]
      new_data$upr[i] <- data2$upr[i]
    }
  }
  
  new_data$fit <- gam(fit ~ s(age),data = new_data)$fitted.values
  new_data$lwr <- gam(lwr ~ s(age),data = new_data)$fitted.values
  new_data$upr <- gam(upr ~ s(age),data = new_data)$fitted.values
  
  return(new_data)
}

tes1222 <- gam(pos~s(age,bs = "bs"),method = "REML",
    family = "binomial",data = t1222) %>% 
  predictg() 
tes0423 <- gam(pos~s(age,bs = "bs"),method = "REML",
    family = "binomial",data = t423) %>% 
  predictg() 
tes0823 <- gam(pos~s(age,bs = "bs"),method = "REML",
               family = "binomial",data = t823) %>% 
  predictg() 
tes1223 <- gam(pos~s(age,bs = "bs"),method = "REML",
               family = "binomial",data = t1223) %>% 
  predictg() 

## contrain 
con423 <- contrain(tes1222,tes0423)
con823 <- contrain(con423,tes0823)
con1223 <- contrain(con823,tes1223)


c423 <- con423 %>% plot_gam(date = 423) 
c823 <- con823 %>% plot_gam(date = 823) 
c1223 <- con1223 %>% plot_gam(date = 1223) 
Code
gam_contrain <- g1222 | c423 | c823 | c1223

gam_contrain/
  ts

6 Model age and time at the same time

Code
library(plotly)
library(scam)
Code
atdf <- rbind(t1222,t423,t823,t1223) %>%
  mutate(col_date = make_date(year = col_year,
                              month = col_month,
                              day = col_day))
Code
head(atdf)
               id  age_gr        age col_day col_month col_year neutralization
1 20FL-001-060001 0≤ & <1 0.36986301      28        12     2022           <NA>
2 20FL-001-060002 0≤ & <1 0.33150685      27        12     2022           <NA>
3 20FL-001-060003 0≤ & <1 0.07671233       4         1     2023           <NA>
4 20FL-001-060004 0≤ & <1 0.43835616      27        12     2022           <NA>
5 20FL-001-060005 0≤ & <1 0.93150685       3         1     2023           <NA>
6 20FL-001-060006 1≤ & <2 1.95616438      26        12     2022           1:16
  pos col_time   col_date
1   0 Dec 2022 2022-12-28
2   0 Dec 2022 2022-12-27
3   0 Dec 2022 2023-01-04
4   0 Dec 2022 2022-12-27
5   0 Dec 2022 2023-01-03
6   1 Dec 2022 2022-12-26

6.1 Fit with SCAM model

mpi is monotone increasing SCOP-splines: bs=“mpi”. To achieve monotone increasing smooths this reparameterizes the coefficients so that they form an increasing sequence.

Model

Code
s1 <- scam(pos~s(age)+s(col_date,bs = "mpi"),family=binomial,
           mutate(atdf, across(col_date, as.numeric)))

6.2 3D plot

Code
age_val <- c(.1,1:14)

collection_date_val <- seq(min(atdf$col_date),
                           max(atdf$col_date), le = 512)

new_data <- expand.grid(age = age_val,
                        col_date = as.numeric(collection_date_val))

scamf <- cbind(new_data,
               fit = 100 * predict(s1, new_data,"response"))

scamf$col_date <- as.Date(scamf$col_date)
Code
plot_ly(scamf, x = ~sort(unique(as.Date(col_date))),
        y = ~sort(unique(age)),
        z = ~matrix(fit, 15),
        showscale = F) %>%
  add_surface()%>%
  layout(scene = list(
    xaxis = list(title = "Collection date"),
    yaxis = list(title = "Age"),
    zaxis = list(title = "Seroprevalence",range = c(0,100))
  ))%>% layout(autosize = F, width = 890, height = 1000, margin = m)

6.3 2-D plot

Code
predict2 <- function(x, ci = .95, le = 512, m = 100){
  p <- (1 - ci) / 2
  link_inv <- x$family$linkinv
  dataset <- x$model
  n <- nrow(dataset) - length(x$coefficients)
  age_range <- range(dataset$age)
  ages <- seq(age_range[1], age_range[2], le = le)
  date_range <- range(dataset$col_date)
  dates <- seq(date_range[1], date_range[2], le = le)

out <- x |>
  predict(expand.grid(age = ages,
                        col_date = dates), se.fit = TRUE) |>
  extract(c("fit", "se.fit")) %>%
  c(expand.grid(age = ages,
               date = dates),
    .) |>
  as_tibble() |>
  mutate(lwr = m * link_inv(fit + qt(    p, n) * se.fit),
           upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
           fit = m * link_inv(fit)) |>
  select(-se.fit)

return(out)
}
Code
out <- predict2(s1)

out$date <- as.Date(out$date)
Code
s1222 <- out %>% filter(month(date) == 12 & year(date) == 2022) %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2022"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#0808cf")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2022" = "#0808cf"))+
  labs(y = "Seroprevalence (%)")+
  # geom_point(data= t1222, aes(x = age, m * pos + 1),
  #            shape = "|",
  #            col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Dec 2022"),size = 6)

s423 <- out %>% filter(month(date) == 4 & year(date) == 2023) %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Apr 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#ed097b")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Apr 2023" = "#ed097b"))+
  labs(y = "Seroprevalence (%)")+
  # geom_point(data= t1222, aes(x = age, m * pos + 1),
  #            shape = "|",
  #            col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Apr 2023"),size = 6)

s823 <- out %>% filter(month(date) == 8 & year(date) == 2023) %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Aug 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#ed6b00")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Aug 2023" = "#ed6b00"))+
  labs(y = "Seroprevalence (%)")+
  # geom_point(data= t1222, aes(x = age, m * pos + 1),
  #            shape = "|",
  #            col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Aug 2023"),size = 6)

s1223 <- out %>% filter(month(date) == 12 & year(date) == 2023) %>%
  ggplot(aes(x = age,y = fit))+
  geom_line(aes(col = "Dec 2023"))+
  geom_ribbon(aes(x = age,y = fit,
                  ymin=lwr, ymax=upr),alpha = 0.5,fill = "#33516b")+
  ylim(0,101)+
  theme_minimal()+
  scale_color_manual(name = "Y series",
                     values = c("Dec 2023" = "#33516b"))+
  labs(y = "Seroprevalence (%)")+
  # geom_point(data= t1222, aes(x = age, m * pos + 1),
  #            shape = "|",
  #            col = "#0808cf")+
  theme(
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    legend.position = "hide",
    legend.text = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15))+
  annotate("text", x = 3, y = 90, label = c("Dec 2023"),size = 6)
Code
result_sero <- s1222 | s423 | s823 | s1223

result_sero/
  ts

6.4 GLM with PAVA algorithm

In chapter 9 of 2012 serobook, there are an argument that following Friedman and Tibshirani (1984) and Mammen et al. (2001), Shkedy et al. (2003) suggested to estimate π(a) and λ(a) using local polynomials and smoothing splines and, if necessary, a posteriori apply the PAVA to isotonize the resulting estimate.

Without loss of generality they assume \(\pi(a_{1}) \leq \pi(a_{2}) \leq ...\leq \pi(a_{i})\). The PAVA states that if \(\pi(a_{i}) \leq \pi(a_{i - 1})\) these values need to be “pooled.” In other words \(\hat{\pi}(a_{i})\) and \(\hat{\pi}(a_{i-1})\) are both replaced by

\[\frac{\hat{\pi}(a_{i})+\hat{\pi}(a_{i-1})}{2}\]

In my case, I fitted the glm to seropositive by age and time, then applied PAVA for fitted seroprevalence and 95% CI of each age group.

Model
mod1 <- glm(pos ~ age * col_date +
              I(age ^ 2) * col_date, binomial,
            mutate(atdf, across(col_date, as.numeric)))
Code
age_val <- c(.1, seq(1,15,0.5))
collection_date_val <- seq(min(atdf$col_date),
                           max(atdf$col_date))

new_data <- expand.grid(age = age_val, col_date = as.numeric(collection_date_val))

prdcts <- cbind(new_data, fit = 100 * predict(mod1, new_data, "response")) |> 
  as_tibble() |> 
  arrange(col_date) |> 
  mutate(across(col_date, as_date))

6.4.1 Unconstrained plot

Code
plot_ly(prdcts, x = ~sort(unique(as.Date(col_date))),
        y = ~sort(unique(age)),
        z = ~matrix(fit, length(age_val)),
        showscale = F) %>%
  add_surface()%>%
  layout(scene = list(
    xaxis = list(title = "Collection date"),
    yaxis = list(title = "Age"),
    zaxis = list(title = "Seroprevalence",range = c(0,100))
  ))%>% layout(autosize = F, width = 700, height = 1000, margin = m)

6.4.2 Applying PAVA algorithm

Code
library(Iso)
out_pava <- prdcts %>% 
  mutate(time_numeric = as.numeric(col_date)) %>% 
  group_by(age) %>% 
  arrange(time_numeric) %>%
  mutate(
    seroprev_monotonic = pava(fit, time_numeric, decreasing = FALSE)
  )
Code
plot_ly(out_pava,x = ~sort(unique(as.Date(col_date))),
          y = ~sort(unique(age)),
          z = ~matrix(seroprev_monotonic, length(age_val)),
          showscale = F) %>%
  add_surface()%>%
  layout(scene = list(
    xaxis = list(title = "Collection date"),
    yaxis = list(title = "Age"),
    zaxis = list(title = "Seroprevalence",range = c(0,100))
  ))%>% layout(autosize = F, width = 700, height = 1000, margin = m)
Code
## boostrap 

bootstrap <- function(data, new_data) {
  # Step 1: resample
  boot_data <- data %>% slice_sample(n = nrow(data), replace = TRUE)
  
  # Step 2: refit model
  mod <- glm(pos ~ age * col_date + I(age^2) * col_date, 
             family = binomial, 
             data = mutate(boot_data, col_date = as.numeric(col_date)))
  
  # Step 3: predict
  preds <- predict(mod, new_data, type = "response") * 100  # seroprev %
  preds_df <- bind_cols(new_data, fit = preds) |> 
    mutate(col_date = as_date(col_date))
  
  # Step 4: apply PAVA per age
  preds_df %>%
    mutate(time_numeric = as.numeric(col_date)) %>%
    group_by(age) %>%
    arrange(time_numeric) %>%
    summarise(
      col_date = col_date,
      seroprev_monotonic = pava(fit, time_numeric, decreasing = FALSE),
      .groups = "keep"
    )
}


B <- 500  # number of bootstrap iterations

set.seed(42)  # for reproducibility
boot_results <- map_dfr(1:B, ~bootstrap(atdf, new_data) %>% 
                          mutate(bootstrap = .x))

point_est <- out_pava %>% 
  mutate(col_time = case_when(
    month(col_date) == 12 & year(col_date) == 2022 ~ "Dec 2022",
    month(col_date) == 04 & year(col_date) == 2023 ~ "Apr 2023",
    month(col_date) == 08 & year(col_date) == 2023 ~ "Aug 2023",
    month(col_date) == 12 & year(col_date) == 2023 ~ "Dec 2023"
  )) %>% na.omit() %>% 
  group_by(col_time,age) %>%
  summarise(
    sp = mean(seroprev_monotonic)
  )

sp_ci <- boot_results %>% 
  mutate(col_time = case_when(
    month(col_date) == 12 & year(col_date) == 2022 ~ "Dec 2022",
    month(col_date) == 04 & year(col_date) == 2023 ~ "Apr 2023",
    month(col_date) == 08 & year(col_date) == 2023 ~ "Aug 2023",
    month(col_date) == 12 & year(col_date) == 2023 ~ "Dec 2023"
  )) %>% na.omit() %>% 
  group_by(col_time,age) %>%
  summarise(
    lower = quantile(seroprev_monotonic, 0.025),
    upper = quantile(seroprev_monotonic, 0.975),
    .groups = "drop"
  )  

# df.list <- list(t1222,t423,t823,t1223)

ggplot() +
  geom_line(data = point_est,aes(x = age, y = sp), color = "blue") +
  geom_ribbon(data = sp_ci, aes(x = age,ymin = lower, ymax = upper), fill = "blue", alpha = 0.3) +
  geom_point(aes(x = age, pos*100),
             shape = "|",data = atdf)+
  labs(y = "Seroprevalence (%)", x = "Age (years)") +
  facet_wrap(~factor(col_time,levels = c("Dec 2022","Apr 2023","Aug 2023","Dec 2023")),
             nrow = 1) +
  scale_x_continuous(breaks = seq(0,15,by = 3), minor_breaks = NULL)+
  scale_y_continuous(limits = c(0,100),breaks = seq(0,100,by = 25),
                     labels = scales::label_percent(scale = 1), minor_breaks = NULL)+
  theme_bw()+ 
  theme(axis.title.x = element_text(size = 18),
        axis.text.x = element_text(size = 18),
        legend.position = "none",
        plot.tag = element_text(face = "bold", size = 18),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 18),
        strip.text.x = element_text(size = 18))

6.5 GAM with PAVA algorithm

Model

Code
gam1 <- gam(pos ~ s(age, bs = "bs") + s(col_date, bs = "bs"), method = "REML",
            family = "binomial",
            data = mutate(atdf, across(col_date, as.numeric)))
Code
gam_pred <- cbind(new_data, fit = 100 * predict(gam1, new_data, "response")) |> 
  as_tibble() |> 
  arrange(col_date) |> 
  mutate(across(col_date, as_date))

out <- gam_pred %>% 
mutate(time_numeric = as.numeric(col_date)) %>% 
  group_by(age) %>% 
  arrange(time_numeric) %>%
  mutate(
    seroprev_monotonic = pava(fit, time_numeric, decreasing = FALSE)
  ) 
  
sp1222 <- out %>% filter(month(col_date) == 12 & year(col_date) == 2022) %>% 
  group_by(age) %>% 
  summarise(
    sp = mean(seroprev_monotonic),
  ) %>% ggplot(aes(x = age,y = sp))+
  geom_line()+
  ylim(c(0,100))+
  labs(tag = "Dec 2022",y = "Seroprevalence (%)")+
  theme_minimal()



sp0423 <-out %>% filter(month(col_date) == 04 & year(col_date) == 2023) %>% 
  group_by(age) %>% 
  summarise(
    sp = mean(seroprev_monotonic),
  ) %>% ggplot(aes(x = age,y = sp))+
  geom_line()+
  ylim(c(0,100))+
  labs(tag = "Apr 2023",y = "Seroprevalence (%)")+
  theme_minimal()

sp0823 <- out %>% filter(month(col_date) == 08 & year(col_date) == 2023) %>% 
  group_by(age) %>% 
  summarise(
    sp = mean(seroprev_monotonic),
  ) %>% ggplot(aes(x = age,y = sp))+
  geom_line()+
  ylim(c(0,100))+
  labs(tag = "Aug 2023",y = "Seroprevalence (%)")+
  theme_minimal()

sp1223 <- out %>% filter(month(col_date) == 12 & year(col_date) == 2023) %>% 
  group_by(age) %>% 
  summarise(
    sp = mean(seroprev_monotonic),
  ) %>% ggplot(aes(x = age,y = sp))+
  geom_line()+
  ylim(c(0,100))+
  labs(tag = "Dec 2023",y = "Seroprevalence (%)")+
  theme_minimal()
Code
plot_ly(out, x = ~sort(unique(as.Date(col_date))),
        y = ~sort(unique(age)),
        z = ~matrix(seroprev_monotonic, length(age_val)),
        showscale = F) %>%
  add_surface()%>%
  layout(scene = list(
    xaxis = list(title = "Collection date"),
    yaxis = list(title = "Age"),
    zaxis = list(title = "Seroprevalence",range = c(0,100))
  ))%>% layout(autosize = F, width = 700, height = 1000, margin = m)
Code
sp1222 | sp0423 | sp0823 | sp1223

7 Spatial distribution of serum sample

Code
# cleaned <- read_csv("D:/OUCRU/HCDC/project phân tích sero quận huyện/cleaned.csv")
# 
# sero <- rbind(t1222,t1223,t423,t823)
# 
# sero_add <- full_join(cleaned,sero, by =  c("id" = "id"))
# 
# data_pt <- sero_add %>% filter(!is.na(age)&!is.na(qhchuan)) %>% 
#   select(qhchuan,age,col_month,col_year,pos) %>% 
#   as.data.frame()
# 
# # HCMC map
# map_path <- "D:/OUCRU/HCDC/project phân tích sero quận huyện/"
# vn_qh <- st_read(dsn = file.path(map_path, "gadm41_VNM.gpkg"), layer = "ADM_ADM_2")
# 
# vn_qh1 <- vn_qh %>%
#   clean_names() %>%     ## cho thành chữ thường
#   filter(
#     str_detect(
#       name_1,
#       "Hồ Chí Minh"
#     )
#   )
# qhtp <- vn_qh1[-c(14,21),]
# 
# qhtp$geom[qhtp$varname_2 == "Thu Duc"] <- vn_qh1[c("21","24","14"),] %>%
#   st_union()
# 
# qhtp <- qhtp %>% st_cast("MULTIPOLYGON")
# library(stringi)
# qhtp$varname_2 <- stri_trans_general(qhtp$varname_2, "latin-ascii") %>%
#   tolower() %>%
#   str_remove("district") %>%
#   trimws(which = "both")
# 
# qhtp$nl_name_2 <- c("BC","BTân","BT","CG","CC","GV",
#                     "HM","NB","PN","1","10","11","12",
#                     "3","4","5","6","7","8","TB",
#                     "TP","TĐ")
Code
# ## function
# get_sp <- function(month,year){
#   data_pt %>% filter(col_month == month & col_year == year) %>% 
#     select(-age) %>% group_by(qhchuan) %>% count()  %>% 
#     as.data.frame() %>%
#     dplyr::mutate(pre = n / sum(n))  
# }
# 
# plot_cover <- function(data,map){
#   output <- left_join(map, data.frame(data), by = join_by(varname_2 == qhchuan))
#   output %>% ggplot() +
#     geom_sf(aes(fill = pre),show.legend = T)+
#     # scale_fill_continuous(low="yellow", high="red",
#     #                       guide="colorbar",na.value="white",
#     #                       name = "Percentage",
#     #                       limits = c(0,20))+
#     # geom_sf_text(aes(label = nl_name_2),size=2.5)+
#     scale_fill_gsea(na.value = "white",
#                     breaks = seq(0,0.2,by = 0.05),
#                     limits = c(0, 0.2),
#                     name = "Percentage",
#                     labels = scales::label_percent())+
#     theme_void()  
# }
Code
# sp1222 <- get_sp(12,2022)
# sp0423 <- get_sp(4,2023)
# sp0823 <- get_sp(8,2023)
# sp1223 <- get_sp(12,2023)
# 
# p1222 <- plot_cover(data = sp1222,map = qhtp)
# p423 <- plot_cover(data = sp0423,map = qhtp)
# p823 <- plot_cover(data = sp0823,map = qhtp)
# p1223 <- plot_cover(data = sp1223,map = qhtp)
# 
# serum_dis <- plot_grid(p1222,p423,p823,p1223,
#           # labels = c("Dec 2022","Apr 2023","Aug 2023","Dec 2023"),
#           ncol = 4)

8 Final visualization

8.1 Unconstrained fit

Code
# #| fig-width: 15
# #| fig-height: 8
# #| out-width: "100%"
# glmfit <- plot1222 | plot0423 | plot0823 | plot1223
# 
# serum_dis/
#   glmfit/
#   ts
# 
# ggsave("D:/OUCRU/hfmd/figure/EV71 present/uncon.svg",dpi = 500,width = 15,height = 8,bg = "white")
# ggsave("D:/OUCRU/hfmd/figure/EV71 present/uncon.png",dpi = 500,width = 15,height = 8,bg = "white")