Code
<- "D:/OUCRU/hfmd/data/hfmd_sero.rds" data_file
<- "D:/OUCRU/hfmd/data/hfmd_sero.rds" data_file
library(dplyr)
library(stringr)
library(purrr)
library(tidyr)
library(magrittr)
library(mgcv)
library(scam)
<- function(..., data) {
gam2 <- mgcv::gam(..., data = data, method = "REML")
out $data <- data
out
out }
<- function(..., data) {
scam2 <- scam::scam(..., data = data)
out $data <- data
out
out }
<- data_file |>
hfmd readRDS() |>
as_tibble() |>
mutate(collection = id |>
str_remove(".*-") |>
as.numeric() |>
divide_by(1e4) |>
round(),
col_date2 = as.numeric(col_date),
across(pos, ~ .x > 0))
<- function(x, ci = .95, le = 512, m = 100) {
predict2 <- (1 - ci) / 2
p
<- x$family$linkinv
link_inv <- x$data
dataset <- nrow(dataset) - length(x$coefficients)
n <- range(dataset$age)
age_range
<- seq(age_range[1], age_range[2], le = le)
ages
|>
x predict(data.frame(age = ages), se.fit = TRUE) |>
extract(c("fit", "se.fit")) %>%
c(age = list(ages), .) |>
as_tibble() |>
mutate(lwr = m * link_inv(fit + qt( p, n) * se.fit),
upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
fit = m * link_inv(fit)) |>
select(- se.fit)
}
<- function(x) {
plot_predictions with(x, {
plot(age, fit, ylim = c(0, 100), type = "n",
xlab = "age (years)", ylab = "seroprevalence")
polygon(c(age, rev(age)), c(lwr, rev(upr)), border = NA, col = adjustcolor(4, .2))
lines(age, fit, col = 4)
}) }
|>
hfmd filter(collection < 7) %>%
gam2(pos ~ s(age), binomial, data = .) |>
predict2() |>
plot_predictions()
|>
hfmd filter(collection < 7) %>%
scam2(pos ~ s(age, bs = "mpi"), binomial, data = .) |>
predict2() |>
plot_predictions()
<- scam2(pos ~ s(age) + s(col_date2, bs = "mpi"), binomial, data = hfmd) m
<- hfmd |>
mean_col_dates group_by(collection) |>
summarise(mean_col_date = mean(col_date2)) |>
pull(mean_col_date)
<- function(x, newdata, ci = .95, le = 512, m = 100) {
predict3 <- (1 - ci) / 2
p
<- x$family$linkinv
link_inv <- x$data
dataset <- nrow(dataset) - length(x$coefficients)
n
|>
x predict(newdata, se.fit = TRUE) |>
extract(c("fit", "se.fit")) %>%
c(age = list(ages), .) |>
as_tibble() |>
mutate(lwr = m * link_inv(fit + qt( p, n) * se.fit),
upr = m * link_inv(fit + qt(1 - p, n) * se.fit),
fit = m * link_inv(fit)) |>
select(- se.fit)
}
<- seq(0, 15, le = 512)
ages <- map(mean_col_dates, ~ predict3(m, data.frame(age = ages, col_date2 = .x))) p
plot_predictions(p[[1]])
plot_predictions(p[[2]])
plot_predictions(p[[3]])
plot_predictions(p[[4]])
<- function(alpha, mu) alpha * (1 - mu) / mu
beta_from_alpha
<- function(L, U, mu, ci = .95, interval = c(.01, 1e3), ...) {
beta_parameters <- (1 - ci) / 2
p <- function(alpha) {
objective <- beta_from_alpha(alpha, mu)
beta <- qbeta(p, alpha, beta)
q025 <- qbeta(1 - p, alpha, beta)
q975 - L)^2 + (q975 - U)^2
(q025
}<- optimize(objective, interval, ...)$minimum
alpha_est c(alpha = alpha_est, beta = beta_from_alpha(alpha_est, mu))
}
<- function(col = 6, ages = seq(0, 15, le = 512),
simulate_values ci = .95, interval = c(.01, 1e3), n = 100, ...) {
<- (1 - ci) / 2
p <- filter(hfmd, collection == col)
dat <- gam(pos ~ s(age), binomial, dat)
mod <- nrow(dat) - length(coef(mod))
df <- family(mod)$linkinv
link_inv predict(mod, list(age = ages), se.fit = TRUE) |>
::extract(c("fit", "se.fit")) %>%
magrittrc(age = list(ages), .) |>
as_tibble() |>
mutate(lwr = link_inv(fit + qt( p, df) * se.fit),
upr = link_inv(fit + qt(1 - p, df) * se.fit),
fit = link_inv(fit)) |>
select(- se.fit) |>
mutate(est = pmap(list(lwr, upr, fit), beta_parameters, ci, interval, ...)) |>
unnest_wider(est) |>
mutate(meancheck = map2_dbl(alpha, beta, ~ .x / (.x + .y)),
rndvalues = map2(alpha, beta, ~ rbeta(n, .x, .y)))
}
<- hfmd |>
mean_col_dates group_by(collection) |>
summarise(mean_col_date = mean(col_date2)) |>
pull(mean_col_date)
set.seed(123)
<- map(6:9, simulate_values) simulations
<- length(simulations[[1]]$rndvalues[[1]])
l
<- map2(simulations, mean_col_dates,
sim_data ~ .x |>
select(age, rndvalues) |>
mutate(coll_time = list(rep(.y, l)))) |>
bind_rows()
<- length(mean_col_dates) * l
N <- .95
ci
<- (1 - ci) / 2
p
<- sim_data |>
mod filter(age == 0) |>
select(-age) |>
map(unlist) |>
as_tibble() |>
mutate(pos = rbinom(N, 1, rndvalues)) %>%
scam(pos ~ s(coll_time, bs = "mpi"), binomial, .)
<- N - length(coef(mod))
df
<- family(mod)$linkinv
link_inv
|>
mod predict(list(coll_time = mean_col_dates), se.fit = TRUE) |>
::extract(c("fit", "se.fit")) %>%
magrittrc(coll_time = list(mean_col_dates), .) |>
as_tibble() |>
mutate(lwr = link_inv(fit + qt( p, df) * se.fit),
upr = link_inv(fit + qt(1 - p, df) * se.fit),
fit = link_inv(fit)) |>
select(- se.fit)
# A tibble: 4 × 4
coll_time fit lwr upr
<dbl> <dbl[1d]> <dbl[1d]> <dbl[1d]>
1 19349. 0.0513 0.0280 0.0921
2 19457. 0.0513 0.0280 0.0922
3 19580. 0.252 0.197 0.316
4 19700. 0.256 0.199 0.321
28”:
<- sim_data |>
models group_by(age) |>
group_map(~ .x |>
map(unlist) |>
as_tibble() |>
mutate(pos = rbinom(N, 1, rndvalues)) %>%
scam(pos ~ s(coll_time, bs = "mpi"), binomial, .))
<- map2(models,
a - map_int(models, ~ length(coef(.x))),
N ~{
<- family(.x)$linkinv
linkinv |>
.x predict(list(coll_time = mean_col_dates), se.fit = TRUE) |>
::extract(c("fit", "se.fit")) %>%
magrittrc(coll_time = list(mean_col_dates), .) |>
as_tibble() |>
mutate(lwr = link_inv(fit + qt( p, .y) * se.fit),
upr = link_inv(fit + qt(1 - p, .y) * se.fit),
fit = link_inv(fit)) |>
select(- se.fit)
|>
} ) setNames(unique(sim_data$age)) |>
bind_rows(.id = "age") |>
mutate(across(age, as.numeric))
|> arrange(coll_time, age) a
# A tibble: 2,048 × 5
age coll_time fit lwr upr
<dbl> <dbl> <dbl[1d]> <dbl[1d]> <dbl[1d]>
1 0 19349. 0.0762 0.0448 0.127
2 0.0294 19349. 0.0611 0.0351 0.104
3 0.0587 19349. 0.0427 0.0201 0.0881
4 0.0881 19349. 0.0642 0.0341 0.117
5 0.117 19349. 0.0490 0.0210 0.110
6 0.147 19349. 0.0510 0.0278 0.0919
7 0.176 19349. 0.0494 0.0209 0.112
8 0.205 19349. 0.0859 0.0541 0.134
9 0.235 19349. 0.0494 0.0209 0.112
10 0.264 19349. 0.0411 0.0208 0.0795
# ℹ 2,038 more rows
|>
a mutate(across(coll_time, round)) |>
filter(coll_time == 19349) |>
with({
plot(age, fit, ylim = 0:1)
})
<- seq(0, 15, le = 512)
ages
|>
a mutate(across(coll_time, round)) |>
filter(coll_time == 19349) %>%
gam(fit ~ s(age), betar, .) |>
predict(list(age = ages), "response") %>%
plot(ages, .,type = "l",ylim = 0:1)
|>
a mutate(across(coll_time, round)) |>
filter(coll_time == 19349) |>
with({
plot(age, fit, ylim = 0:1)
})
|>
a mutate(across(coll_time, round)) |>
filter(coll_time == 19457) |>
with({
plot(age, fit, ylim = 0:1)
})
|>
a mutate(across(coll_time, round)) |>
filter(coll_time == 19580) |>
with({
plot(age, fit, ylim = 0:1)
})
|>
a mutate(across(coll_time, round)) |>
filter(coll_time == 19700) |>
with({
plot(age, fit, ylim = 0:1)
})
<- function(timee){
smothh <- a |>
dt_fit mutate(across(coll_time, round)) |>
filter(coll_time == timee)
<- gam(fit ~ s(age, k = 30), data = dt_fit)
mod_fit <- gam(lwr ~ s(age, k = 30), data = dt_fit)
mod_lwr <- gam(upr ~ s(age, k = 30), data = dt_fit)
mod_upr
# Predict smooths
tibble(age = seq(min(dt_fit$age), max(dt_fit$age), length.out = 500)) %>%
mutate(
fit = predict(mod_fit, newdata = .),
lwr = predict(mod_lwr, newdata = .),
upr = predict(mod_upr, newdata = .)
) }
library(tidyverse)
map(c(19349,19457,19580,19700),smothh) |>
bind_rows() %>%
mutate(col_time = rep(c(19349,19457,19580,19700),each = 500),
col_time = factor(col_time,
levels = c(19349,19457,19580,19700),
labels = c("Dec 2022",
"Apr 2023",
"Aug 2023",
"Dec 2023"))) %>%
ggplot(aes(x = age, y = fit)) +
geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "blue", alpha = 0.3) +
geom_line(color = "blue") +
facet_wrap(~col_time,nrow = 1)+
labs(y = "Smoothed fit", x = "Age") +
ylim(c(0,1))+
theme_minimal()