Code
library(tidyverse)
library(lubridate)
library(janitor)
library(stringi)
library(patchwork)
library(vegan)
invisible(Sys.setlocale("LC_TIME", "English"))
<- readRDS("D:/OUCRU/assigned github/vac_coverage/data/vaxreg_hcmc_measles.rds") df_raw
library(tidyverse)
library(lubridate)
library(janitor)
library(stringi)
library(patchwork)
library(vegan)
invisible(Sys.setlocale("LC_TIME", "English"))
<- readRDS("D:/OUCRU/assigned github/vac_coverage/data/vaxreg_hcmc_measles.rds") df_raw
## data cleaning
<- df_raw %>% na.omit() %>%
df subset(date_m1 > dob &
> dob &
date_m2 < date_m2 &
date_m1 year(date_m2) <= 2024 &
year(date_m1) >= min(year(df_raw$dob)))
Vaccine shortage public started in May 2022 source
## vaccine coverage 1st and 2nd dose
<- seq(as.Date("2022-09-01"),as.Date("2024-11-20"),by = "weeks")
date_compute
## second dose
<- data.frame()
out_fn
for (i in 1:length(date_compute)){
<- df %>%
cov mutate(
age = decimal_date(date_compute[i]) - decimal_date(dob),
is_m2 = if_else(date_m2 <= date_compute[i], 1, 0)) %>%
filter(age >= 1, age <= 5) %>%
summarise(m2 = sum(is_m2, na.rm = T), n = n(), cov = m2/n)
<- df %>%
cov_2w mutate(
age = decimal_date(date_compute[i]) - decimal_date(dob),
is_m2 = if_else(date_m2 <= date_compute[i] %m-% weeks(2), 1, 0)) %>%
filter(age >= 1 + 0.5/12, age <= 5 + 0.5/12) %>%
summarise(m2 = sum(is_m2, na.rm = T), n = n(), cov = m2/n)
<- data.frame(date = as.Date(date_compute[i]),
out cov = as.numeric(cov$cov),
cov_2w = as.numeric(cov_2w$cov))
<- rbind(out_fn,out)
out_fn
}
## first dose
<- data.frame()
out_fn1
for (i in 1:length(date_compute)){
<- df %>%
cova mutate(
age = decimal_date(date_compute[i]) - decimal_date(dob),
is_m1 = if_else(date_m1 <= date_compute[i], 1, 0)) %>%
filter(age >= 0.75, age <= 5) %>%
summarise(m1 = sum(is_m1, na.rm = T), n = n(), cov = m1/n)
<- df %>%
cov_2wa mutate(
age = decimal_date(date_compute[i]) - decimal_date(dob),
is_m1 = if_else(date_m1 <= date_compute[i] %m-% weeks(2), 1, 0)) %>%
filter(age >= 0.75 + 0.5/12, age <= 5 + 0.5/12) %>%
summarise(m1 = sum(is_m1, na.rm = T), n = n(), cov = m1/n)
<- data.frame(date = as.Date(date_compute[i]),
outa cov = as.numeric(cova$cov),
cov_2w = as.numeric(cov_2wa$cov))
<- rbind(out_fn1,outa)
out_fn1
}
## plot
<- ggplot()+
vac_co geom_line(data = out_fn,aes(x = date,y = cov*100,color="2"))+
geom_line(data = out_fn1,aes(x = date,y = cov*100,color="1"))+
scale_y_continuous(limits = c(0, 100), breaks = seq(0,100,by = 10)) +
labs(x = "Date",y = "Vaccine coverage (%)")+
scale_x_date(breaks = "4 month",
date_labels= "%b %Y",
limits = c(as.Date("2022-09-01"),as.Date("2024-11-20")))+
theme(axis.text.x = element_text(angle = 45,size = 8,
hjust=1))+
scale_color_manual(name="Dose", values=c("1"="red", "2"="blue"))+
theme_bw()
<-ggplot()+
vac_co2w geom_line(data = out_fn,aes(x = date,y = cov_2w*100,color="2"))+
geom_line(data = out_fn1,aes(x = date,y = cov_2w*100,color="1"))+
scale_y_continuous(limits = c(0, 100), breaks = seq(0,100,by = 10)) +
labs(x = "Date",y = "Vaccine coverage with 2-week hypothesis (%)")+
scale_x_date(breaks = "4 month",
date_labels= "%b %Y",
limits = c(as.Date("2022-09-01"),as.Date("2024-11-20")))+
theme(axis.text.x = element_text(angle = 45,size = 8,
hjust=1))+
scale_color_manual(name="Dose", values=c("1"="red", "2"="blue"))+
theme_bw()
| vac_co2w vac_co
<- unique(df$district)
district
<- data.frame()
out_d <- data.frame()
out_d1
for (k in 1:length(district)){
<- df %>% filter(district == district[k])
dfd for (i in 1:length(date_compute)){
<- dfd %>%
cov mutate(
age = decimal_date(date_compute[i]) - decimal_date(dob),
is_m2 = if_else(date_m2 <= date_compute[i], 1, 0)) %>%
filter(age >= 1, age <= 5) %>%
summarise(m2 = sum(is_m2, na.rm = T), n = n(), cov = m2/n)
<- dfd %>%
cov_2w mutate(
age = decimal_date(date_compute[i]) - decimal_date(dob),
is_m2 = if_else(date_m2 <= date_compute[i] %m-% weeks(2), 1, 0)) %>%
filter(age >= 1 + 0.5/12, age <= 5 + 0.5/12) %>%
summarise(m2 = sum(is_m2, na.rm = T), n = n(), cov = m2/n)
<- data.frame(district = as.character(district[k]),
out_d1 date = as.Date(date_compute[i]),
cov = as.numeric(cov$cov),
cov_2w = as.numeric(cov_2w$cov))
<- rbind(out_d,out_d1)
out_d
}
}
<- out_d %>%
meancv group_by(district) %>%
summarise(mean = mean(cov))
<- meancv[order(-meancv$mean),]
sorted
<- out_d %>% mutate(dis = factor(district,
vac_cov levels = as.character(sorted$district))) %>%
ggplot()+
geom_line(aes(x = date,y = cov*100,color="2"))+
scale_y_continuous(limits = c(0, 100), breaks = seq(0,100,by = 10)) +
labs(x = "Date",y = "Vaccine coverage (%)")+
scale_x_date(breaks = "4 months",
date_labels= "%b %Y",
limits = c(as.Date("2022-09-01"),as.Date("2024-11-20")))+
facet_wrap(vars(dis),ncol = 1)+
scale_color_manual(name="Dose", values=c("1"="red", "2"="blue"))+
theme_bw()+
theme(axis.text.x = element_text(angle = 45,size = 8,hjust=1),
axis.text.y = element_text(size = 6))
<- seq(as.Date("2022-05-01"),as.Date("2024-07-01"),by = "month")
week
<- df[,c("dob","district","date_m1","date_m2")]
dttlv <- data.frame()
out_timely
for (k in 1:length(district)){
<- subset(dttlv, district == district[k])
td for (i in 1:length(week)){
$lackd <- week[i]
td$ageuntil <- decimal_date(td$lackd) - decimal_date(td$dob)
td
## subset children aged from 9 months to 9 months 2 weeks at chosen time
<- td[td$ageuntil >= 0.75 & td$ageuntil <= 0.75 + 0.5*(1/12),]
slec
$agevac <- decimal_date(slec$date_m1) - decimal_date(slec$dob)
slec$vac_on_date1 <- ifelse(slec$agevac < slec$ageuntil + 1/12,1,0)
slec$vac_on_date1 <- replace(slec$vac_on_date, is.na(slec$vac_on_date1),0)
slec
<- slec %>% group_by(vac_on_date1) %>% count()
re
if(nrow(re) == 1){
2,] <- re[1,]
re[1,1] <- 0
re[1,2] <- 0
re[
}
<- data.frame(district = district[k],
cus date = week[i],
per = as.numeric(re[2,2])/(as.numeric(re[2,2])+as.numeric(re[1,2])))
<- rbind(out_timely,cus)
out_timely
} }
<- read_csv("data/GEO1_VN2019_79.csv")
ipsum
$district <- case_when(
ipsum$GEO2_VN == 704079760 ~ "Quận 1",
ipsum$GEO2_VN == 704079761 ~ "Quận 12",
ipsum$GEO2_VN == 704079762 ~ "Thủ Đức",
ipsum$GEO2_VN == 704079763 ~ "Thủ Đức",
ipsum$GEO2_VN == 704079764 ~ "Gò Vấp",
ipsum$GEO2_VN == 704079765 ~ "Bình Thạnh",
ipsum$GEO2_VN == 704079766 ~ "Tân Bình",
ipsum$GEO2_VN == 704079767 ~ "Tân Phú",
ipsum$GEO2_VN == 704079768 ~ "Phú Nhuận",
ipsum$GEO2_VN == 704079769 ~ "Thủ Đức",
ipsum$GEO2_VN == 704079770 ~ "Quận 3",
ipsum$GEO2_VN == 704079771 ~ "Quận 10",
ipsum$GEO2_VN == 704079772 ~ "Quận 11",
ipsum$GEO2_VN == 704079773 ~ "Quận 4",
ipsum$GEO2_VN == 704079774 ~ "Quận 5",
ipsum$GEO2_VN == 704079775 ~ "Quận 6",
ipsum$GEO2_VN == 704079776 ~ "Quận 8",
ipsum$GEO2_VN == 704079777 ~ "Bình Tân",
ipsum$GEO2_VN == 704079778 ~ "Quận 7",
ipsum$GEO2_VN == 704079783 ~ "Củ Chi",
ipsum$GEO2_VN == 704079784 ~ "Hóc Môn",
ipsum$GEO2_VN == 704079785 ~ "Bình Chánh",
ipsum$GEO2_VN == 704079786 ~ "Nhà Bè",
ipsum$GEO2_VN == 704079787 ~ "Cần Giờ") %>%
ipsumstri_trans_general("latin-ascii") %>%
str_remove_all("Quan") %>%
trimws(which = "both")
## census for denominator
<- readRDS("D:/OUCRU/hfmd/data/census2019.rds") %>%
hcm19 filter(province == "Thành phố Hồ Chí Minh")
$district <- hcm19$district %>%
hcm19str_remove_all("Quận|Huyện") %>%
str_replace_all(
c("\\b2\\b|\\b9\\b" = "Thủ Đức")) %>%
stri_trans_general("latin-ascii") %>%
trimws(which = "both")
<- hcm19 %>% group_by(district) %>%
popdis summarise(n = sum(n))
<- popdis$district districtc
## function to calculate percentage of each district
<- function(data){
scale_per <- data.frame()
ou for (i in 1:22){
<- data %>% filter(district == districtc[i]) %>%
oo mutate(pop = rep(as.numeric(popdis$n[popdis$district == districtc[i]])),
per = (n/pop)*100)
<- rbind(ou,oo)
ou
}return(ou)
}
## Level of education or training currently attending
$edu <- case_when(
ipsum$VN2019A_SCHOOLLEV == 99 ~ "NIU",
ipsum$VN2019A_SCHOOLLEV == 1 ~ "Pre-school below 5 years old",
ipsum$VN2019A_SCHOOLLEV == 2 ~ "Pre-school at 5 years old",
ipsum$VN2019A_SCHOOLLEV == 3 ~ "Primary",
ipsum$VN2019A_SCHOOLLEV == 4 ~ "Lower secondary",
ipsum$VN2019A_SCHOOLLEV == 5 ~ "Higher secondary",
ipsum$VN2019A_SCHOOLLEV == 6 ~ "Pre-intermediate",
ipsum$VN2019A_SCHOOLLEV == 7 ~ "Intermediate",
ipsum$VN2019A_SCHOOLLEV == 8 ~ "College",
ipsum$VN2019A_SCHOOLLEV == 9 ~ "University",
ipsum$VN2019A_SCHOOLLEV == 10 ~ "Master",
ipsum$VN2019A_SCHOOLLEV == 11 ~ "Ph.D. (doctorate)") %>%
ipsum
factor(levels = c("NIU",
"Pre-school below 5 years old",
"Pre-school at 5 years old",
"Primary",
"Lower secondary",
"Higher secondary",
"Pre-intermediate",
"Intermediate",
"College",
"University",
"Master",
"Ph.D. (doctorate)")
)
<- ipsum %>% group_by(district,edu) %>% count()
hcm_edu
<- scale_per(hcm_edu) %>% filter(edu != "NIU") %>%
edu mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
ggplot() +
geom_col(aes(x = per,
y = edu)) +
facet_wrap(vars(dis),
# scales = "free",
ncol = 1)+
labs(x = "Percentage of total population(%)",
y = "Education")+
theme_light()+
theme(axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 8))
## Employment status
$employ <- case_when(
ipsum$VN2019A_EMPSTAT == 1 ~ "Employed",
ipsum$VN2019A_EMPSTAT == 2 ~ "Unemployed",
ipsum$VN2019A_EMPSTAT == 3 ~ "Inactive",
ipsum$VN2019A_EMPSTAT == 4 ~ "Overseas",
ipsum$VN2019A_EMPSTAT == 9 ~ "NIU") %>%
ipsum
factor(levels = c("Employed",
"Unemployed",
"Inactive",
"Overseas",
"NIU")
)
<- ipsum %>% group_by(district,employ) %>% count()
hcm_employ
<- scale_per(hcm_employ) %>% filter(employ != "NIU") %>%
employ mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
ggplot() +
geom_col(aes(x = per,
y = employ)) +
facet_wrap(vars(dis),
# scales = "free",
ncol = 1)+
labs(x = "Percentage of total population(%)",
y = "Employment status")+
theme_light()+
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15))
##Number of sons living in household
$son_in_house <- as.character(ipsum$VN2019A_CHHOMEM)
ipsum$son_in_house <- ifelse(ipsum$son_in_house == "0","None",
ipsumifelse(ipsum$son_in_house == "7","7+",
ifelse(ipsum$son_in_house == "99","NIU",
$son_in_house)))
ipsum
<- ipsum %>% group_by(district,son_in_house) %>% count()
hcm_sih
##Number of daughter living in household
$dau_in_house <- as.character(ipsum$VN2019A_CHHOMEF)
ipsum$dau_in_house <- ifelse(ipsum$dau_in_house == "0","None",
ipsumifelse(ipsum$dau_in_house == "7","7+",
ifelse(ipsum$dau_in_house == "99","NIU",
$dau_in_house)))
ipsum
<- ipsum %>% group_by(district,dau_in_house) %>% count()
hcm_dih
<- full_join(scale_per(hcm_sih) %>% select(district,son_in_house,per),
son_dau scale_per(hcm_dih) %>% select(district,dau_in_house,per),
by = c("district" = "district",
"son_in_house" = "dau_in_house"))
colnames(son_dau) <- c("district","num_child","son","daughter")
<- son_dau %>% pivot_longer(cols=c('son', 'daughter'),
hcm_childnames_to='sex',
values_to='per')
$per <- replace(hcm_child$per, is.na(hcm_child$per), 0)
hcm_child
<- hcm_child %>% mutate(
hcm_child per2 = case_when(
== "daughter" ~ -per,
sex TRUE ~ per
))
<- hcm_child %>% filter(num_child != "NIU")
hcm_childa
<- range(hcm_childa$per2 %>% na.omit())
pop_range
<- pretty(pop_range, n = 6)
pop_range_breaks
<- hcm_child %>% filter(num_child != "NIU") %>%
chil mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
ggplot() +
geom_col(aes(x = per2,
y = num_child,
fill = sex)) +
scale_x_continuous(breaks = pop_range_breaks,
labels = abs(pop_range_breaks))+
facet_wrap(vars(dis),
# scales = "free",
ncol = 1)+
theme_light()+
labs(x = "Percentage of total population(%)",
y = "Number of children")
## Faith or religion
$reli <- case_when(
ipsum$VN2019A_RELIG2 == 99 ~ "NIU",
ipsum$VN2019A_RELIG2 == 1 ~ "Buddhism",
ipsum$VN2019A_RELIG2 == 2 ~ "Catholic",
ipsum$VN2019A_RELIG2 == 3 ~ "Evangelical",
ipsum$VN2019A_RELIG2 == 4 ~ "Caodaism",
ipsum$VN2019A_RELIG2 == 5 ~ "Hoa Hao Buddhism",
ipsum$VN2019A_RELIG2 == 6 ~ "Islamic",
ipsum$VN2019A_RELIG2 == 7 ~ "Baha’i Faith",
ipsum$VN2019A_RELIG2 == 8 ~ "Pure Land Buddhist Association of Vietnam",
ipsum$VN2019A_RELIG2 == 9 ~ "Tu An Hieu Nghia (Four Debts of Gratitude) Buddhism",
ipsum$VN2019A_RELIG2 == 11 ~ "Cham Brahmin",
ipsum$VN2019A_RELIG2 == 12 ~ "Church of Jesus Christ of Latter-day Saints (Mormon)",
ipsum$VN2019A_RELIG2 == 13 ~ "Hieu Nghia Ta Lon Buddhism (Registration Granted)",
ipsum$VN2019A_RELIG2 == 14 ~ "Vietnam Seventh-day Adventist Church") %>%
ipsum
factor(levels = c("NIU",
"Buddhism",
"Catholic",
"Evangelical",
"Caodaism",
"Hoa Hao Buddhism",
"Islamic",
"Baha’i Faith",
"Pure Land Buddhist Association of Vietnam",
"Tu An Hieu Nghia (Four Debts of Gratitude) Buddhism",
"Cham Brahmin",
"Church of Jesus Christ of Latter-day Saints (Mormon)",
"Hieu Nghia Ta Lon Buddhism (Registration Granted)",
"Vietnam Seventh-day Adventist Church")
)
<- ipsum %>% group_by(district,reli) %>% count()
hcm_reli
<- scale_per(hcm_reli) %>% filter(reli != "NIU" & per >=0.1) %>%
reli mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
ggplot() +
geom_col(aes(x = per,
y = reli)) +
facet_wrap(vars(dis),
# scales = "free",
ncol = 1)+
labs(x = "Percentage of total population(%)",
y = "Religion")+
theme_light()+
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15))
# Delay distribution
$date_m1_ontime <- dttlv$dob %m+% months(9)
dttlv$delay <- interval(dttlv$date_m1_ontime,dttlv$date_m1) / months(1)
dttlv
## Delay distribution of HCMC
%>% na.omit() %>%
dttlv filter(delay > 0 & date_m1_ontime >= as.Date("2022-05-01")
& date_m1_ontime <= as.Date("2023-12-31")) %>%
ggplot(aes(x=delay)) +
geom_density()+
labs(x = "Month")
## Each districts with demographic variables
<- dttlv %>% na.omit() %>%
meandl filter(delay > 0 & date_m1_ontime >= as.Date("2022-05-01")
& date_m1_ontime <= as.Date("2023-12-31")) %>%
group_by(district) %>%
summarise(median = median(delay,na.rm = T))
<- dttlv %>%
vac_dl mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
filter(delay > 0 & date_m1_ontime >= as.Date("2022-05-01")
& date_m1_ontime <= as.Date("2023-12-31")) %>%
ggplot(aes(x=delay)) +
geom_density()+
labs(x = "Month")+
xlim(0,5)+
facet_wrap(vars(dis),ncol = 1)+
theme_bw()+
theme(axis.text.x = element_text(angle = 45,size = 8,
hjust=1))
##
<- seq(as.Date("2022-05-01"),as.Date("2024-01-01"),by = "month")
month <- dttlv$district %>% unique()
dis2 <- data.frame()
uot <- data.frame()
uot2 for (k in 1:length(dis2)){
<- dttlv %>% filter(district == dis2[k])
dtd for (i in 1:(length(month)-1)){
<- dtd %>% na.omit() %>%
emon filter(date_m1_ontime >= month[i] &
<= month[i+1] &
date_m1_ontime > 0)
delay <- data.frame(district = dis2[k],
uot2 month = month[i],
del = median(emon$delay))
<- rbind(uot,uot2)
uot
}
}
<- uot %>%
month_delay mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
ggplot(aes(x = month,y=del)) +
geom_bar(stat = "identity")+
labs(x = "Month", y = "The median of delayed month")+
facet_wrap(vars(dis),ncol = 1)+
scale_x_date(breaks = "2 months",
date_labels= "%b %Y")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45,size = 8,
hjust=1))
| time_vac | vac_dl | month_delay | edu | employ | reli | chil vac_cov
$tv <- ifelse(ipsum$VN2019A_TV == 1,1,0)
ipsum$radio <- ifelse(ipsum$VN2019A_RADIO == 1,1,0)
ipsum$computer <- ifelse(ipsum$VN2019A_COMPUTER == 1,1,0)
ipsum$phone <- ifelse(ipsum$VN2019A_PHONE == 1,1,0)
ipsum$refrig <- ifelse(ipsum$VN2019A_REFRIG == 1,1,0)
ipsum$washer <- ifelse(ipsum$VN2019A_WASHER == 1,1,0)
ipsum$watheat <- ifelse(ipsum$VN2019A_WATHEAT == 1,1,0)
ipsum$aircon <- ifelse(ipsum$VN2019A_AIRCON == 1,1,0)
ipsum$motorcyc <- ifelse(ipsum$VN2019A_MOTORCYC == 1,1,0)
ipsum$bicycle <- ifelse(ipsum$VN2019A_BIKE == 1,1,0)
ipsum$car <- ifelse(ipsum$VN2019A_CAR == 1,1,0)
ipsum
<- ipsum[,c("district","tv","computer","phone",
wealth_index "refrig","washer","watheat","aircon","motorcyc",
"bicycle","car")]
<- nestedtemp(wealth_index[,-1])
d1
plot(d1, kind="incid",names = TRUE,ylab="",yaxt="n",las=1)
## rearrange column follow the order of nestedness analysis
<- ipsum[,c("district","phone","motorcyc","tv","refrig",
wealth_index "washer","computer","aircon","watheat",
"bicycle","car")]
## select the highest property of each individual
<- cbind(wealth_index[,1],
wealth_index -1] %>%
wealth_index[,rowwise() %>%
mutate(highest_property = names(.)[max(which(c_across(everything()) == 1))]) %>%
ungroup()) %>% na.omit()
$quantile <-
wealth_indexcase_when(wealth_index$highest_property == "car" ~ "Richest, 25%",
$highest_property == "bicycle" |
wealth_index$highest_property == "watheat" |
wealth_index$highest_property == "aircon" ~ "Middle, 25%",
wealth_index$highest_property == "washer" |
wealth_index$highest_property == "refrig"|
wealth_index$highest_property == "computer" ~ "Lower Middle, 25%",
wealth_index$highest_property == "phone" |
wealth_index$highest_property == "motorcyc" |
wealth_index$highest_property == "tv" ~ "Poorest, 25%") %>%
wealth_indexfactor(levels = c("Richest, 25%",
"Middle, 25%",
"Lower Middle, 25%",
"Poorest, 25%")
)
<- wealth_index %>% group_by(district) %>%
wealth count(quantile) %>% scale_per() %>%
mutate(dis = factor(district,
levels = as.character(sorted$district))) %>%
ggplot() +
geom_col(aes(x = per,
y = quantile)) +
facet_wrap(vars(dis),
# scales = "free",
ncol = 1)+
labs(x = "Percentage of total population(%)",
y = "Wealth Quantile")+
theme_light()+
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15))
| time_vac | vac_dl | month_delay | wealth |edu | employ | reli | chil vac_cov