Fitting real COVID-19 case data
Estimating $R_t$ for Bogota using only the case data
We first load the COVID-19 data for Colombia and graph it.
# Load the Colombian COVID-19 cases/deaths/mobility dataset.
# NOTE(review): the load call was missing from this chunk; this assumes the
# dataset ships with the epidp package -- confirm.
data("covid_colombia_cases_deaths_mobility", package = "epidp")

# Plot the daily case and death series: one row of panels per series
# (cases, deaths) and one column per city, each panel with its own scales.
covid_colombia_cases_deaths_mobility %>%
  pivot_longer(c(cases, deaths)) %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  facet_grid(vars(name), vars(city),
    scales = "free"
  ) +
  xlab("Date") +
  # The original chain ended on a dangling `+`; close it with a y-axis label.
  ylab("Count")
We first estimate $R_t$ for Bogota using only the case data, using optimisation to give us a quick set of estimates.
# Restrict the data to Bogota; every fit below uses this subset.
df_bogota <- covid_colombia_cases_deaths_mobility %>%
  filter(city == "Bogota")

# generate serial interval for COVID-19 based on reasonable mean, sd
mean_si <- 6.5
sd_si <- 4.03
# One serial-interval weight per row of the case series.
w <- generate_vector_serial(nrow(df_bogota), mean_si, sd_si)

# fit using optimisation (is_sampling = FALSE) for a quick point estimate
fit <- fit_epifilter(
  N = nrow(df_bogota),
  C = df_bogota$cases,
  w = w,
  is_sampling = FALSE,
  as_vector = FALSE
) # this closing parenthesis was missing, leaving the call unterminated

# plot the point estimates of R_t alongside the case counts
R <- fit$par$R
df_bogota %>%
  mutate(Rt = R) %>%
  select(date, Rt, cases) %>%
  filter(date >= as.Date("2020-04-01")) %>%
  pivot_longer(-date) %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  scale_color_brewer("R_t", palette = "Dark2") +
  ylab("R_t") +
  facet_grid(vars(name), scales = "free")
We now fit using a fully Bayesian framework which outputs uncertainty.
# Fit the same model by sampling, so that posterior draws of R are available.
fit <- fit_epifilter(
  N = nrow(df_bogota),
  C = df_bogota$cases,
  w = w,
  is_sampling = TRUE,
  iter = 50,
  chains = 1 # as CRAN does not allow multiple cores
) # this closing parenthesis was missing, leaving the call unterminated

# extract posterior quantiles: R_draws has one column per time point, so the
# column-wise quantiles give a 95% interval and median for each date
R_draws <- rstan::extract(fit, "R")[[1]]
lower <- apply(R_draws, 2, function(x) quantile(x, 0.025))
middle <- apply(R_draws, 2, function(x) quantile(x, 0.5))
upper <- apply(R_draws, 2, function(x) quantile(x, 0.975))

# plot the posterior median and 95% interval, with R_t = 1 as a reference line
df_bogota %>%
  # the `mutate(` opener was missing, so the column assignments were orphaned
  mutate(
    lower = lower,
    middle = middle,
    upper = upper
  ) %>%
  select(date, lower, middle, upper) %>%
  filter(date >= as.Date("2020-04-01")) %>%
  ggplot(aes(x = date)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "blue", alpha = 0.6) +
  geom_line(aes(y = middle), colour = "blue") +
  geom_hline(yintercept = 1, linetype = 2, colour = "orange") +
  xlab("Date") +
  # The original chain ended on a dangling `+`; close it with a y-axis label.
  ylab("R_t")
Probing the drivers of $R_t$ using mobility data
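We assume a relationship between workplace mobility and $R_t$ of the form:
$$\log R_t = \beta_1 + \beta_2 m_t + \epsilon_t,$$
where $m_t$ is the (standardised) workplace mobility on day $t$ and $\epsilon_t$ represents the components of $R_t$ unrelated to workplace mobility.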
We now fit this model using epidp
# Design matrix: an intercept column plus standardised workplace mobility.
X <- tibble(
  cons = rep(1, nrow(df_bogota)),
  m = df_bogota$workplaces
) %>%
  # the `mutate(` opener was missing, so the assignment below was orphaned
  mutate(
    m = scale(m)[, 1] # scale() returns a one-column matrix; keep the vector
  ) %>%
  # The pipe originally dangled into the next statement.
  # NOTE(review): assumes fit_epifilter_covariates() expects a numeric
  # matrix for X -- confirm.
  as.matrix()

fit <- fit_epifilter_covariates(
  N = nrow(df_bogota),
  C = df_bogota$cases,
  w = w,
  X = X,
  is_sampling = TRUE,
  iter = 50, # probably too few iterations
  chains = 1 # as CRAN does not allow multiple cores; should run with more cores
) # this closing parenthesis was missing, leaving the call unterminated

# beta[2] is the coefficient on the second column of X (workplace mobility)
print(fit, "beta[2]")
#> Inference for Stan model: epifilter_covariates.
#> 1 chains, each with iter=50; warmup=25; thin=1;
#> post-warmup draws per chain=25, total post-warmup draws=25.
#> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
#> beta[2] -0.06 0.01 0.01 -0.08 -0.07 -0.05 -0.05 -0.04 4 1.47
#> Samples were drawn using NUTS(diag_e) at Thu Nov 14 19:26:15 2024.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split chains (at
#> convergence, Rhat=1).
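This negative association is probably a result of individuals responding to the COVID-19 pandemic conditions or governmental policy. Note that, with only 50 iterations on a single chain, the sampler has not converged (Rhat = 1.47 and n_eff = 4 in the output above), so this estimate should be treated as illustrative only.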
Determining drivers of $R_t$ for Bogota using financial time series
Smooth the weekly time series by linearly interpolating it to daily values
# Daily grid spanning the full range of the financial time series.
# (The start of the sequence was missing from the original seq() call.)
daily_dates <- seq(
  min(bogota_financial_time_series$date),
  max(bogota_financial_time_series$date),
  by = "day"
)
merchants <- unique(bogota_financial_time_series$merch_category)

# For each merchant category, linearly interpolate spend and transaction
# counts onto the daily grid, then stack the per-category tables. Building a
# list and binding once avoids growing a data frame inside a loop.
# NOTE(review): the column names (date, spendamountusd, countamountusd,
# merch_type) were reconstructed from how the result is used downstream
# (merch_type, countamountusd) and from the interpolated variable names --
# confirm against the original.
big_df <- lapply(merchants, function(merchant) {
  df_short <- bogota_financial_time_series %>%
    filter(merch_category == merchant)

  tibble(
    date = daily_dates,
    spendamountusd = approx(
      df_short$date, df_short$spendamt,
      xout = daily_dates
    )$y,
    countamountusd = approx(
      df_short$date, df_short$nb_transactions,
      xout = daily_dates
    )$y,
    merch_type = merchant
  )
}) %>%
  bind_rows()

# Attach the epidemiological data to the daily financial series.
# NOTE(review): the pipe originally dangled after left_join(); the case data
# contain several cities, so the join is restricted to Bogota here -- confirm
# this matches the intended final step.
df_both <- big_df %>%
  left_join(covid_colombia_cases_deaths_mobility) %>%
  filter(city == "Bogota")
Here, we demonstrate how we can investigate whether the (smoothed) daily number of transactions in Bogota is associated with $R_t$. We pick the “Grocery Stores/Supermarkets” category here because these shops are likely to be particularly high-contact settings.
# Keep only the supermarket transaction series (already joined to case data).
df_supermarkets <- df_both %>%
  filter(merch_type == "Grocery Stores/Supermarkets")

# Re-generate the serial interval weights at the length of this dataset: the
# earlier `w` was sized for df_bogota, which need not have the same number of
# rows as df_supermarkets.
w <- generate_vector_serial(nrow(df_supermarkets), mean_si, sd_si)

# Design matrix: an intercept column plus the standardised daily transaction
# count. The intercept must be sized from df_supermarkets (the original used
# nrow(df_bogota), which only works if the two tables have equal length).
X <- tibble(
  cons = rep(1, nrow(df_supermarkets)),
  m = df_supermarkets$countamountusd
) %>%
  # the `mutate(` opener was missing, so the assignment below was orphaned
  mutate(
    m = scale(m)[, 1] # scale() returns a one-column matrix; keep the vector
  ) %>%
  # The pipe originally dangled into the next statement.
  # NOTE(review): assumes fit_epifilter_covariates() expects a numeric
  # matrix for X -- confirm.
  as.matrix()

fit <- fit_epifilter_covariates(
  N = nrow(df_supermarkets),
  C = df_supermarkets$cases,
  w = w,
  X = X,
  is_sampling = TRUE,
  iter = 200,
  chains = 4
) # this closing parenthesis was missing, leaving the call unterminated

# beta[2] is the coefficient on the second column of X (transaction count)
print(fit, "beta[2]")
Plotting $R_t$ using these estimates.
# extract posterior quantiles: R_draws has one column per time point, so the
# column-wise quantiles give a 95% interval and median for each date
R_draws <- rstan::extract(fit, "R")[[1]]
lower <- apply(R_draws, 2, function(x) quantile(x, 0.025))
middle <- apply(R_draws, 2, function(x) quantile(x, 0.5))
upper <- apply(R_draws, 2, function(x) quantile(x, 0.975))

# plot the posterior median and 95% interval, with R_t = 1 as a reference line
df_supermarkets %>%
  # the `mutate(` opener was missing, so the column assignments were orphaned
  mutate(
    lower = lower,
    middle = middle,
    upper = upper
  ) %>%
  select(date, lower, middle, upper) %>%
  filter(date >= as.Date("2020-04-01")) %>%
  ggplot(aes(x = date)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "blue", alpha = 0.6) +
  geom_line(aes(y = middle), colour = "blue") +
  geom_hline(yintercept = 1, linetype = 2, colour = "orange") +
  xlab("Date") +
  # The original chain ended on a dangling `+`; close it with a y-axis label.
  ylab("R_t")