# Load packages
library(linelist)
library(incidence2)
library(cfr)
library(tidyverse)
# Read data
<- subset(outbreaks::ebola_sim_clean$linelist ,!is.na(hospital)) %>%
dat ::as_tibble()
dplyr
# View data
dat#> # A tibble: 4,373 × 11
#> case_id generation date_of_infection date_of_onset date_of_hospitalisation
#> <chr> <int> <date> <date> <date>
#> 1 d1fafd 0 NA 2014-04-07 2014-04-17
#> 2 53371b 1 2014-04-09 2014-04-15 2014-04-20
#> 3 f5c3d8 1 2014-04-18 2014-04-21 2014-04-25
#> 4 0f58c4 2 2014-04-22 2014-04-26 2014-04-29
#> 5 f9149b 3 NA 2014-05-03 2014-05-04
#> 6 881bd4 3 2014-04-26 2014-05-01 2014-05-05
#> 7 e66fa4 2 NA 2014-04-21 2014-05-06
#> 8 20b688 3 NA 2014-05-05 2014-05-06
#> 9 40ae5f 4 2014-05-04 2014-05-07 2014-05-08
#> 10 f547d6 3 2014-05-02 2014-05-07 2014-05-08
#> # ℹ 4,363 more rows
#> # ℹ 6 more variables: date_of_outcome <date>, outcome <fct>, gender <fct>,
#> # hospital <fct>, lon <dbl>, lat <dbl>
# Specify seed to create age column
set.seed(33)
# Create linelist object
<- dat %>%
dat_linelist # add age as a normal-distributed variable
::mutate(age = charlatan::ch_norm(n = n(), mean = 55, sd = 10)) %>%
dplyr# categorize age
mutate(age_category = base::cut(
x = age,
breaks = c(0,30,50,70,100),
include.lowest = TRUE,
right = FALSE
)%>%
) # create date of death variable
::mutate(date_of_death = dplyr::case_when(
dplyr== "Death" ~ date_of_outcome,
outcome TRUE ~ NA_Date_
%>%
))
# tag and validate key variables
::make_linelist(
linelistid = "case_id",
date_onset = "date_of_onset",
date_death = "date_of_death",
age_group = "age_category", allow_extra = TRUE
%>%
) ::validate_linelist(
linelistallow_extra = TRUE,
ref_types = linelist::tags_types(
age_group = c("factor"),
allow_extra = TRUE
)%>%
) ::tags_df()
linelist
# View linelist object
dat_linelist#> # A tibble: 4,373 × 4
#> id date_onset date_death age_group
#> <chr> <date> <date> <fct>
#> 1 d1fafd 2014-04-07 NA [50,70)
#> 2 53371b 2014-04-15 NA [50,70)
#> 3 f5c3d8 2014-04-21 NA [50,70)
#> 4 0f58c4 2014-04-26 NA [50,70)
#> 5 f9149b 2014-05-03 NA [30,50)
#> 6 881bd4 2014-05-01 NA [50,70)
#> 7 e66fa4 2014-04-21 NA [30,50)
#> 8 20b688 2014-05-05 2014-05-14 [50,70)
#> 9 40ae5f 2014-05-07 NA [50,70)
#> 10 f547d6 2014-05-07 NA [30,50)
#> # ℹ 4,363 more rows
# Create incidence2 object
<- dat_linelist %>%
dat_incidence # aggregate by groups and date type
::incidence(
incidence2date_index = c("date_onset", "date_death"),
groups = "age_group", # change: "age_group" or "age_category",
interval = "day", # change between: "day" or "week"
# complete_dates = TRUE, # change: does it affect the downstream analysis? [no]
)
# View incidence object
dat_incidence#> # incidence: 1,441 x 4
#> # count vars: date_onset, date_death
#> # groups: age_group
#> date_index age_group count_variable count
#> <date> <fct> <chr> <int>
#> 1 2014-04-07 [50,70) date_onset 1
#> 2 2014-04-15 [50,70) date_onset 1
#> 3 2014-04-21 [30,50) date_onset 1
#> 4 2014-04-21 [50,70) date_onset 1
#> 5 2014-04-26 [50,70) date_onset 1
#> 6 2014-05-01 [50,70) date_onset 2
#> 7 2014-05-03 [30,50) date_onset 1
#> 8 2014-05-05 [50,70) date_onset 1
#> 9 2014-05-06 [50,70) date_onset 2
#> 10 2014-05-07 [30,50) date_onset 1
#> # ℹ 1,431 more rows
# # exploratory plot
# dat_incidence %>%
# incidence2:::plot.incidence2(
# fill = "age_group" # change: "age_group" or "age_category",
# )
# Access to onset to death delay
<-
delay_onset_death ::epiparameter_db(
epiparameterdisease = "ebola",
epi_name = "onset to death",
single_epiparameter = TRUE
)
# View delay
delay_onset_death#> Disease: Ebola Virus Disease
#> Pathogen: Ebola Virus
#> Epi Parameter: onset to death
#> Study: WHO Ebola Response Team, Agua-Agum J, Ariyarajah A, Aylward B, Blake I,
#> Brennan R, Cori A, Donnelly C, Dorigatti I, Dye C, Eckmanns T, Ferguson
#> N, Formenty P, Fraser C, Garcia E, Garske T, Hinsley W, Holmes D,
#> Hugonnet S, Iyengar S, Jombart T, Krishnan R, Meijers S, Mills H,
#> Mohamed Y, Nedjati-Gilani G, Newton E, Nouvellet P, Pelletier L,
#> Perkins D, Riley S, Sagrado M, Schnitzler J, Schumacher D, Shah A, Van
#> Kerkhove M, Varsaneux O, Kannangarage N (2015). "West African Ebola
#> Epidemic after One Year — Slowing but Not Yet under Control." _The New
#> England Journal of Medicine_. doi:10.1056/NEJMc1414992
#> <https://doi.org/10.1056/NEJMc1414992>.
#> Distribution: gamma
#> Parameters:
#> shape: 1.642
#> scale: 4.995
# Estimate age-stratified CFR
%>%
dat_incidence # Adapt <incidence2> class output to {cfr} input
::prepare_data(
cfrcases_variable = "date_onset",
deaths_variable = "date_death"
%>%
) as_tibble() %>%
# Stratify {cfr} estimation
group_by(age_group) %>%
::nest() %>%
tidyrmutate(
temp =
::map(
purrr.x = data,
.f = cfr::cfr_static,
delay_density = function(x) density(delay_onset_death, x)
)%>%
) ::unnest(cols = temp) %>%
tidyridentity()
#> # A tibble: 4 × 5
#> # Groups: age_group [4]
#> age_group data severity_estimate severity_low severity_high
#> <fct> <list> <dbl> <dbl> <dbl>
#> 1 [0,30) <tibble [395 × 3]> 0.320 0.142 0.544
#> 2 [30,50) <tibble [395 × 3]> 0.394 0.367 0.421
#> 3 [50,70) <tibble [395 × 3]> 0.377 0.360 0.396
#> 4 [70,100] <tibble [395 × 3]> 0.340 0.288 0.396
How to estimate a delay-adjusted age-stratified case fatality risk?
Ingredients
- Simulated outbreak data of Ebola Virus Disease that matches some key properties of the West African Ebola outbreak of 2014-2015 from the package
{outbreaks}
- The
{linelist}
package to keep tagged and validated columns in a line list data set. - The
{incidence2}
package to generate aggregated incidence data with the daily number of reported cases and deaths. - The functions
tidyr::nest()
andpurrr::map()
to stratify estimates. - The
{cfr}
package to estimate delay-adjusted case fatality risk.
Steps in code
Steps in detail
- pending
Please note that the code assumes the necessary packages are already installed. If they are not, you can install them using first the install.packages("pak")
function and then the pak::pak()
function for both packages in CRAN or GitHub before loading them with library()
.
Additionally, make sure to adjust the serial interval distribution parameters according to the specific outbreak you are analyzing.