Estimate the Probability of Extinction Accounting for Individual-level Transmission Variation

Author
Joshua W. Lambert
Published
November 11, 2025
What do we have?

Line list and contact data.
Steps in code

## Transmission clusters and superspreading events

# Load required R packages ------------------------------------------------

library(simulist)
library(epicontacts)
library(superspreading)
library(fitdistrplus)

# Choose a seed that results in suitable and reproducible outbreak --------

set.seed(1)

# Simulate outbreak -------------------------------------------------------

## Contact distribution R = 1.2 and k = 0.5
## (i.e. overdispersed contact and transmission).

outbreak <- simulist::sim_outbreak(
  contact_distribution = function(x) dnbinom(x = x, mu = 1.2, size = 0.4),
  prob_infection = 0.5,
  outbreak_size = c(50, 100)
)

# Plot contact network ----------------------------------------------------

contact_net <- epicontacts::make_epicontacts(
  linelist = outbreak$linelist,
  contacts = outbreak$contacts,
  id = "case_name",
  from = "from",
  to = "to",
  directed = TRUE
)

# plot(contact_net)


# Plot transmission network -----------------------------------------------

transmission_net <- outbreak$contacts[outbreak$contacts$was_case == TRUE, ]

transmission_net <- epicontacts::make_epicontacts(
  linelist = outbreak$linelist,
  contacts = transmission_net,
  id = "case_name",
  from = "from",
  to = "to",
  directed = TRUE
)

transmission_net
#> 
#> /// Epidemiological Contacts //
#> 
#>   // class: epicontacts
#>   // 162 cases in linelist; 161 contacts;  directed 
#> 
#>   // linelist
#> 
#> # A tibble: 162 × 13
#>    id        id.1 case_type sex     age date_onset date_reporting date_admission
#>    <chr>    <int> <chr>     <chr> <int> <date>     <date>         <date>        
#>  1 Dakei D…     1 confirmed m        20 2023-01-01 2023-01-01     2023-01-09    
#>  2 Heriber…     2 confirmed m        62 2023-01-04 2023-01-04     NA            
#>  3 Monica …     3 suspected f        40 2023-01-08 2023-01-08     NA            
#>  4 Bianca …     4 probable  f        75 2023-01-10 2023-01-10     2023-01-13    
#>  5 Antonio…     8 suspected m        40 2023-01-03 2023-01-03     2023-01-07    
#>  6 Austin …    10 probable  m         8 2023-01-01 2023-01-01     NA            
#>  7 Heather…    13 probable  f         9 2023-01-13 2023-01-13     NA            
#>  8 Kaitlyn…    14 confirmed f         9 2023-01-04 2023-01-04     NA            
#>  9 Lawrenc…    15 confirmed m        22 2023-01-09 2023-01-09     2023-01-14    
#> 10 Benjami…    17 confirmed m        39 2023-01-12 2023-01-12     NA            
#> # ℹ 152 more rows
#> # ℹ 5 more variables: outcome <chr>, date_outcome <date>,
#> #   date_first_contact <date>, date_last_contact <date>, ct_value <dbl>
#> 
#>   // contacts
#> 
#> # A tibble: 161 × 8
#>    from   to      age sex   date_first_contact date_last_contact was_case status
#>    <chr>  <chr> <int> <chr> <date>             <date>            <lgl>    <chr> 
#>  1 Dakei… Heri…    62 m     2022-12-31         2023-01-04        TRUE     case  
#>  2 Dakei… Moni…    40 f     2022-12-30         2023-01-03        TRUE     case  
#>  3 Dakei… Bian…    75 f     2022-12-25         2023-01-07        TRUE     case  
#>  4 Dakei… Anto…    40 m     2023-01-01         2023-01-01        TRUE     case  
#>  5 Dakei… Aust…     8 m     2022-12-29         2023-01-05        TRUE     case  
#>  6 Herib… Heat…     9 f     2023-01-02         2023-01-08        TRUE     case  
#>  7 Herib… Kait…     9 f     2022-12-29         2023-01-09        TRUE     case  
#>  8 Herib… Lawr…    22 m     2022-12-29         2023-01-08        TRUE     case  
#>  9 Herib… Benj…    39 m     2023-01-01         2023-01-07        TRUE     case  
#> 10 Herib… Anwa…    85 m     2022-12-31         2023-01-05        TRUE     case  
#> # ℹ 151 more rows

# plot(transmission_net)

# Extract secondary case data from outbreak -------------------------------

contacts <- outbreak$contacts

# subset to contacts that caused transmission
infections <- contacts[contacts$was_case == TRUE, ]

# Tabulate number of infections from each infector
secondary_cases <- table(infections$from)

# Calculate number of infections that did not cause any secondary cases
num_no_transmit <- sum(!infections$to %in% infections$from)

# Bind number of secondary cases with those that had no onward transmission
all_cases <- sort(c(rep(0, num_no_transmit), secondary_cases))

# Fit and compare offspring distributions ---------------------------------

# fit a set of offspring distribution models:
# - Poisson
# - Geometric
# - Negative Binomial
# - Poisson-lognormal compound
# - Poisson-Weibull compound
pois_fit <- fitdistrplus::fitdist(data = all_cases, distr = "pois")
geom_fit <- fitdistrplus::fitdist(data = all_cases, distr = "geom")
nbinom_fit <- fitdistrplus::fitdist(data = all_cases, distr = "nbinom")
poislnorm_fit <- fitdistrplus::fitdist(
  data = all_cases,
  distr = "poislnorm",
  start = list(meanlog = 1, sdlog = 1)
)
poisweibull_fit <- fitdistrplus::fitdist(
  data = all_cases,
  distr = "poisweibull",
  start = list(shape = 1, scale = 1)
)

# compare model fits
model_tbl <- superspreading::ic_tbl(
  pois_fit, geom_fit, nbinom_fit, poislnorm_fit, poisweibull_fit
)
model_tbl
#>   distribution      AIC   DeltaAIC         wAIC      BIC   DeltaBIC
#> 1       nbinom 422.6880   0.000000 8.854134e-01 428.8632   0.000000
#> 2  poisweibull 426.8456   4.157665 1.107441e-01 433.0208   4.157665
#> 3    poislnorm 433.5685  10.880482 3.841311e-03 439.7436  10.880482
#> 4         geom 449.7700  27.082010 1.165098e-06 452.8576  23.994414
#> 5         pois 572.4927 149.804723 2.614952e-33 575.5803 146.717127
#>           wBIC
#> 1 8.854096e-01
#> 2 1.107436e-01
#> 3 3.841294e-03
#> 4 5.455361e-06
#> 5 1.224405e-32

# Extract parameters from best fit model ----------------------------------

R <- nbinom_fit$estimate[["mu"]]
k <- nbinom_fit$estimate[["size"]]

# print estimates
message("R = ", signif(R, digits = 4), "\n", "k = ", signif(k, digits = 4))

# Estimate proportions of cases occur in clusters of >= a given si --------

superspreading::proportion_cluster_size(
  R = R, 
  k = k, 
  cluster_size = c(2, 5, 10)
)
#>           R        k prop_2 prop_5 prop_10
#> 1 0.9937403 0.287379  85.2%  46.9%   15.2%


# Estimate proportion of cases causing 80% transmission -------------------

superspreading::proportion_transmission(
  R = R, 
  k = k,
  prop_transmission = 0.8
)
#>           R        k prop_80
#> 1 0.9937403 0.287379   17.7%

# Estimate probability of outbreak extinction -----------------------------

superspreading::probability_extinct(
  R = R,
  k = k,
  num_init_infect = 1
)
#> [1] 1

# calculate upper confidence interval for R estimate to explore extinction probability
R_upper_bound <- nbinom_fit$estimate[["mu"]] + (qnorm(0.975) * nbinom_fit$sd[["mu"]])

superspreading::probability_extinct(
  R = R_upper_bound, 
  k = k, 
  num_init_infect = 1
)
#> [1] 0.8867148

# increase number of initial introductions seeding outbreaks to assess risk
superspreading::probability_extinct(
  R = R_upper_bound,
  k = k,
  num_init_infect = 10
)
#> [1] 0.3004967

# apply small control measure on transmission to see affect on extinction probability
superspreading::probability_extinct(
  R = R,
  k = k,
  num_init_infect = 10,
  ind_control = 0.1
)
#> [1] 1
Steps in detail

(To fill)
Please note that the code assumes the necessary packages are already installed. If they are not, you can install them using first the install.packages("pak") function and then the pak::pak() function for both packages in CRAN or GitHub before loading them with library().
What do we have?

Steps in code

Steps in detail

Related