Estimate the Probability of Extinction Accounting for Individual-level Transmission Variation

Author

Joshua W. Lambert

Published

November 11, 2025

What do we have?

  • Line list and contact data.

Steps in code

## Transmission clusters and superspreading events

# Load required R packages ------------------------------------------------

library(simulist)
library(epicontacts)
library(superspreading)
library(fitdistrplus)

# Choose a seed that results in suitable and reproducible outbreak --------

set.seed(1)

# Simulate outbreak -------------------------------------------------------

## Contact distribution R = 1.2 and k = 0.5
## (i.e. overdispersed contact and transmission).

outbreak <- simulist::sim_outbreak(
  contact_distribution = function(x) dnbinom(x = x, mu = 1.2, size = 0.4),
  prob_infection = 0.5,
  outbreak_size = c(50, 100)
)

# Plot contact network ----------------------------------------------------

contact_net <- epicontacts::make_epicontacts(
  linelist = outbreak$linelist,
  contacts = outbreak$contacts,
  id = "case_name",
  from = "from",
  to = "to",
  directed = TRUE
)

# plot(contact_net)


# Plot transmission network -----------------------------------------------

transmission_net <- outbreak$contacts[outbreak$contacts$was_case == TRUE, ]

transmission_net <- epicontacts::make_epicontacts(
  linelist = outbreak$linelist,
  contacts = transmission_net,
  id = "case_name",
  from = "from",
  to = "to",
  directed = TRUE
)

transmission_net
#> 
#> /// Epidemiological Contacts //
#> 
#>   // class: epicontacts
#>   // 162 cases in linelist; 161 contacts;  directed 
#> 
#>   // linelist
#> 
#> # A tibble: 162 × 13
#>    id        id.1 case_type sex     age date_onset date_reporting date_admission
#>    <chr>    <int> <chr>     <chr> <int> <date>     <date>         <date>        
#>  1 Dakei D…     1 confirmed m        20 2023-01-01 2023-01-01     2023-01-09    
#>  2 Heriber…     2 confirmed m        62 2023-01-04 2023-01-04     NA            
#>  3 Monica …     3 suspected f        40 2023-01-08 2023-01-08     NA            
#>  4 Bianca …     4 probable  f        75 2023-01-10 2023-01-10     2023-01-13    
#>  5 Antonio…     8 suspected m        40 2023-01-03 2023-01-03     2023-01-07    
#>  6 Austin …    10 probable  m         8 2023-01-01 2023-01-01     NA            
#>  7 Heather…    13 probable  f         9 2023-01-13 2023-01-13     NA            
#>  8 Kaitlyn…    14 confirmed f         9 2023-01-04 2023-01-04     NA            
#>  9 Lawrenc…    15 confirmed m        22 2023-01-09 2023-01-09     2023-01-14    
#> 10 Benjami…    17 confirmed m        39 2023-01-12 2023-01-12     NA            
#> # ℹ 152 more rows
#> # ℹ 5 more variables: outcome <chr>, date_outcome <date>,
#> #   date_first_contact <date>, date_last_contact <date>, ct_value <dbl>
#> 
#>   // contacts
#> 
#> # A tibble: 161 × 8
#>    from   to      age sex   date_first_contact date_last_contact was_case status
#>    <chr>  <chr> <int> <chr> <date>             <date>            <lgl>    <chr> 
#>  1 Dakei… Heri…    62 m     2022-12-31         2023-01-04        TRUE     case  
#>  2 Dakei… Moni…    40 f     2022-12-30         2023-01-03        TRUE     case  
#>  3 Dakei… Bian…    75 f     2022-12-25         2023-01-07        TRUE     case  
#>  4 Dakei… Anto…    40 m     2023-01-01         2023-01-01        TRUE     case  
#>  5 Dakei… Aust…     8 m     2022-12-29         2023-01-05        TRUE     case  
#>  6 Herib… Heat…     9 f     2023-01-02         2023-01-08        TRUE     case  
#>  7 Herib… Kait…     9 f     2022-12-29         2023-01-09        TRUE     case  
#>  8 Herib… Lawr…    22 m     2022-12-29         2023-01-08        TRUE     case  
#>  9 Herib… Benj…    39 m     2023-01-01         2023-01-07        TRUE     case  
#> 10 Herib… Anwa…    85 m     2022-12-31         2023-01-05        TRUE     case  
#> # ℹ 151 more rows

# plot(transmission_net)

# Extract secondary case data from outbreak -------------------------------

contacts <- outbreak$contacts

# subset to contacts that caused transmission
infections <- contacts[contacts$was_case == TRUE, ]

# Tabulate number of infections from each infector
secondary_cases <- table(infections$from)

# Calculate number of infections that did not cause any secondary cases
num_no_transmit <- sum(!infections$to %in% infections$from)

# Bind number of secondary cases with those that had no onward transmission
all_cases <- sort(c(rep(0, num_no_transmit), secondary_cases))

# Fit and compare offspring distributions ---------------------------------

# fit a set of offspring distribution models:
# - Poisson
# - Geometric
# - Negative Binomial
# - Poisson-lognormal compound
# - Poisson-Weibull compound
pois_fit <- fitdistrplus::fitdist(data = all_cases, distr = "pois")
geom_fit <- fitdistrplus::fitdist(data = all_cases, distr = "geom")
nbinom_fit <- fitdistrplus::fitdist(data = all_cases, distr = "nbinom")
poislnorm_fit <- fitdistrplus::fitdist(
  data = all_cases,
  distr = "poislnorm",
  start = list(meanlog = 1, sdlog = 1)
)
poisweibull_fit <- fitdistrplus::fitdist(
  data = all_cases,
  distr = "poisweibull",
  start = list(shape = 1, scale = 1)
)

# compare model fits
model_tbl <- superspreading::ic_tbl(
  pois_fit, geom_fit, nbinom_fit, poislnorm_fit, poisweibull_fit
)
model_tbl
#>   distribution      AIC   DeltaAIC         wAIC      BIC   DeltaBIC
#> 1       nbinom 422.6880   0.000000 8.854134e-01 428.8632   0.000000
#> 2  poisweibull 426.8456   4.157665 1.107441e-01 433.0208   4.157665
#> 3    poislnorm 433.5685  10.880482 3.841311e-03 439.7436  10.880482
#> 4         geom 449.7700  27.082010 1.165098e-06 452.8576  23.994414
#> 5         pois 572.4927 149.804723 2.614952e-33 575.5803 146.717127
#>           wBIC
#> 1 8.854096e-01
#> 2 1.107436e-01
#> 3 3.841294e-03
#> 4 5.455361e-06
#> 5 1.224405e-32

# Extract parameters from best fit model ----------------------------------

R <- nbinom_fit$estimate[["mu"]]
k <- nbinom_fit$estimate[["size"]]

# print estimates
message("R = ", signif(R, digits = 4), "\n", "k = ", signif(k, digits = 4))

# Estimate proportions of cases occur in clusters of >= a given si --------

superspreading::proportion_cluster_size(
  R = R, 
  k = k, 
  cluster_size = c(2, 5, 10)
)
#>           R        k prop_2 prop_5 prop_10
#> 1 0.9937403 0.287379  85.2%  46.9%   15.2%


# Estimate proportion of cases causing 80% transmission -------------------

superspreading::proportion_transmission(
  R = R, 
  k = k,
  prop_transmission = 0.8
)
#>           R        k prop_80
#> 1 0.9937403 0.287379   17.7%

# Estimate probability of outbreak extinction -----------------------------

superspreading::probability_extinct(
  R = R,
  k = k,
  num_init_infect = 1
)
#> [1] 1

# calculate upper confidence interval for R estimate to explore extinction probability
R_upper_bound <- nbinom_fit$estimate[["mu"]] + (qnorm(0.975) * nbinom_fit$sd[["mu"]])

superspreading::probability_extinct(
  R = R_upper_bound, 
  k = k, 
  num_init_infect = 1
)
#> [1] 0.8867148

# increase number of initial introductions seeding outbreaks to assess risk
superspreading::probability_extinct(
  R = R_upper_bound,
  k = k,
  num_init_infect = 10
)
#> [1] 0.3004967

# apply small control measure on transmission to see affect on extinction probability
superspreading::probability_extinct(
  R = R,
  k = k,
  num_init_infect = 10,
  ind_control = 0.1
)
#> [1] 1

Steps in detail

  • (To fill)

Please note that the code assumes the necessary packages are already installed. If they are not, you can install them using first the install.packages("pak") function and then the pak::pak() function for both packages in CRAN or GitHub before loading them with library().