Simulating trials with multiple events

library(survobj)
library(survival)
library(dplyr)

Introduction

This document presents a way to simulate a trial with multiple events and compare the empirical power of the analysis of the first or only episode using Cox regression and the analysis of multiple episodes under an Andersen and Gill model. Data is generated using a renewal homogeneous Poisson process as described Leemis (1987) .

Trial simulation

A total of 1000 trials are simulated using a survival object of class Weibull with shape of 0.5 and failure rate at time 1 of 40%. Each group will include 250 participants, and the hazard ratio (HR) for the intervention group will be 0.7 and the follow-up will be censored at time 1. Empirical power is defined as the proportion of trials with a robust p-value below 0.05

nsim = 1000
s_obj = s_weibull(fail = 0.4, t=1, shape = 0.5)
n = 250
subjid = seq(1, 2*n)
group = c(rep(0,n), rep(1,n))
hr = c(rep(1,n), rep(0.7,n))
tmax = 1
set.seed = 12345
sim <- lapply(
  1:nsim,
  function(x){
    # simulate survival times for one trial
    tsim <- matrix(rsurvhr(s_obj, hr), ncol = 1)
    i = 1
    while(min(tsim[,i]) < tmax) {
      i = i+1
      tsim<- cbind(tsim,renewhr(s_obj, hr, tsim[,i-1]))
    }
    # Analysis data.frame
    df <- data.frame(
      subjid = rep(subjid,i),
      group = rep(group, i),
      time = as.vector(tsim)
      ) |> 
      arrange(subjid, time)  |>  
      group_by(subjid)  |> 
      mutate(ncase = row_number()) |> 
      mutate(start = lag(time, default = 0)) |> 
      filter(start < tmax) |> 
      mutate(event = censor_event(tmax, time, 1)) |> 
      mutate(end = censor_time(tmax, time))

    # Analysis multiple episodes
    mult <- summary(
              coxph(Surv(start,end,event)~group, 
                method = "breslow",  
                id = subjid, 
                robust = T,  
                data = df,
                control = coxph.control(timefix = FALSE)))

    # Analysis first or only episode
    sing <- summary(
              coxph(Surv(start,end,event)~group, 
                method = "breslow",  
                id = subjid, 
                robust = T,  
                data = filter(df, ncase == 1),
                control = coxph.control(timefix = FALSE)))  
    
    # Export results for analysis
    return(
      data.frame(
      simid = c(x,x),
      res = c("recurrent","single"),
      events = c(mult$nevent, sing$nevent),
      hr = c(mult$coefficients[1,"exp(coef)"],sing$coefficients[1,"exp(coef)"]),
      pvalue =  c(mult$coefficients[1,"Pr(>|z|)"],sing$coefficients[1,"Pr(>|z|)"])
      )
    )
  }
) 

# Join all the simulations in a single data frame
sim_rec <- do.call(rbind, sim)

Analysis of the simulation

For recurrent events

rec_empirical_power = 
  binom.test(
    sum(sim_rec$pvalue[sim_rec$res == "recurrent"] <= 0.05), 
    length(sim_rec$pval[sim_rec$res == "recurrent"] ))
rec_empirical_power$estimate
#> probability of success 
#>                  0.801
rec_empirical_power$conf.int
#> [1] 0.7748841 0.8253299
#> attr(,"conf.level")
#> [1] 0.95

# Distribution of the simulated VEs}
summary(sim_rec$hr[sim_rec$res == "recurrent"])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.3901  0.6025  0.6644  0.6731  0.7359  1.0877

# Distribution of the simulated number of events
summary(sim_rec$events[sim_rec$res == "recurrent"])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   191.0   233.0   246.0   245.7   258.0   303.0

For first or only event

sing_empirical_power = 
  binom.test(
    sum(sim_rec$pvalue[sim_rec$res == "single"] <= 0.05), 
    length(sim_rec$pval[sim_rec$res == "single"] ))
sing_empirical_power$estimate
#> probability of success 
#>                  0.672
sing_empirical_power$conf.int
#> [1] 0.6419273 0.7010560
#> attr(,"conf.level")
#> [1] 0.95

# Distribution of the simulated VEs}
summary(sim_rec$hr[sim_rec$res == "single"])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.4196  0.6243  0.6934  0.7012  0.7713  1.0931

# Distribution of the simulated number of events
summary(sim_rec$events[sim_rec$res == "single"])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   143.0   168.0   175.0   175.1   183.0   212.0

References

Leemis, Lawrence M. 1987. “Variate Generation for Accelerated Life and Proportional Hazards Models.” Operations Research 35 (6): 892–94.