## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----eval=FALSE---------------------------------------------------------------
# install.packages('crrstep')

## ----message=FALSE, warning=FALSE---------------------------------------------
library(crrstep)
library(cmprsk)

## -----------------------------------------------------------------------------
set.seed(123)
n <- 400
age <- rnorm(n, mean = 60, sd = 10)
sex <- factor(sample(c('Male', 'Female'), n, replace = TRUE))
treatment <- factor(sample(c('Control', 'Treatment'), n, replace = TRUE))
biomarker <- rnorm(n, mean = 100, sd = 20)
linpred <- 0.03 * (age - 60) + 0.5 * (sex == 'Male') - 0.4 * (treatment == 'Treatment') + 0.01 * biomarker
base_hazard <- 0.1
true_time <- rexp(n, rate = base_hazard * exp(linpred))
p_cause1 <- plogis(linpred)
cause <- ifelse(runif(n) < p_cause1, 1, 2)
censor_time <- rexp(n, rate = 0.05)
ftime <- pmin(true_time, censor_time)
fstatus <- ifelse(ftime == censor_time, 0, cause)
sim_data <- data.frame(ftime, fstatus, age, sex, treatment, biomarker)
table(sim_data$fstatus)

## -----------------------------------------------------------------------------
result_back <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_back)

## -----------------------------------------------------------------------------
result_fwd <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'forward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_fwd)

## -----------------------------------------------------------------------------
result_bic <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = FALSE
)
cat('AIC selected variables:', nrow(result_back$coefficients), '\\n')
cat('BIC selected variables:', nrow(result_bic$coefficients), '\\n')

## -----------------------------------------------------------------------------
result_min <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~ age + sex,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_min)

## -----------------------------------------------------------------------------
set.seed(456)
sim_data$stage <- factor(sample(c('I', 'II', 'III', 'IV'), n, replace = TRUE))
sim_data$region <- factor(sample(c('North', 'South', 'East', 'West'), n, replace = TRUE))
result_factors <- crrstep(
  formula = ftime ~ age + sex + stage + region,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
summary(result_factors)
coef(result_factors)
logLik(result_factors)
BIC(result_factors)

## -----------------------------------------------------------------------------
result_interact <- crrstep(
  formula = ftime ~ age + sex + treatment + age:sex + sex:treatment,
  scope.min = ~ age + sex,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_interact)

## -----------------------------------------------------------------------------
sim_data$biomarker_c <- scale(sim_data$biomarker, scale = FALSE)[,1]
result_poly <- crrstep(
  formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2),
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_poly)

## -----------------------------------------------------------------------------
result_complex <- crrstep(
  formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + biomarker_c:treatment + I(biomarker_c^2):treatment,
  scope.min = ~ age,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
print(result_complex)

## -----------------------------------------------------------------------------
sim_data$biomarker_pos <- sim_data$biomarker - min(sim_data$biomarker) + 1
result_log <- crrstep(
  formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_log)

## -----------------------------------------------------------------------------
result_threeway <- crrstep(
  formula = ftime ~ age + sex + treatment + stage + sex:treatment + sex:stage + treatment:stage + sex:treatment:stage,
  scope.min = ~ age,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
print(result_threeway)

## -----------------------------------------------------------------------------
fit_full <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = FALSE,
  crr.object = TRUE
)
cat('Coefficients:\\n')
print(fit_full$coef)
cat('\\nLog-likelihood:\\n')
print(fit_full$loglik)

## -----------------------------------------------------------------------------
sim_data_miss <- sim_data
sim_data_miss$age[sample(1:n, 20)] <- NA
sim_data_miss$biomarker[sample(1:n, 15)] <- NA
result_miss <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data_miss,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_miss)

## -----------------------------------------------------------------------------
result_subset <- crrstep(
  formula = ftime ~ age + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  subset = sim_data$sex == 'Female',
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = FALSE
)
print(result_subset)

## -----------------------------------------------------------------------------
sessionInfo()

