## data-raw/prepare_data.R
## Run this script once to create data/*.rda files from the CSV sources.
## Usage: Rscript data-raw/prepare_data.R

## Load the two raw CSV sources. Paths are relative to the working directory,
## so the script has to be run from the package root.
##
## encoding = "UTF-8" marks non-ASCII strings as UTF-8 on input, so text such
## as the U+2192 arrow in the transition column is interpreted the same way
## regardless of the session locale.
## NOTE(review): assumes both CSVs are stored as UTF-8 -- confirm against the
## source files.
ckd_cvd_long <- read.csv("inst/extdata/longitudinal_biomarkers.csv",
                         stringsAsFactors = FALSE,
                         encoding = "UTF-8")

ckd_cvd_surv <- read.csv("inst/extdata/survival_events.csv",
                         stringsAsFactors = FALSE,
                         encoding = "UTF-8")

## Replace the Unicode right arrow (U+2192) in the transition labels with the
## ASCII digraph "->". The pattern is a single literal character rather than a
## regular expression, so it is matched with fixed = TRUE.
ckd_cvd_surv$transition <- gsub("\u2192", "->", ckd_cvd_surv$transition,
                                fixed = TRUE)

## Print a short summary of both tables as a sanity check before saving.
## Counts are formatted with sprintf("%d") because cat() would render round
## values such as 100000 in scientific notation (1e+05).
cat(sprintf("Longitudinal data: %d rows, %d patients\n",
            nrow(ckd_cvd_long),
            length(unique(ckd_cvd_long$patient_id))))
cat(sprintf("Survival data: %d rows, %d patients\n",
            nrow(ckd_cvd_surv),
            length(unique(ckd_cvd_surv$patient_id))))
## sep = "" avoids the stray space cat() would otherwise put before "\n".
cat("Biomarkers: ",
    paste(unique(ckd_cvd_long$biomarker), collapse = ", "),
    "\n", sep = "")
cat("Transitions (events):\n")
## Tabulate transitions for rows with status == 1, most frequent first.
## which() drops rows whose status is NA instead of indexing with NA.
print(sort(table(ckd_cvd_surv$transition[which(ckd_cvd_surv$status == 1)]),
           decreasing = TRUE))

## Write each prepared data frame to its own xz-compressed .rda file under
## data/, creating that directory first when it does not exist yet.
if (!dir.exists("data")) {
  dir.create("data")
}

save(list = "ckd_cvd_long", file = "data/ckd_cvd_long.rda", compress = "xz")
save(list = "ckd_cvd_surv", file = "data/ckd_cvd_surv.rda", compress = "xz")

## Confirm which files were written.
cat("\nSaved data/ckd_cvd_long.rda and data/ckd_cvd_surv.rda\n")
