# Treatment span per patient and recorded procedure reason, restricted to
# procedures whose reason text mentions a cancer-related term.
#
# Result: one row per (patient, reasondescription) with the earliest start,
# the latest stop, procedure/encounter counts, summed base cost, the elapsed
# time between first start and last stop, and a coarse cancer_type label.
cancer_treatment_duration <- procedures |>
  # grepl() returns FALSE for NA reasons, so rows without a reason are dropped.
  filter(
    grepl(
      "cancer|carcinoma|neoplasm|malignant|lymphoma|leukemia|melanoma",
      reasondescription,
      ignore.case = TRUE
    )
  ) |>
  group_by(patient, reasondescription) |>
  summarise(
    # min()/max() are called without na.rm, so a missing start or stop in a
    # group makes the corresponding bound (and the derived duration) NA.
    first_treatment = min(start),
    last_treatment = max(stop),
    num_procedures = n(),
    total_cost = sum(base_cost, na.rm = TRUE),
    num_encounters = n_distinct(encounter),
    .groups = "drop"
  ) |>
  # Elapsed time from first start to last stop, in three units.
  mutate(
    treatment_days = as.numeric(
      difftime(last_treatment, first_treatment, units = "days")
    ),
    treatment_months = treatment_days / 30.44, # 365.25 / 12, rounded
    treatment_years = treatment_days / 365.25
  ) |>
  # Coarse label; case_when() takes the first matching branch, anything
  # unmatched (e.g. lymphoma, melanoma) falls through to "Other Cancer".
  mutate(
    cancer_type = case_when(
      grepl(
        "lung.*cancer|lung.*carcinoma|small cell",
        reasondescription,
        ignore.case = TRUE
      ) ~ "Lung Cancer",
      grepl(
        "breast",
        reasondescription,
        ignore.case = TRUE
      ) ~ "Breast Cancer",
      grepl(
        "colon",
        reasondescription,
        ignore.case = TRUE
      ) ~ "Colon Cancer",
      grepl(
        "prostate",
        reasondescription,
        ignore.case = TRUE
      ) ~ "Prostate Cancer",
      grepl(
        "leukemia",
        reasondescription,
        ignore.case = TRUE
      ) ~ "Leukemia",
      TRUE ~ "Other Cancer"
    )
  )
# Per-cancer-type summary of treatment duration and cost, ordered from the
# longest to the shortest mean duration (in months).
#
# cancer_treatment_duration holds one row per (patient, reasondescription),
# so a patient can contribute several rows to the same cancer_type. The
# patient count therefore uses n_distinct(patient) rather than n(), which
# would count those patients more than once. The duration and cost statistics
# are still computed over the patient-reason rows.
duration_summary <- cancer_treatment_duration |>
  group_by(cancer_type) |>
  summarise(
    num_patients = n_distinct(patient),
    median_days = median(treatment_days, na.rm = TRUE),
    mean_days = mean(treatment_days, na.rm = TRUE),
    median_months = median(treatment_months, na.rm = TRUE),
    mean_months = mean(treatment_months, na.rm = TRUE),
    min_days = min(treatment_days, na.rm = TRUE),
    max_days = max(treatment_days, na.rm = TRUE),
    median_cost = median(total_cost, na.rm = TRUE),
    mean_cost = mean(total_cost, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(desc(mean_months))
# Compact display table: the headline duration columns rounded to one
# decimal, plus the longest observed duration expressed in years.
duration_summary |>
  select(cancer_type, num_patients, median_months, mean_months, max_days) |>
  mutate(
    across(c(median_months, mean_months), \(x) round(x, 1)),
    max_years = round(max_days / 365.25, 1)
  )