This notebook generates the waitlist mortality data.
The required input files are:
data/throacic.rds
data/cand_wl_hist.rds
data/refusals.csv
data/STAR File Documentation.xls
The outputs are:
data/model_data.{csv, rds}
Settings and Functions
Show the code
plot_data <-function(data, var, n =12){# n is the maximum number of bins/levels to use in plot y =1-data$outcome # set survival as outcome of interest x = data %>%pull({{var}})if(all(is.na(x))) return(ggplot()) k =5# shrinkage prior mu =mean(y)if(n_distinct(x) <= {{n}}) x =factor(x)if(is.character(x)) x =factor(x)if(is.factor(x)) x =fct_lump_n(x, n = {{n}})if(is.numeric(x) | lubridate::is.POSIXt(x) | lubridate::is.Date(x)){ brks = ggplot2:::breaks(x, "numbers", nbins = {{n}}) %>%unique()#x = cut(x, c(-Inf,brks)) x =cut(x, brks, include.lowest =TRUE) } x =fct_na_value_to_level(x) # convert NAs to explicit leveltibble(x, y) %>%group_by(x) %>%summarize(n =n(), # number in bin/leveln1 =sum(y >0), # number of outcomes of interest in bin/levelp = (n1 + k*mu)/(n + k), # laplace smoothing estimatese =sqrt(p*(1-p)/n), # confidence intervals based on laplace lower =pmax(0, p -2*se),# approx 95% intervalsupper =pmin(1, p +2*se) ) %>%ggplot(aes(x, p)) +geom_hline(yintercept =mean(y), lty =3) +geom_errorbar(aes(ymin=lower, ymax=upper)) +geom_point() +# labs(x = xlab) + # scale_x_discrete(guide = guide_axis(n.dodge = 2))scale_x_discrete(labels = scales::label_wrap(10)) +labs(x =sym(var), y ="Waitlist Survival") +coord_cartesian(ylim =c(.80, 1))}
Get Population
Thoracic Data
The thoracic data has information collected on transplant candidates and recipients. The data was retrieved from UNOS on 2022-04-01.
The REM_CD is the coded reason for removal from the waitlist. The details are in the STAR documentation.
The created outcome variable corresponds to an adverse outcome defined as death (REM_CD = 8) or Candidate too sick to transplant (REM_CD = 13, “Cand. cond. deteriorated,too sick to tx”).
outcome = 1 is adverse.
outcome = 0 is everything else (i.e., candidate was removed from the waitlist for anything besides death or deteriorization).
Show the code
REM_CD_code = readxl::read_excel(file.path(dir_data, "STAR File Documentation.xls"),sheet ="THORACIC_FORMATS_FLATFILE",skip=1) %>%filter(`SAS ANALYSIS FORMAT`=="REMCD") %>%select(REM_CD =`Data Field Value`,REMOVAL_REASON =`Data Field Formatted Value` ) %>%mutate(outcome =1L*(REM_CD %in%c("8", "13")),outcome_full =case_match(REM_CD,#: adverse outcomes"8"~"Death","13"~"Too sick", # Cand. cond. deteriorated,too sick to tx#: Still alive or Tx'edc("2", "3", "4", "14", "15", "21", "23") ~"Tx","6"~"Alive", # Refused transplant"7"~"Alive", # Transferred to another center "12"~"Alive", # Cand. condition improved, tx not needed"16"~"Alive", # Candidate Removed in Error#: other"10"~"Listed in Error","24"~"Lost contact",.default ="Other" ) )
Population
1. Pediatric Candidate (Age < 18) on waitlist for heart
There are 6244 pediatric waitlists between 2010-01-01 and 2020-12-31. There were 5972 unique candidates (since some were on multiple waitlists during this period).
3. No prior heart transplants
Show the code
thor3 = thor2 %>%filter( NUM_PREV_TX ==0, !(PREV_TX %in%"Y"),!(THORACIC_DGN %in%c(1700, 1100:1199)) # codes related to RE-TX/GF )
This removed 399 and leaves 5845.
4. Patient’s first heart waitlist
Keep only the data from the first time a patient was listed. This will exclude the waitlists corresponding to patients i) who had previous heart transplants, ii) are listed multiple times, and iii) were transferred to another center.
The removal code corresponding to listing mistakes (REM_CD = 10 “Candidate listed in error”) are removed from the data.
Show the code
thor5 = thor4 %>%filter( !(REM_CD %in%c("10")) )
This removed 0 and leaves 5669.
6. Remove waitlists with unknown outcomes
The removal codes corresponding to candidates lost (REM_CD = 24 “Unable to contact candidate”) and candidates still alive at data collection/censored (REM_CD = NA) are removed from the data.
DATA_WL = WL %>%filter(CHG_TY =="A") %>%# collect data when added to waitlistsemi_join(population, by ="WL_ID_CODE") %>%transmute( WL_ID_CODE, WL_DT = CHG_DT, # Date-time candidate first added to waitlistPRELIM_XMATCH_REQ =recode_YNU(PRELIM_XMATCH_REQ), )
EDA: Waitlist Data
Show the code
model_data = DATA_WL %>%full_join(population, by ="WL_ID_CODE")vars =colnames(model_data) %>%setdiff(colnames(population))walk(vars, ~plot_data(model_data, var = .x) %>% print)
Candidate Demographic Attributes
Get candidate demographic predictor variables
Show the code
candidate_demographic_data = thoracic %>%semi_join(population, by="WL_ID_CODE") %>%transmute( WL_ID_CODE, #: Age at time of registration or listingAGE =pmax(INIT_AGE, 0), #set -1 to age 0 #: Gender {M,F}GENDER =factor(GENDER),#: RaceRACE =recode_race(ETHCAT), #: Weight in kgWEIGHT_KG =coalesce(INIT_WGT_KG_CALC, WGT_KG_TCR), #: Height in cmHEIGHT_CM =coalesce(INIT_HGT_CM_CALC, HGT_CM_TCR),#: BMIBMI =coalesce(INIT_BMI_CALC, BMI_TCR),#: Body Surface Area (BSA) using Mosteller's formulaBSA =sqrt(HEIGHT_CM * WEIGHT_KG /3600), #: Blood TypeABO =recode_ABO(ABO),#: CitizenshipCITIZENSHIP =case_match(CITIZENSHIP, c(1,2) ~"US", # 2 ~ "Resident Alien",#3 ~ "Non-resident Alien",#c(4, 5) ~ "Non US",6~"Travel for Tx", .default ="Other" ) )
EDA: Candidate Demographics
Show the code
model_data = candidate_demographic_data %>%full_join(population, by ="WL_ID_CODE")vars =colnames(model_data) %>%setdiff(colnames(population))walk(vars, ~plot_data(model_data, var = .x) %>% print)
Candidate Risk Level Attributes
Code to convert functional status code into description
Show the code
# Functional Statuslibrary(readxl)FUNC_STAT_dict =read_excel(file.path(dir_data, "STAR File Documentation.xls"), sheet ="THORACIC_FORMATS_FLATFILE",skip =1) %>%filter(`SAS ANALYSIS FORMAT`=="FUNCSTAT") %>%transmute(code =`Data Field Value`,descr =`Data Field Formatted Value` ) %>%# convert to integer code; drop non-numeric valuesfilter(!is.na(code), code !="**OTHER**", code !="Null or Missing") %>%mutate(code =as.integer(code)) recode_FUNC_STAT <-function(FUNC_STAT){ levs = FUNC_STAT_dict %>%filter(code %in%c(996, 998, 4000:4900)) %>%pull(descr) ind =match(FUNC_STAT, FUNC_STAT_dict$code)factor(FUNC_STAT_dict$descr[ind], levels = levs)}
Get candidate risk predictor variables
Show the code
candidate_risk_data = thoracic %>%semi_join(population, by="WL_ID_CODE") %>%transmute( WL_ID_CODE,STATUS =recode_STATUS(INIT_STAT),#: Life Support in general LIFE_SUPPORT_CAND_REG =recode_YNU(LIFE_SUP_TCR),LIFE_SUPPORT_OTHER = OTH_LIFE_SUP_TCR, # Binary PGE_TCR, # LIFE_SUPPORT_PGE = PGE_TCR # Binary #: ECMO (Life Support)ECMO_CAND_REG = ECMO_TCR, # ifelse(ECMO_TCR == 1, "Yes", "No"),#: VAD VAD_CAND_REG =case_when( VAD_DEVICE_TY_TCR ==1~"No", VAD_DEVICE_TY_TCR >1~"Yes", .default ="No"# not many missing ),VAD_DEVICE_TY_TCR =case_match(VAD_DEVICE_TY_TCR,1~"NONE",2~"LVAD",3~"RVAD",4~"TAH",5~"LVAD+RVAD",6~"LVAD/RVAD/TAH Unspecified" ) %>%factor(),#: PATIENT ON LIFE SUPPORT - VENTILATORVENTILATOR_CAND_REG = VENTILATOR_TCR, #ifelse(VENTILATOR_TCR == 1, "Yes", "No"),#: Life Support (Other)# IABP_TCR# IABP_TRR# PGE_TCR# PGE_TRR#: Functional Status FUNC_STAT_CAND_REG =recode_FUNC_STAT(FUNC_STAT_TCR),# on other waitlists?WL_OTHER_ORG =case_when( MULTIORG =="Y"~"Y", WLHL =="Y"~"Y", WLIN =="Y"~"Y", WLKI =="Y"~"Y", WLKP =="Y"~"Y", WLLI =="Y"~"Y", WLLU =="Y"~"Y",.default ="N" ) %>%recode_YNU(),#: transplant history# NUM_PREV_TX, # number of prev HR TX (self-reported?)# PREV_TX_HR = PREV_TX,# {Y,N} previous HR TX (from data); NA if no Tx (data leakage warning: this is missing if not transplanted)# PREV_TX_ANY_ORGAN = PREV_TX_ANY,# DAYS_SINCE_PREV_TX_HR = PRVTXDIF, this is at time of TX#: OtherCEREB_VASC =recode_YNU(CEREB_VASC), DIAB =case_match(DIAB, 1~"None", 2~"Type I", 3~"Type II", c(4,5) ~"Type Unknown", .default ="Unknown") %>%factor(),DIALYSIS_CAND =case_match(DIAL_TY_TCR, 1~"No", c(2,3)~"Yes", .default="Unknown") %>%factor(),HEMODYNAMICS_CO = HEMO_CO_TCR, IMPL_DEFIBRIL =recode_YNU(IMPL_DEFIBRIL),# INHALED_NO,INOTROP_VASO_CO_REG =recode_YNU(INOTROP_VASO_CO_TCR), INOTROPES_TCR, MOST_RCNT_CREAT, # CREATININE_REG = eGFR =0.412*coalesce(INIT_HGT_CM_CALC, HGT_CM_TCR) /pmax(MOST_RCNT_CREAT, .001), TOT_SERUM_ALBUM, )
EDA: Candidate Risk
Show the code
model_data = candidate_risk_data %>%full_join(population, by ="WL_ID_CODE")vars =colnames(model_data) %>%setdiff(colnames(population))walk(vars, ~plot_data(model_data, var = .x) %>% print)
Candidate Diagnosis
The candidate diagnosis is found in the thoracic data.
THORACIC_DGN: Waitlist Candidate Diagnosis
TCR_DGN: Candidate Diagnosis at Listing
DIAG: Diagnosis from at Transplant and if missing, from TCR
TCR_DGN_OSTXT and DIAG_OSTXT give text diagnosis
Recode candidate diagnosis into: Myocarditis, Congenital Heart Disease Without Surgery, Congenital Heart Disease With Surgery, Dilated Cardiomyopathy, Restrictive Cardiomyopathy, Hypertrophic Cardiomyopathy, Valvular Heart Disease, or Other.
model_data = candidate_diagnosis %>%full_join(population, by ="WL_ID_CODE")vars =colnames(model_data) %>%setdiff(colnames(population))walk(vars, ~plot_data(model_data, var = .x) %>% print)
Outcomes using the individual diagnosis codes:
Show the code
candidate_diagnosis %>%left_join(population, by ="WL_ID_CODE") %>%count(CAND_DIAG_CODE, CAND_DIAG, outcome) %>%spread(outcome, n, fill =0) %>%mutate(p = (`0`+ .9*5) / (`0`+`1`+5)) %>%# smooth toward 0.90arrange(p)%>%print_table()
CAND_DIAG_CODE
CAND_DIAG
0
1
p
1202: Valvular Heart Disease
Valvular Heart Disease
9
4
0.750
1054: Restrictive Cardiomyopathy
Restrictive Cardiomyopathy
7
2
0.821
1206: Congenital Heart Disease Without Surgery
Congenital Heart Disease Without Surgery
280
55
0.837
1205: Congenital Heart Disease Without Surgery
Congenital Heart Disease Without Surgery
35
6
0.859
1207: Congenital Heart Disease With Surgery
Congenital Heart Disease With Surgery
2124
326
0.867
999: Other
Other
57
8
0.879
1203: Other
Other
7
1
0.885
1002: Dilated Cardiomyopathy
Dilated Cardiomyopathy
8
1
0.893
1004: Myocarditis
Myocarditis
180
19
0.904
999: Dilated Cardiomyopathy
Dilated Cardiomyopathy
32
3
0.912
1005: Dilated Cardiomyopathy
Dilated Cardiomyopathy
1
0
0.917
1010: Other
Other
1
0
0.917
1204: Other
Other
1
0
0.917
1099: Restrictive Cardiomyopathy
Restrictive Cardiomyopathy
48
4
0.921
1008: Other
Other
2
0
0.929
1052: Restrictive Cardiomyopathy
Restrictive Cardiomyopathy
2
0
0.929
1001: Dilated Cardiomyopathy
Dilated Cardiomyopathy
42
3
0.930
1201: Hypertrophic Cardiomyopathy
Hypertrophic Cardiomyopathy
174
12
0.935
1049: Dilated Cardiomyopathy
Dilated Cardiomyopathy
280
17
0.942
1050: Restrictive Cardiomyopathy
Restrictive Cardiomyopathy
233
14
0.942
1209: Dilated Cardiomyopathy
Dilated Cardiomyopathy
5
0
0.950
1000: Dilated Cardiomyopathy
Dilated Cardiomyopathy
1178
58
0.953
1006: Myocarditis
Myocarditis
26
1
0.953
1200: Other
Other
10
0
0.967
1003: Dilated Cardiomyopathy
Dilated Cardiomyopathy
191
6
0.968
1208: Other
Other
19
0
0.979
1007: Dilated Cardiomyopathy
Dilated Cardiomyopathy
31
0
0.986
Center Level Predictors
Listing Center Volume
Calculate the number of pediatric heart transplants each year by listing center.
Show the code
#: Number of pediatric hr transplants in previous yearTX_YR = thoracic %>%filter(coalesce(AGE, INIT_AGE) <18, # Age of transplant recipient ORGAN =="HR" ) %>%count(LISTING_CTR_CODE, YR = TX_YEAR) %>%complete(LISTING_CTR_CODE, YR, fill =list(n=0)) %>%group_by(LISTING_CTR_CODE) %>%arrange(YR) %>%mutate(LISTING_CTR_PEDHRTX_PREV_YR = dplyr::lag(n, default=0) ) %>%ungroup() %>%select(-n)
Listing Center Practice
median_refusals: median number of offer refusals per candidate at listing center. Averaged over 2010-2019.
p_refusals: proportion of offers at listing center that are refused. Averaged over 2010-2019.
LISTING_CTR_PEDHRTX_PREV_YR: number of pediatric heart transplants at listing center in the previous year.