I have a variable in my dataframe called "Cardiac Comorbidity Types" with either NAs or a column delimited list of various cardiac comorbidity types. How can I make a column for each possible comorbidity, and then fill the observations in with 1/0 where 1=indicates the presense of a comorbidity and 0=no comorbidity.
dput(head(et1$`Cardiac Comorbidity Types`,20))
c("MI,", NA, "CAD, Previous CABG or PTCA, MI, Pacemaker,", "Arrhythmia,",
"CAD, Previous CABG or PTCA, MI, Arrhythmia,", NA, "CAD, Previous CABG or PTCA, MI,",
"CAD, Previous CABG or PTCA, CHF, Pacemaker,", "CAD, Previous CABG or PTCA,",
"CAD, Previous CABG or PTCA, Arrhythmia,", "CAD, Previous CABG or PTCA,",
"CAD, Previous CABG or PTCA, MI,", "CAD, Previous CABG or PTCA, CHF, Arrhythmia,",
"CAD, Previous CABG or PTCA, Pacemaker,", "CAD, Previous CABG or PTCA, MI, CHF,",
"CAD, Previous CABG or PTCA, MI, CHF,", NA, "CAD, Previous CABG or PTCA, PVD, Pacemaker,",
"PVD,", "CAD, Previous CABG or PTCA,")
Additionally, how could I do this if the data was semicolon delimited?
We can use a combination of unnest
and pivot_wider
from tidyr
.
library(dplyr)
library(tidyr)
library(stringr)
data <- data %>% mutate(ID = 1:nrow(data))
data %>%
mutate(Cardiac.Comorbidity.Types = str_split(Cardiac.Comorbidity.Types, ", ?")) %>%
unnest(Cardiac.Comorbidity.Types) %>%
filter(Cardiac.Comorbidity.Types != "") %>%
pivot_wider(id_cols = "ID", names_from = Cardiac.Comorbidity.Types, values_from = Cardiac.Comorbidity.Types) %>%
right_join(data, by="ID") %>%
mutate_at(vars(-ID,-Cardiac.Comorbidity.Types), ~ as.integer(!is.na(.x))) %>% select(-ID)
# A tibble: 20 x 8
# MI CAD `Previous CABG or PTCA` Pacemaker Arrhythmia CHF PVD Cardiac.Comorbidity.Types
# <int> <int> <int> <int> <int> <int> <int> <fct>
# 1 1 0 0 0 0 0 0 MI,
# 2 0 0 0 0 0 0 0 NA
# 3 1 1 1 1 0 0 0 CAD, Previous CABG or PTCA, MI, Pacemaker,
# 4 0 0 0 0 1 0 0 Arrhythmia,
# 5 1 1 1 0 1 0 0 CAD, Previous CABG or PTCA, MI, Arrhythmia,
...
Data
data <- c("MI,", NA, "CAD, Previous CABG or PTCA, MI, Pacemaker,", "Arrhythmia,",
"CAD, Previous CABG or PTCA, MI, Arrhythmia,", NA, "CAD, Previous CABG or PTCA, MI,",
"CAD, Previous CABG or PTCA, CHF, Pacemaker,", "CAD, Previous CABG or PTCA,",
"CAD, Previous CABG or PTCA, Arrhythmia,", "CAD, Previous CABG or PTCA,",
"CAD, Previous CABG or PTCA, MI,", "CAD, Previous CABG or PTCA, CHF, Arrhythmia,",
"CAD, Previous CABG or PTCA, Pacemaker,", "CAD, Previous CABG or PTCA, MI, CHF,",
"CAD, Previous CABG or PTCA, MI, CHF,", NA, "CAD, Previous CABG or PTCA, PVD, Pacemaker,",
"PVD,", "CAD, Previous CABG or PTCA,")
data <- data.frame(Cardiac.Comorbidity.Types = data)