Create binary categorical variables based on contents of a column delimited list

I have a variable in my dataframe called "Cardiac Comorbidity Types" with either NAs or a column delimited list of various cardiac comorbidity types. How can I make a column for each possible comorbidity, and then fill the observations in with 1/0 where 1=indicates the presense of a comorbidity and 0=no comorbidity.

dput(head(et1$`Cardiac Comorbidity Types`,20))
c("MI,", NA, "CAD, Previous CABG or PTCA, MI, Pacemaker,", "Arrhythmia,", 
"CAD, Previous CABG or PTCA, MI, Arrhythmia,", NA, "CAD, Previous CABG or PTCA, MI,", 
"CAD, Previous CABG or PTCA, CHF, Pacemaker,", "CAD, Previous CABG or PTCA,", 
"CAD, Previous CABG or PTCA, Arrhythmia,", "CAD, Previous CABG or PTCA,", 
"CAD, Previous CABG or PTCA, MI,", "CAD, Previous CABG or PTCA, CHF, Arrhythmia,", 
"CAD, Previous CABG or PTCA, Pacemaker,", "CAD, Previous CABG or PTCA, MI, CHF,", 
"CAD, Previous CABG or PTCA, MI, CHF,", NA, "CAD, Previous CABG or PTCA, PVD, Pacemaker,", 
"PVD,", "CAD, Previous CABG or PTCA,")

Additionally, how could I do this if the data was semicolon delimited?

Solution

We can use a combination of unnest and pivot_wider from tidyr.

library(dplyr)
library(tidyr)
library(stringr)
data <- data %>% mutate(ID = 1:nrow(data))

data %>% 
  mutate(Cardiac.Comorbidity.Types = str_split(Cardiac.Comorbidity.Types, ", ?")) %>%
  unnest(Cardiac.Comorbidity.Types) %>%
  filter(Cardiac.Comorbidity.Types != "") %>%
  pivot_wider(id_cols = "ID", names_from = Cardiac.Comorbidity.Types, values_from = Cardiac.Comorbidity.Types) %>%
  right_join(data, by="ID") %>%
  mutate_at(vars(-ID,-Cardiac.Comorbidity.Types), ~ as.integer(!is.na(.x))) %>% select(-ID)
# A tibble: 20 x 8
#      MI   CAD `Previous CABG or PTCA` Pacemaker Arrhythmia   CHF   PVD Cardiac.Comorbidity.Types                   
#   <int> <int>                   <int>     <int>      <int> <int> <int> <fct>                                       
# 1     1     0                       0         0          0     0     0 MI,                                         
# 2     0     0                       0         0          0     0     0 NA                                          
# 3     1     1                       1         1          0     0     0 CAD, Previous CABG or PTCA, MI, Pacemaker,  
# 4     0     0                       0         0          1     0     0 Arrhythmia,                                 
# 5     1     1                       1         0          1     0     0 CAD, Previous CABG or PTCA, MI, Arrhythmia, 
...

Data

data <- c("MI,", NA, "CAD, Previous CABG or PTCA, MI, Pacemaker,", "Arrhythmia,", 
"CAD, Previous CABG or PTCA, MI, Arrhythmia,", NA, "CAD, Previous CABG or PTCA, MI,", 
"CAD, Previous CABG or PTCA, CHF, Pacemaker,", "CAD, Previous CABG or PTCA,", 
"CAD, Previous CABG or PTCA, Arrhythmia,", "CAD, Previous CABG or PTCA,", 
"CAD, Previous CABG or PTCA, MI,", "CAD, Previous CABG or PTCA, CHF, Arrhythmia,", 
"CAD, Previous CABG or PTCA, Pacemaker,", "CAD, Previous CABG or PTCA, MI, CHF,", 
"CAD, Previous CABG or PTCA, MI, CHF,", NA, "CAD, Previous CABG or PTCA, PVD, Pacemaker,", 
"PVD,", "CAD, Previous CABG or PTCA,")
data <- data.frame(Cardiac.Comorbidity.Types = data)