Search code examples
rsimplify

How to simplify converting several column attributes and recoding multiple columns?


I have several lines of code that I'm trying to simplify, but my attempts so far have resulted in errors. Below is a small section of the code:

# Convert the columns Cope1 through Cope20 of SS_data to numeric,
# one explicit assignment per column (the repetition the question asks about).
SS_data$Cope1 <- as.numeric(SS_data$Cope1)
SS_data$Cope2 <- as.numeric(SS_data$Cope2)
SS_data$Cope3 <- as.numeric(SS_data$Cope3)
SS_data$Cope4 <- as.numeric(SS_data$Cope4)
SS_data$Cope5 <- as.numeric(SS_data$Cope5)
SS_data$Cope6 <- as.numeric(SS_data$Cope6)
SS_data$Cope7 <- as.numeric(SS_data$Cope7)
SS_data$Cope8 <- as.numeric(SS_data$Cope8)
SS_data$Cope9 <- as.numeric(SS_data$Cope9)
SS_data$Cope10 <- as.numeric(SS_data$Cope10)
SS_data$Cope11 <- as.numeric(SS_data$Cope11)
SS_data$Cope12 <- as.numeric(SS_data$Cope12)
SS_data$Cope13 <- as.numeric(SS_data$Cope13)
SS_data$Cope14 <- as.numeric(SS_data$Cope14)
SS_data$Cope15 <- as.numeric(SS_data$Cope15)
SS_data$Cope16 <- as.numeric(SS_data$Cope16)
SS_data$Cope17 <- as.numeric(SS_data$Cope17)
SS_data$Cope18 <- as.numeric(SS_data$Cope18)
SS_data$Cope19 <- as.numeric(SS_data$Cope19)
SS_data$Cope20 <- as.numeric(SS_data$Cope20)

I'm also trying to simplify the code below. I end up repeating the same recoding for each variable, and I'm wondering if there is a way to simplify this as well.

# Recode the five satisfaction labels in WHOQOL16..WHOQOL19 to the scores 1-5
# by overwriting the matching elements in place; the same five assignments
# are repeated for every variable.
# NOTE(review): if these objects are character vectors, the assigned numbers
# are presumably coerced to strings ("1".."5") rather than stored as
# numerics - confirm the intended type.
WHOQOL16[WHOQOL16 == "Very dissatisfied"] <- 1
WHOQOL16[WHOQOL16 == "Dissatisfied"] <- 2
WHOQOL16[WHOQOL16 == "Neither satisfied nor dissatisfied"] <- 3
WHOQOL16[WHOQOL16 == "Satisfied"] <- 4
WHOQOL16[WHOQOL16 == "Very satisfied"] <- 5
              
WHOQOL17[WHOQOL17 == "Very dissatisfied"] <- 1
WHOQOL17[WHOQOL17 == "Dissatisfied"] <- 2
WHOQOL17[WHOQOL17 == "Neither satisfied nor dissatisfied"] <- 3
WHOQOL17[WHOQOL17 == "Satisfied"] <- 4
WHOQOL17[WHOQOL17 == "Very satisfied"] <- 5
              
WHOQOL18[WHOQOL18 == "Very dissatisfied"] <- 1
WHOQOL18[WHOQOL18 == "Dissatisfied"] <- 2
WHOQOL18[WHOQOL18 == "Neither satisfied nor dissatisfied"] <- 3
WHOQOL18[WHOQOL18 == "Satisfied"] <- 4
WHOQOL18[WHOQOL18 == "Very satisfied"] <- 5
              
WHOQOL19[WHOQOL19 == "Very dissatisfied"] <- 1
WHOQOL19[WHOQOL19 == "Dissatisfied"] <- 2
WHOQOL19[WHOQOL19 == "Neither satisfied nor dissatisfied"] <- 3
WHOQOL19[WHOQOL19 == "Satisfied"] <- 4
WHOQOL19[WHOQOL19 == "Very satisfied"] <- 5

Solution

  • In dplyr you can use the across() function to apply the same function to multiple columns.

    We change columns that start with "Cope" to numeric and recode the columns which start with "WHOQOL".

    library(dplyr)
    
    # One mutate() call handles both jobs:
    #   * every column whose name starts with "Cope" is converted to numeric;
    #   * every column whose name starts with "WHOQOL" has its satisfaction
    #     labels mapped onto the scores 1-5.
    SS_data_new <- mutate(
      SS_data,
      across(starts_with("Cope"), as.numeric),
      across(
        starts_with("WHOQOL"),
        function(answer) {
          recode(
            answer,
            "Very dissatisfied" = 1,
            "Dissatisfied" = 2,
            "Neither satisfied nor dissatisfied" = 3,
            "Satisfied" = 4,
            "Very satisfied" = 5
          )
        }
      )
    )

    # Printed result for the sample data:
    SS_data_new
    #   Cope1 Cope2 WHOQOL
    # 1     1     4      1
    # 2     2     5      1
    # 3     3     6      4

    # All three columns are numeric after the conversion:
    str(SS_data_new)
    # 'data.frame':   3 obs. of  3 variables:
    #  $ Cope1 : num  1 2 3
    #  $ Cope2 : num  4 5 6
    #  $ WHOQOL: num  1 1 4
    

    data

    # Minimal reproducible input: two "Cope" columns holding numbers stored as
    # text and one "WHOQOL" column holding satisfaction labels.
    SS_data <- data.frame(
      Cope1 = c("1", "2", "3"),
      Cope2 = c("4", "5", "6"),
      WHOQOL = c("Very dissatisfied", "Very dissatisfied", "Satisfied"),
      # Keep the columns as character on R < 4.0.0 as well; as factors,
      # as.numeric() would return the level codes (1, 2, 3 for Cope2)
      # instead of the values 4, 5, 6.
      stringsAsFactors = FALSE
    )