Search code examples
r dplyr across

Create dummy variable based on if a number in a range appears in select columns


I am trying to query multiple columns in my dataset to see if a number between 135 and 225 is present.

I've been doing it in a very manual way, but this isn't sustainable since there are many more than 2 columns I need to query. How can I code this more efficiently, so that I can list 6+ column names without it becoming long and burdensome?

# Mark a record "Complete" when either days-since-dx column lies in
# the closed interval [135, 225]. The subscript's closing `]` has to
# come after the final `)` of the logical expression.
sample_dates_test$day180_completion[
  (sample_dates_test$blood_day180_sincedx <= 225 &
     sample_dates_test$blood_day180_sincedx >= 135) |
    (sample_dates_test$prevaxblood.dayneg7_sincedx <= 225 &
       sample_dates_test$prevaxblood.dayneg7_sincedx >= 135)
] <- "Complete"


structure(list(record_id = 2:4, first_dose = structure(c(NA, 
18718, 18660), class = "Date"), second_dose = structure(c(NA, 
18745, 18688), class = "Date"), day0 = structure(c(18449, 18451, 
18462), class = "Date"), blood_it_plus2 = structure(c(18459, 
18464, NA), class = "Date"), blood_day180 = structure(c(18647, 
NA, NA), class = "Date"), blood_day90 = structure(c(NA, 18551, 
NA), class = "Date"), blood_it_plus7 = structure(c(NA, NA, 18470
), class = "Date"), blood_day30 = structure(c(NA_real_, NA_real_, 
NA_real_), class = "Date"), blood_day365 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), postvaxblood.day90 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), blood_day7 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), blood_day0 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), postvaxblood.day30 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), postvaxblood.day7 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), prevaxblood.dayneg7 = structure(c(NA_real_, 
NA_real_, NA_real_), class = "Date"), blood_it_plus2_sincedx = c(10, 
13, NA), blood_it_plus7_sincedx = c(NA, NA, 8), blood_day0_sincedx = c(NA_real_, 
NA_real_, NA_real_), blood_day7_sincedx = c(NA_real_, NA_real_, 
NA_real_), blood_day30_sincedx = c(NA_real_, NA_real_, NA_real_
), blood_day90_sincedx = c(NA, 100, NA), blood_day180_sincedx = c(198, 
NA, NA), blood_day365_sincedx = c(NA_real_, NA_real_, NA_real_
), prevaxblood.dayneg7_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day7_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day30_sincedx = c(NA_real_, NA_real_, NA_real_
), postvaxblood.day90_sincedx = c(NA_real_, NA_real_, NA_real_
), first_dose_sincedx = c(NA, 267, 198), second_dose_sincedx = c(NA, 
294, 226), today = c(378, 376, 365)), reshapeWide = list(v.names = "enrollment_obtdatetime", 
    timevar = "redcap_event_name", idvar = "record_id", times = c("enrollment_arm_1", 
    "it__2_days_arm_1", "day_180__30_days_arm_1", "day_90__14_days_arm_1", 
    "it__7_days_arm_1", "day_30__7_days_arm_1", "day_365__60_days_arm_1", 
    "post_vaccine_day_9_arm_1", "day_7__2_days_arm_1", "day_0_3_days_arm_1", 
    "post_vaccine_day_3_arm_1", "post_vaccine_day_7_arm_1", "vaccine_day_7_to_1_arm_1"
    ), varying = structure(c("enrollment_obtdatetime.enrollment_arm_1", 
    "enrollment_obtdatetime.it__2_days_arm_1", "enrollment_obtdatetime.day_180__30_days_arm_1", 
    "enrollment_obtdatetime.day_90__14_days_arm_1", "enrollment_obtdatetime.it__7_days_arm_1", 
    "enrollment_obtdatetime.day_30__7_days_arm_1", "enrollment_obtdatetime.day_365__60_days_arm_1", 
    "enrollment_obtdatetime.post_vaccine_day_9_arm_1", "enrollment_obtdatetime.day_7__2_days_arm_1", 
    "enrollment_obtdatetime.day_0_3_days_arm_1", "enrollment_obtdatetime.post_vaccine_day_3_arm_1", 
    "enrollment_obtdatetime.post_vaccine_day_7_arm_1", "enrollment_obtdatetime.vaccine_day_7_to_1_arm_1"
    ), .Dim = c(1L, 13L))), row.names = c(1L, 4L, 7L), class = "data.frame")

Solution

  • Let's say your data is named `sample`. Then run:

    library(dplyr, warn.conflicts = FALSE)

    # Label each record by whether at least one of columns 17:28
    # (blood_it_plus2_sincedx through postvaxblood.day90_sincedx in the
    # example data) holds a value in the closed interval [135, 225].
    sample <- sample %>%
      rowwise() %>%
      mutate(
        # na.rm = TRUE matters: without it, a row with no in-range value
        # but at least one NA makes any() return NA, and the label would
        # come out as NA instead of "incomplete".
        dummy = if (any(between(c_across(17:28), 135, 225), na.rm = TRUE)) {
          "complete"
        } else {
          "incomplete"
        }
      ) %>%
      ungroup()
    
    #> # A tibble: 3 x 32
    #>   record_id first_dose second_dose day0       blood_it_plus2 blood_day180
    #>       <int> <date>     <date>      <date>     <date>         <date>      
    #> 1         2 NA         NA          2020-07-06 2020-07-16     2021-01-20  
    #> 2         3 2021-04-01 2021-04-28  2020-07-08 2020-07-21     NA          
    #> 3         4 2021-02-02 2021-03-02  2020-07-19 NA             NA          
    #> # ... with 26 more variables: blood_day90 <date>, blood_it_plus7 <date>,
    #> #   blood_day30 <date>, blood_day365 <date>, postvaxblood.day90 <date>,
    #> #   blood_day7 <date>, blood_day0 <date>, postvaxblood.day30 <date>,
    #> #   postvaxblood.day7 <date>, prevaxblood.dayneg7 <date>,
    #> #   blood_it_plus2_sincedx <dbl>, blood_it_plus7_sincedx <dbl>,
    #> #   blood_day0_sincedx <dbl>, blood_day7_sincedx <dbl>,
    #> #   blood_day30_sincedx <dbl>, blood_day90_sincedx <dbl>,
    #> #   blood_day180_sincedx <dbl>, blood_day365_sincedx <dbl>,
    #> #   prevaxblood.dayneg7_sincedx <dbl>, postvaxblood.day7_sincedx <dbl>,
    #> #   postvaxblood.day30_sincedx <dbl>, postvaxblood.day90_sincedx <dbl>,
    #> #   first_dose_sincedx <dbl>, second_dose_sincedx <dbl>, today <dbl>,
    #> #   dummy <chr>
    

    Created on 2021-07-21 by the reprex package (v2.0.0)