R in Databricks - mutate gives error when using case_when

Using R scripting in Databricks, I want to change the "numeric" to a new "~ numeric" and create a new variable called dc_17$strength_conc_per_uom.

Can anyone give an insight as to why this mutate code only gives errors when using case_when.

dc_17$strength_extract <- as.character(dc_17$strength_extract)
dc_17$strength_conc_per_uom <- dc_17 %>% mutate(strength_extract = case_when(
                          str_detect(strength_extract, "1") ~ 1,
                          str_detect(strength_extract, "8") ~ 8, 
                          str_detect(strength_extract, "20") ~ 20,
                          str_detect(strength_extract, "30") ~ 30,
                          str_detect(strength_extract, "55") ~ 1,
                          str_detect(strength_extract, "105") ~ 2,
                          str_detect(strength_extract, "155") ~ 3,
                          str_detect(strength_extract, "255") ~ 5,
                          str_detect(strength_extract, "505") ~ 10,
                          str_detect(strength_extract, "1005") ~ 20,
                          str_detect(strength_extract, "3030") ~ 1,
                          str_detect(strength_extract, "5050") ~ 1,
                          str_detect(strength_extract, "6060") ~ 1,
                     TRUE ~ NA_character_))

I continue to get an error for this

Error in mutate(., strength_extract = case_when(str_detect(strength_extract,  : 
  Problem while computing `strength_extract = case_when(...)`.
Caused by error in `case_when()`:
Error in `mutate()`:
Error in `mutate()`:
! Problem while computing `strength_extract = case_when(...)`.
Caused by error in `case_when()`:

also tried the following to resolve the issue, however they also gives errors.

dc_17$strength_conc_per_uom <- NA_integer_
dc_17$strength_extract <- as.character(dc_17$strength_extract)

dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "1")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "8")] <- 8
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "20")] <- 20
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "30")] <- 30
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "55")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "105")] <- 2
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "155")] <- 3
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "255")] <- 5
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "505")] <- 10
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "1005")] <- 20
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "3030")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "5050")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "6060")] <- 1

This approach returns only NAs

and

dc_17$strength_extract <- as.character(dc_17$strength_extract)

dc_17$strength_conc_per_uom <- ifelse(dc_17$strength_extract == "1", 1,
ifelse(dc_17$strength_extract == "8", 8,
ifelse(dc_17$strength_extract == "20", 20,
ifelse(dc_17$strength_extract == "30", 30,
ifelse(dc_17$strength_extract == "55", 1,
ifelse(dc_17$strength_extract == "105", 2,
ifelse(dc_17$strength_extract == "155", 3,
ifelse(dc_17$strength_extract == "255", 5,
ifelse(dc_17$strength_extract == "505", 10,
ifelse(dc_17$strength_extract == "1005", 20,
ifelse(dc_17$strength_extract == "3030", 1,
ifelse(dc_17$strength_extract == "5050", 1,
ifelse(dc_17$strength_extract == "6060", 1, NA)))))))))))))

this approach drops values 2, 3, 5 and 10 in the output

Solution

You can easily avoid such tedious ifelse excesses.

Consider this data frame.

df
#    strength          X
# 1       255 0.20514564
# 2        20 0.50183656
# 3         1 0.15259178
# 4       155 0.15909367
# 5      1005 0.68420944
# 6       505 0.80878910
# 7        55 0.57800968
# 8      6060 0.88166418
# 9       105 0.08513027
# 10        8 0.36283751
# 11     5050 0.91330926
# 12     3030 0.65183322
# 13       30 0.02780792

Create a substitution vector,

u <- c(`1`=1, `8`=8, `20`=20, `30`=30, `55`=1, `105`=2, `155`=3, `255`=5, `505`=10, 
       `1005`=20, `3030`=1, `5050`=1, `6060`=1)

and match it with the desired column like so:

df$strength <- u[match(df$strength, names(u))]

Gives

df
#    strength          X
# 1         5 0.20514564
# 2        20 0.50183656
# 3         1 0.15259178
# 4         3 0.15909367
# 5        20 0.68420944
# 6        10 0.80878910
# 7         1 0.57800968
# 8         1 0.88166418
# 9         2 0.08513027
# 10        8 0.36283751
# 11        1 0.91330926
# 12        1 0.65183322
# 13       30 0.02780792

Data:

df <- structure(list(strength = c("3030", "8", "1", "255", "105", "505", 
"30", "1005", "6060", "5050", "20", "155", "55"), X = c(0.205145640065894, 
0.501836559036747, 0.15259177563712, 0.159093674505129, 0.684209441067651, 
0.808789104921743, 0.578009682707489, 0.881664179963991, 0.0851302673108876, 
0.362837513675913, 0.913309262134135, 0.651833220385015, 0.0278079155832529
)), class = "data.frame", row.names = c(NA, -13L))