Using R scripting in Databricks, I want to change the "numeric" to a new "~ numeric" and create a new variable called dc_17$strength_conc_per_uom.
Can anyone give an insight as to why this mutate code only gives errors when using case_when.
dc_17$strength_extract <- as.character(dc_17$strength_extract)
dc_17$strength_conc_per_uom <- dc_17 %>% mutate(strength_extract = case_when(
str_detect(strength_extract, "1") ~ 1,
str_detect(strength_extract, "8") ~ 8,
str_detect(strength_extract, "20") ~ 20,
str_detect(strength_extract, "30") ~ 30,
str_detect(strength_extract, "55") ~ 1,
str_detect(strength_extract, "105") ~ 2,
str_detect(strength_extract, "155") ~ 3,
str_detect(strength_extract, "255") ~ 5,
str_detect(strength_extract, "505") ~ 10,
str_detect(strength_extract, "1005") ~ 20,
str_detect(strength_extract, "3030") ~ 1,
str_detect(strength_extract, "5050") ~ 1,
str_detect(strength_extract, "6060") ~ 1,
TRUE ~ NA_character_))
I continue to get an error for this
Error in mutate(., strength_extract = case_when(str_detect(strength_extract, :
Problem while computing `strength_extract = case_when(...)`.
Caused by error in `case_when()`:
Error in `mutate()`:
Error in `mutate()`:
! Problem while computing `strength_extract = case_when(...)`.
Caused by error in `case_when()`:
also tried the following to resolve the issue, however they also gives errors.
dc_17$strength_conc_per_uom <- NA_integer_
dc_17$strength_extract <- as.character(dc_17$strength_extract)
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "1")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "8")] <- 8
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "20")] <- 20
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "30")] <- 30
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "55")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "105")] <- 2
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "155")] <- 3
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "255")] <- 5
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "505")] <- 10
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "1005")] <- 20
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "3030")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "5050")] <- 1
dc_17$strength_conc_per_uom[str_detect(dc_17$strength_extract, "6060")] <- 1
This approach returns only NAs
and
dc_17$strength_extract <- as.character(dc_17$strength_extract)
dc_17$strength_conc_per_uom <- ifelse(dc_17$strength_extract == "1", 1,
ifelse(dc_17$strength_extract == "8", 8,
ifelse(dc_17$strength_extract == "20", 20,
ifelse(dc_17$strength_extract == "30", 30,
ifelse(dc_17$strength_extract == "55", 1,
ifelse(dc_17$strength_extract == "105", 2,
ifelse(dc_17$strength_extract == "155", 3,
ifelse(dc_17$strength_extract == "255", 5,
ifelse(dc_17$strength_extract == "505", 10,
ifelse(dc_17$strength_extract == "1005", 20,
ifelse(dc_17$strength_extract == "3030", 1,
ifelse(dc_17$strength_extract == "5050", 1,
ifelse(dc_17$strength_extract == "6060", 1, NA)))))))))))))
this approach drops values 2, 3, 5 and 10 in the output
You can easily avoid such tedious ifelse
excesses.
Consider this data frame.
df
# strength X
# 1 255 0.20514564
# 2 20 0.50183656
# 3 1 0.15259178
# 4 155 0.15909367
# 5 1005 0.68420944
# 6 505 0.80878910
# 7 55 0.57800968
# 8 6060 0.88166418
# 9 105 0.08513027
# 10 8 0.36283751
# 11 5050 0.91330926
# 12 3030 0.65183322
# 13 30 0.02780792
Create a substitu
tion vector,
u <- c(`1`=1, `8`=8, `20`=20, `30`=30, `55`=1, `105`=2, `155`=3, `255`=5, `505`=10,
`1005`=20, `3030`=1, `5050`=1, `6060`=1)
and match
it with the desired column like so:
df$strength <- u[match(df$strength, names(u))]
df
# strength X
# 1 5 0.20514564
# 2 20 0.50183656
# 3 1 0.15259178
# 4 3 0.15909367
# 5 20 0.68420944
# 6 10 0.80878910
# 7 1 0.57800968
# 8 1 0.88166418
# 9 2 0.08513027
# 10 8 0.36283751
# 11 1 0.91330926
# 12 1 0.65183322
# 13 30 0.02780792
Data:
df <- structure(list(strength = c("3030", "8", "1", "255", "105", "505",
"30", "1005", "6060", "5050", "20", "155", "55"), X = c(0.205145640065894,
0.501836559036747, 0.15259177563712, 0.159093674505129, 0.684209441067651,
0.808789104921743, 0.578009682707489, 0.881664179963991, 0.0851302673108876,
0.362837513675913, 0.913309262134135, 0.651833220385015, 0.0278079155832529
)), class = "data.frame", row.names = c(NA, -13L))