I have one categorical column per time point, and I want to convert them into a set of boolean columns (one per category and time point) with mutate()
and across(),
like this:
# Example data: one categorical column per time point (t1, t2, t3),
# with a few missing values.
data <- data.frame(
  category_t1 = c("A", "B", "C", "C", "A", "B"),
  category_t2 = c("A", "C", "B", "B", "B", NA),
  category_t3 = c("C", "C", NA, "B", NA, "A")
)
# For each category (A, B, C), add one logical column per time point:
# TRUE when the value equals the category, FALSE for any other non-missing
# value, and NA when the value is missing (no case_when() branch matches).
data |>
  mutate(
    across(
      starts_with("category"),
      \(col) case_when(col == "A" ~ TRUE, !is.na(col) ~ FALSE),
      .names = "{str_replace(.col, 'category', 'A')}"
    ),
    across(
      starts_with("category"),
      \(col) case_when(col == "B" ~ TRUE, !is.na(col) ~ FALSE),
      .names = "{str_replace(.col, 'category', 'B')}"
    ),
    across(
      starts_with("category"),
      \(col) case_when(col == "C" ~ TRUE, !is.na(col) ~ FALSE),
      .names = "{str_replace(.col, 'category', 'C')}"
    )
  )
Which makes :
category_t1 category_t2 category_t3 A_t1 A_t2 A_t3 B_t1 B_t2 B_t3 C_t1 C_t2 C_t3
1 A A C TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
2 B C C FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
3 C B <NA> FALSE FALSE NA FALSE TRUE NA TRUE FALSE NA
4 C B B FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE
5 A B <NA> TRUE FALSE NA FALSE TRUE NA FALSE FALSE NA
6 B <NA> A FALSE NA TRUE TRUE NA FALSE FALSE NA FALSE
It works, but I would like to know if there is a better way, because here I am repeating the same code 3 times instead of writing it once (and imagine if I had to repeat it 10 times...). I thought I could do it with map()
but I didn't manage to make it work.
I think there is a problem because of .names
argument in across()
that cannot connect with the string I use in case_when()
.
I think maybe there is something to do in the ...
argument, like :
# Attempted generalisation (does not work): pass the category values through
# across()'s `...` as `mod`, compare against `mod` in case_when(), and reuse
# `mod` inside the `.names` glue specification.
data %>% mutate(across(starts_with("category"),
~case_when(.x == mod ~ TRUE, !is.na(.x) ~ FALSE),
mod = levels(as.factor(data$category_t1)),
.names = "{str_replace(.col, 'category', mod)}"))
But of course that doesn't work here. Do you know how to do that ?
Thanks a lot.
We may use table
in across
library(dplyr)
library(stringr)
library(tidyr)
# One-hot encode each category column in a single across() call.
# For every selected column, table() cross-tabulates the row index against the
# value, giving one count column per observed level. table() skips NA values,
# so a row with a missing input would be all zeros; multiplying by
# NA^is.na(.x) (NA^0 is 1, NA^1 is NA) turns those rows into NA before `> 0`
# converts the counts to logicals.
# Only the category_* columns are selected (the same prefix that `.names`
# strips), so any unrelated columns in `data` are left untouched.
data %>%
  mutate(
    across(
      starts_with("category"),
      ~ as.data.frame.matrix(
        table(row_number(), .x) * NA^(is.na(.x)) > 0
      ),
      .names = "{str_remove(.col, 'category_')}"
    )
  ) %>%
  # Each new column (t1, t2, ...) is itself a data frame; flatten them into
  # ordinary columns named t1.A, t1.B, ...
  unpack(where(is.data.frame), names_sep = ".")
-output
# A tibble: 6 × 12
category_t1 category_t2 category_t3 t1.A t1.B t1.C t2.A t2.B t2.C t3.A t3.B t3.C
<chr> <chr> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 A A C TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
2 B C C FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
3 C B <NA> FALSE FALSE TRUE FALSE TRUE FALSE NA NA NA
4 C B B FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
5 A B <NA> TRUE FALSE FALSE FALSE TRUE FALSE NA NA NA
6 B <NA> A FALSE TRUE FALSE NA NA NA TRUE FALSE FALSE
Or use model.matrix
from base R
# One-hot encode with model.matrix(). Missing values are first recoded to the
# string "NA" so that they behave as an ordinary factor level.
data1 <- data
data1[is.na(data1)] <- "NA"
# Every column gets the same level set, with "NA" listed first so that it
# serves as the baseline level.
lvls <- lapply(data1, function(x) c("NA", "A", "B", "C"))
m1 <- model.matrix(~ 0 + ., data = data1, xlev = lvls)
# Drop the "NA" indicator column(s) and turn the 0/1 design matrix into
# logicals. Note that a missing input therefore yields FALSE (not NA) in all
# of its indicator columns.
na_cols <- grep("NA", colnames(m1))
out <- cbind(data, m1[, -na_cols] > 0)
-output
out
category_t1 category_t2 category_t3 category_t1A category_t1B category_t1C category_t2A category_t2B category_t2C category_t3A category_t3B category_t3C
1 A A C TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
2 B C C FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
3 C B <NA> FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
4 C B B FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
5 A B <NA> TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
6 B <NA> A FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
> colnames(out)
[1] "category_t1" "category_t2" "category_t3"
[4] "category_t1A" "category_t1B" "category_t1C"
[7] "category_t2A" "category_t2B" "category_t2C"
[10] "category_t3A"
[11] "category_t3B" "category_t3C"
Or another option with table
# Base R version of the table() approach: build one logical indicator matrix
# per column, then bind them all next to the original data.
indicators <- lapply(data, function(col) {
  counts <- table(seq_along(col), col)
  # NA^FALSE is 1 and NA^TRUE is NA, so rows with a missing input become NA
  # instead of all-FALSE.
  (counts * NA^is.na(col)) > 0
})
cbind(data, do.call(cbind.data.frame, indicators))
-output
category_t1 category_t2 category_t3 category_t1.A category_t1.B category_t1.C category_t2.A category_t2.B category_t2.C category_t3.A category_t3.B
1 A A C TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
2 B C C FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE
3 C B <NA> FALSE FALSE TRUE FALSE TRUE FALSE NA NA
4 C B B FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE
5 A B <NA> TRUE FALSE FALSE FALSE TRUE FALSE NA NA
6 B <NA> A FALSE TRUE FALSE NA NA NA TRUE FALSE
category_t3.C
1 TRUE
2 TRUE
3 NA
4 FALSE
5 NA
6 FALSE