I need to pivot longer grouped by column string prefix. the toy example below has two groups "A" and "B" but I need a general tidyverse solution for any number of groups by prefix.
#toy df
set.seed(1)
df <- data.table(
date = rep(seq(as.Date("2020-01-01"),as.Date("2020-01-05"),by="day"),each=6),
k = rep(c("A.mean","A.median","A.min","B.mean","B.median","B.min"),5),
v = runif(30,0,50)
) %>%
pivot_wider(names_from = k, values_from = v)
df %>% head
date A.mean A.median A.min B.mean B.median B.min
<date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2020-01-01 13.3 18.6 28.6 45.4 10.1 44.9
2 2020-01-02 47.2 33.0 31.5 3.09 10.3 8.83
3 2020-01-03 34.4 19.2 38.5 24.9 35.9 49.6
4 2020-01-04 19.0 38.9 46.7 10.6 32.6 6.28
5 2020-01-05 13.4 19.3 0.670 19.1 43.5 17.0
#pivot longer by group prefix
df %>%
select(date,matches("A\\.")) %>%
rename_with(~str_replace(.x,"A\\.","")) %>%
mutate( k = "A") %>%
bind_rows(
df %>%
select(date,matches("B\\.")) %>%
rename_with(~str_replace(.x,"B\\.","")) %>%
mutate( k = "B")
)
date mean median min k
<date> <dbl> <dbl> <dbl> <chr>
1 2020-01-01 13.3 18.6 28.6 A
2 2020-01-02 47.2 33.0 31.5 A
3 2020-01-03 34.4 19.2 38.5 A
4 2020-01-04 19.0 38.9 46.7 A
5 2020-01-05 13.4 19.3 0.670 A
6 2020-01-01 45.4 10.1 44.9 B
7 2020-01-02 3.09 10.3 8.83 B
8 2020-01-03 24.9 35.9 49.6 B
9 2020-01-04 10.6 32.6 6.28 B
10 2020-01-05 19.1 43.5 17.0 B
Here is a two step process (shown in two lines for demonstration purposes). First pivot longer to create columns for k, stat name and value, then pivot wider to create the desired result.
The edited code below, one can obtain the answer is one step, by using the ".value" wild card option in the "names_to" specifier.
library(tidyr)
set.seed(1)
df <- data.frame(
date = rep(seq(as.Date("2020-01-01"),as.Date("2020-01-05"),by="day"),each=6),
k = rep(c("A.mean","A.median","A.min","B.mean","B.median","B.min"),5),
v = runif(30,0,50)
) %>%
pivot_wider(names_from = k, values_from = v)
#temp <- pivot_longer(df, -date, names_sep = "\\.", names_to = c("k", "stat"))
#answer <- pivot_wider(temp, id_cols = c("date", "k"), names_from= "stat", values_from="value")
#updated answer simplified down to just the pivot longer function
answer <- pivot_longer(df, -date, names_sep = "\\.", names_to = c("k", ".value"))
print(head(answer))
# A tibble: 6 x 5
date k mean median min
<date> <chr> <dbl> <dbl> <dbl>
1 2020-01-01 A 13.3 18.6 28.6
2 2020-01-01 B 45.4 10.1 44.9
3 2020-01-02 A 47.2 33.0 31.5
4 2020-01-02 B 3.09 10.3 8.83
5 2020-01-03 A 34.4 19.2 38.5
6 2020-01-03 B 24.9 35.9 49.6