Search code examples
r dataframe dplyr purrr tibble

How to set_names on a list column using the tidyverse: tibble, purrr, dplyr


Short version: I would like to be able to apply set_names() to a "list-column" returned from a summarise() call. For example, if I have a list-column built with the range() function, I would like to be able to set its names to "min" and "max".

Below are the details and a reproducible example.

library(tidyverse)

# Baseline: wrapping range() in list() inside summarise() gives a list-column
# with one length-2 numeric vector per `vore` group.
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total)))
#> # A tibble: 5 x 2
#>   vore    sleep_total_range
#>   <chr>   <list>           
#> 1 carni   <dbl [2]>        
#> 2 herbi   <dbl [2]>        
#> 3 insecti <dbl [2]>        
#> 4 omni    <dbl [2]>        
#> 5 <NA>    <dbl [2]>

# I would like to be able to identify and label (i.e., set_names()) the
# min and max columns

# Fail 1: No Column, No Labels
# Unnesting the bare range() vectors stacks both values of each group in a
# single column, with nothing to tell the min row from the max row.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  # Name the list-column explicitly: argument-less unnest() is deprecated.
  unnest(sleep_total_range)
#> # A tibble: 10 x 2
#>    vore    sleep_total_range
#>    <chr>               <dbl>
#>  1 carni                 2.7
#>  2 carni                19.4
#>  3 herbi                 1.9
#>  4 herbi                16.6
#>  5 insecti               8.4
#>  6 insecti              19.9
#>  7 omni                  8  
#>  8 omni                 18  
#>  9 <NA>                  5.4
#> 10 <NA>                 13.7

# Fail 2: Column, but labels are not correct
# enframe() on an unnamed vector fills the name column with positions (1, 2),
# so the rows are distinguishable but not labelled "min" / "max".
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total) %>% enframe(name = "range_col"))
  ) %>%
  # Name the list-column explicitly: argument-less unnest() is deprecated.
  unnest(sleep_total_range)
#> # A tibble: 10 x 3
#>    vore    range_col value
#>    <chr>       <int> <dbl>
#>  1 carni           1   2.7
#>  2 carni           2  19.4
#>  3 herbi           1   1.9
#>  4 herbi           2  16.6
#>  5 insecti         1   8.4
#>  6 insecti         2  19.9
#>  7 omni            1   8  
#>  8 omni            2  18  
#>  9 <NA>            1   5.4
#> 10 <NA>            2  13.7

Desired Results

# Success: This is my desired result/output, but it feels verbose, 
# and not very "tidyverse / purrr"
# enframe() numbers the two range() values 1 and 2; those positions are then
# recoded by hand to "min" and "max" after unnesting.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total) %>% enframe(name = "range_col"))
  ) %>%
  # Name the list-column explicitly: argument-less unnest() is deprecated.
  unnest(sleep_total_range) %>%
  mutate(
    # Position 1 is the minimum, position 2 the maximum.
    range_col = if_else(range_col == 1L, "min", "max")
  )
#> # A tibble: 10 x 3
#>    vore    range_col value
#>    <chr>   <chr>     <dbl>
#>  1 carni   min         2.7
#>  2 carni   max        19.4
#>  3 herbi   min         1.9
#>  4 herbi   max        16.6
#>  5 insecti min         8.4
#>  6 insecti max        19.9
#>  7 omni    min         8  
#>  8 omni    max        18  
#>  9 <NA>    min         5.4
#> 10 <NA>    max        13.7

Close but not yet...

# I thought I was close with this: naming works when set_names() is applied
# to a single vector pulled out of the list-column.
temp <- msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total)))

temp$sleep_total_range[[1]] %>%
  set_names(c("min", "max")) %>%
  enframe()
#> # A tibble: 2 x 2
#>   name  value
#>   <chr> <dbl>
#> 1 min     2.7
#> 2 max    19.4

# But this obviously does not work...
# Here the pipe feeds the result of list(...) into set_names(), i.e. the
# length-1 list wrapper rather than the length-2 vector from range(), so the
# two names do not match its length and set_names() raises the error below.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total)) %>% 
        set_names(c("min", "max")) %>% 
        enframe()
  )
#> `nm` must be `NULL` or a character vector the same length as `x`

Created on 2019-07-18 by the reprex package (v0.3.0)


Solution

  • We can get this as two columns if we create a tibble

    library(tidyverse)
    # Name the two range() values, wrap them in a one-row tibble per group,
    # then unnest so that `min` and `max` become regular columns.
    msleep %>%
      group_by(vore) %>%
      summarise(
        sleep_total_range = list(
          as_tibble(setNames(as.list(range(sleep_total)), c("min", "max")))
        )
      ) %>%
      # Name the list-column explicitly: argument-less unnest() is deprecated.
      unnest(sleep_total_range)
    

    - Tested the code with packageVersion('dplyr') # [1] ‘0.8.99.9000’. With the changed behavior, unnest() should be given the column explicitly to avoid a warning:

    # Build a one-row tibble (columns `min`, `max`) per group inside the
    # list-column, then unnest that column into the result.
    msleep %>%
      group_by(vore) %>%
      summarise(
        sleep_total_range = list(
          range(sleep_total) %>%
            as.list() %>%
            setNames(c("min", "max")) %>%
            as_tibble()
        )
      ) %>%
      unnest(c(sleep_total_range)) # column given explicitly; otherwise a warning
    # A tibble: 5 x 3
    #  vore      min   max
    #  <chr>   <dbl> <dbl>
    #1 carni     2.7  19.4
    #2 herbi     1.9  16.6
    #3 insecti   8.4  19.9
    #4 omni      8    18  
    #5 <NA>      5.4  13.7