Search code examples
r function dplyr tidyverse tibble

Follow-up: Re-ordering a symmetric tibble


I'm following up on this great answer. In particular, I was wondering how to re-order the values in the output's first column (which are really row names) so that they appear in the same order as the remaining column names?

Two solutions were suggested to me, but neither of them seems to work inside the function. Is there a tidyverse solution to this problem?

#' Cross-tabulate studies (and effects) for each pair of moderator levels
#'
#' For every moderator column passed through `...`, the rows of `data` are
#' counted per study/level combination, the counts are self-joined on the
#' study column, and each pair of levels is summarised as
#' `"<studies> (<effects>)"` in a wide, square table.
#'
#' The wide columns are left in `tidyr::pivot_wider()`'s default order (no
#' `names_sort`), so they are not explicitly re-ordered here.
#'
#' The magrittr pipe `%>%` must be available to the caller (for example via
#' `library(dplyr)`); every other function is namespace-qualified.
#'
#' @param data A data frame.
#' @param study_id Unquoted name of the column identifying the study.
#' @param ... Unquoted names of one or more moderator columns.
#' @return A list with one tibble per moderator. The first column carries the
#'   moderator's name and holds its levels; the remaining columns are named
#'   after the levels and hold `"<studies> (<effects>)"` strings (`NA` where a
#'   pair of levels never occurs within the same study).
foo <- function(data, study_id, ...) {
  study_id <- rlang::ensym(study_id)
  cat_mod <- rlang::ensyms(...)

  purrr::map(cat_mod, ~ {
    # Number of rows (effects) per study and moderator level.
    # dplyr::n() is qualified so this works without dplyr being attached.
    studies_cats <-
      data %>%
      dplyr::group_by(!!study_id, !!.x) %>%
      dplyr::summarise(effects = dplyr::n(), .groups = "drop")

    nm1 <- rlang::as_string(.x)
    # Names the self-join gives to the two copies of the moderator column.
    cat_names <- paste0(nm1, c(".x", ".y"))

    # Pair every level with every other level seen in the same study, then
    # count the joined rows (studies) and sum the first level's row counts
    # (effects) for each pair.
    studies_cats <-
      studies_cats %>%
      dplyr::inner_join(studies_cats, by = rlang::as_string(study_id)) %>%
      dplyr::group_by(!!!rlang::syms(cat_names)) %>%
      dplyr::summarise(
        studies = dplyr::n(),
        effects = sum(effects.x),
        .groups = "drop"
      ) %>%
      dplyr::mutate(n = paste0(studies, " (", effects, ")"))

    # Spread the second copy of the moderator into columns and give the first
    # copy its original name back.
    studies_cats %>%
      dplyr::select(-studies, -effects) %>%
      tidyr::pivot_wider(names_from = cat_names[2], values_from = n) %>%
      dplyr::rename_with(~nm1, cat_names[1])
  })
}

# EXAMPLE OF USE (notice that the column names are ordered `0,1,10,2,3` but
# the first column's values are ordered `0,1,2,3,10`):

# Read the example data into `d` and tabulate `error.type` by `study`.
d <- read.csv("https://raw.githubusercontent.com/rnorouzian/s/main/w7_smd_raw.csv")
foo(d, study, error.type)

#  error.type `0`      `1`      `10`   `2`    `3`   
#  <fct>      <chr>    <chr>    <chr>  <chr>  <chr> 
#1 0          27 (189) 1 (6)    1 (2)  NA     NA    
#2 1          1 (18)   16 (118) 2 (10) 2 (6)  2 (6) 
#3 2          NA       2 (6)    NA     6 (33) 2 (6) 
#4 3          NA       2 (6)    NA     2 (6)  5 (27)
#5 10         1 (2)    2 (22)   6 (48) NA     NA   

Solution

  • By default (names_sort = FALSE), pivot_wider() creates the new columns in the order in which the unique values first appear in the names_from column. To make the column order follow the values themselves, one option is to arrange() the data by that column (which is numeric here) before pivoting. Arranging first is also more flexible: if, for example, the order should depend only on part of each value (say, an embedded number), that part can be extracted and used for ordering, which names_sort = TRUE cannot do.

    #' Cross-tabulate studies (and effects) per pair of moderator levels,
    #' with rows and columns in the same sorted order
    #'
    #' For every moderator column passed through `...`, the rows of `data` are
    #' counted per study/level combination, the counts are self-joined on the
    #' study column, and each pair of levels is summarised as
    #' `"<studies> (<effects>)"` in a wide, square table. The long table is
    #' arranged by the column-forming variable before pivoting, and the result
    #' is arranged by the moderator column afterwards.
    #'
    #' The magrittr pipe `%>%` must be available to the caller (for example via
    #' `library(dplyr)`); every other function is namespace-qualified.
    #'
    #' @param data A data frame.
    #' @param study_id Unquoted name of the column identifying the study.
    #' @param ... Unquoted names of one or more moderator columns.
    #' @return A list with one tibble per moderator. The first column carries
    #'   the moderator's name and holds its levels as character; the remaining
    #'   columns are named after the levels and hold `"<studies> (<effects>)"`
    #'   strings (`NA` where a pair of levels never occurs in the same study).
    foo <- function(data, study_id, ...) {
      study_id <- rlang::ensym(study_id)
      cat_mod <- rlang::ensyms(...)

      purrr::map(cat_mod, ~ {
        # Number of rows (effects) per study and moderator level.
        # dplyr::n() is qualified so this works without dplyr being attached.
        studies_cats <-
          data %>%
          dplyr::group_by(!!study_id, !!.x) %>%
          dplyr::summarise(effects = dplyr::n(), .groups = "drop")

        nm1 <- rlang::as_string(.x)
        # Names the self-join gives to the two copies of the moderator column.
        cat_names <- paste0(nm1, c(".x", ".y"))

        # Pair every level with every other level seen in the same study, then
        # count the joined rows (studies) and sum the first level's row counts
        # (effects) for each pair.
        studies_cats <-
          studies_cats %>%
          dplyr::inner_join(studies_cats, by = rlang::as_string(study_id)) %>%
          dplyr::group_by(!!!rlang::syms(cat_names)) %>%
          dplyr::summarise(
            studies = dplyr::n(),
            effects = sum(effects.x),
            .groups = "drop"
          ) %>%
          dplyr::mutate(n = paste0(studies, " (", effects, ")"))

        studies_cats %>%
          dplyr::select(-studies, -effects) %>%
          # pivot_wider() creates columns in order of first appearance, so
          # sort by the column-forming variable before pivoting.
          dplyr::arrange(dplyr::across(dplyr::all_of(cat_names[2]))) %>%
          tidyr::pivot_wider(names_from = cat_names[2], values_from = n) %>%
          dplyr::rename_with(~nm1, cat_names[1]) %>%
          # Sort the rows while the moderator still has its original type,
          # and only then convert it to character.
          dplyr::arrange(dplyr::across(dplyr::all_of(nm1))) %>%
          dplyr::mutate(dplyr::across(dplyr::all_of(nm1), as.character))
      })
    }
    

    -output

    foo(d, study, error.type)
    [[1]]
    # A tibble: 5 x 6
      error.type `0`      `1`      `2`    `3`    `10`  
      <chr>      <chr>    <chr>    <chr>  <chr>  <chr> 
    1 0          27 (189) 1 (6)    <NA>   <NA>   1 (2) 
    2 1          1 (18)   16 (118) 2 (6)  2 (6)  2 (10)
    3 2          <NA>     2 (6)    6 (33) 2 (6)  <NA>  
    4 3          <NA>     2 (6)    2 (6)  5 (27) <NA>  
    5 10         1 (2)    2 (22)   <NA>   <NA>   6 (48)
    

    Alternatively, use names_sort = TRUE in pivot_wider():

    #' Cross-tabulate studies (and effects) per pair of moderator levels,
    #' sorting the wide columns with `names_sort = TRUE`
    #'
    #' For every moderator column passed through `...`, the rows of `data` are
    #' counted per study/level combination, the counts are self-joined on the
    #' study column, and each pair of levels is summarised as
    #' `"<studies> (<effects>)"` in a wide, square table. Column order is
    #' delegated to `tidyr::pivot_wider(names_sort = TRUE)`, and the result is
    #' arranged by the moderator column afterwards.
    #'
    #' The magrittr pipe `%>%` must be available to the caller (for example via
    #' `library(dplyr)`); every other function is namespace-qualified.
    #'
    #' @param data A data frame.
    #' @param study_id Unquoted name of the column identifying the study.
    #' @param ... Unquoted names of one or more moderator columns.
    #' @return A list with one tibble per moderator. The first column carries
    #'   the moderator's name and holds its levels as character; the remaining
    #'   columns are named after the levels and hold `"<studies> (<effects>)"`
    #'   strings (`NA` where a pair of levels never occurs in the same study).
    foo <- function(data, study_id, ...) {
      study_id <- rlang::ensym(study_id)
      cat_mod <- rlang::ensyms(...)

      purrr::map(cat_mod, ~ {
        # Number of rows (effects) per study and moderator level.
        # dplyr::n() is qualified so this works without dplyr being attached.
        studies_cats <-
          data %>%
          dplyr::group_by(!!study_id, !!.x) %>%
          dplyr::summarise(effects = dplyr::n(), .groups = "drop")

        nm1 <- rlang::as_string(.x)
        # Names the self-join gives to the two copies of the moderator column.
        cat_names <- paste0(nm1, c(".x", ".y"))

        # Pair every level with every other level seen in the same study, then
        # count the joined rows (studies) and sum the first level's row counts
        # (effects) for each pair.
        studies_cats <-
          studies_cats %>%
          dplyr::inner_join(studies_cats, by = rlang::as_string(study_id)) %>%
          dplyr::group_by(!!!rlang::syms(cat_names)) %>%
          dplyr::summarise(
            studies = dplyr::n(),
            effects = sum(effects.x),
            .groups = "drop"
          ) %>%
          dplyr::mutate(n = paste0(studies, " (", effects, ")"))

        studies_cats %>%
          dplyr::select(-studies, -effects) %>%
          # names_sort = TRUE asks pivot_wider() to sort the new columns
          # instead of keeping order of first appearance.
          tidyr::pivot_wider(
            names_from = cat_names[2],
            values_from = n,
            names_sort = TRUE
          ) %>%
          dplyr::rename_with(~nm1, cat_names[1]) %>%
          # Sort the rows while the moderator still has its original type,
          # and only then convert it to character.
          dplyr::arrange(dplyr::across(dplyr::all_of(nm1))) %>%
          dplyr::mutate(dplyr::across(dplyr::all_of(nm1), as.character))
      })
    }
    

    -output

    > foo(d, study, error.type)
    [[1]]
    # A tibble: 5 x 6
      error.type `0`      `1`      `2`    `3`    `10`  
      <chr>      <chr>    <chr>    <chr>  <chr>  <chr> 
    1 0          27 (189) 1 (6)    <NA>   <NA>   1 (2) 
    2 1          1 (18)   16 (118) 2 (6)  2 (6)  2 (10)
    3 2          <NA>     2 (6)    6 (33) 2 (6)  <NA>  
    4 3          <NA>     2 (6)    2 (6)  5 (27) <NA>  
    5 10         1 (2)    2 (22)   <NA>   <NA>   6 (48)