Search code examples
rlistreorderlist

Ordering dataset and maintain order when creating list


I am trying to maintain a specific order of a dataset when creating a list.

What I want to do: Order the dataset by a column and maintain this order when creating a list. Should be simple but all solutions I found are not working correctly.

Data (apologies for the structure of this one; I will skill up on reprex):

    # Example data: a 40-row data.frame with columns Fac_Map, Calendar,
    # S_Residency, Period, Sum_A, Sum_S and Order.
    # Fac_Map is a factor whose levels are stored alphabetically
    # ("Fac_1", "Fac_10", "Fac_2", ...), so "Fac_10" ranks before "Fac_2".
    # Order is an integer column with values 1-9.
    data <- structure(list(Fac_Map = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 
8L, 8L, 9L, 9L, 9L, 9L, 4L, 4L, 4L, 4L, 10L, 10L, 10L, 10L, 2L, 
2L, 2L, 2L), .Label = c("Fac_1", "Fac_10", "Fac_2", "Fac_3_ProblemOrder", 
"Fac_4", "Fac_5", "Fac_6", "Fac_7", "Fac_8_ProblemOrder", "Fac_9"
), class = "factor"), Calendar = structure(c(2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L), .Label = c("Non-std", "Std"), class = "factor"), 
    S_Residency = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 
    2L, 2L), .Label = c("Int", "Loc"), class = "factor"), Period = structure(c(1L, 
    2L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2020 P1", 
    "2020 P2", "2020 S1"), class = "factor"), Sum_A = c(92.2, 
    91.7, 90.2, 88.6, 96.4, 91.4, 96.4, 91.4, 87.3, 95.3, 82.5, 
    89.1, 89, 90.1, 87.4, 88.9, 85.1, 89.6, 89.7, 88.1, 87.1, 
    91.1, 88.2, 87.9, 90.8, 97.9, 91, 88.8, 86.4, 89.5, 86.3, 
    86.4, 86.7, 90.8, 84.6, 86.8, 82.1, 86.8, 78.4, 80.7), Sum_S = c(75.9, 
    75.6, 75, 73.5, 78.6, 70.5, 78.6, 70.5, 69, 86, 65.9, 72.6, 
    69, 69.3, 71.5, 73.8, 65.4, 70.6, 70.6, 70.3, 68, 73, 68.4, 
    69.1, 69.7, 80.9, 70.7, 68, 66.3, 69.9, 67.6, 68, 65.5, 68.6, 
    63, 64.6, 60.4, 68.6, 59.1, 63.5), Order = c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 
    4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 
    8L, 8L, 8L, 9L, 9L, 9L, 9L)), class = "data.frame", row.names = c(NA, 
-40L))

Code with partially working attempts:

# Reorder attempts (kept as posted; none of them changes `data` permanently)
# 1: arrange() receives the vector data$Fac_Map as its first argument and the
#    data frame as its second. NOTE(review): presumably this is
#    dplyr::arrange, which expects the data frame first -- confirm.
#    The result is not assigned to anything.
arrange(data$Fac_Map, data,data$Order)
# 2: Order is converted to numeric in place, then the rows are sorted by
#    Fac_Map and Order, but the sorted data frame is only printed and never
#    assigned back to `data`.
data$Order <- as.numeric(as.character(data$Order))
data[order(data$Fac_Map,data$Order),]
# 3: reorder() returns a factor with re-ordered levels; the result is not
#    assigned here, so data$Fac_Map keeps its original levels.
reorder(data$Fac_Map, data$Order)


# Create list
# The list is not in the desired order: split() is given the original
# data$Fac_Map and data$Calendar factors, whose levels were not changed by
# any of the attempts above.
Fac <- split(data, list(data$Fac_Map, data$Calendar))
Fac

The order will just not stick correctly when I create the list. Any ideas?


Solution

  • We could convert the columns to factors whose levels are the unique values in the order they appear after the arrange step, and then do the split or group_split. Note that 'Fac_Map' contains both text and numbers, so it is useful to arrange by matching against the mixedsort-ed (natural-sorted) levels of 'Fac_Map', so that "Fac_10" sorts after "Fac_9".

    library(dplyr)
    # 1. Sort rows by the natural (mixed alphanumeric) rank of Fac_Map, with
    #    Order as the tie-breaker.
    # 2. Turn the grouping columns into factors whose levels follow the order
    #    in which values now appear, so later grouping respects that order.
    # 3. Split into one tibble per (Fac_Map, Calendar) combination.
    outlst <- data %>%
      arrange(
        match(Fac_Map, gtools::mixedsort(levels(Fac_Map))),
        Order
      ) %>%
      mutate(
        across(
          c(Fac_Map, Order, Calendar),
          function(col) factor(col, levels = unique(col))
        )
      ) %>%
      group_split(Fac_Map, Calendar)
    

    The values of 'Fac_Map' in the list are in the order

    library(purrr)
    # For every element of the list, report the Fac_Map value found in its
    # first row (as a character string).
    map_chr(outlst, function(grp) {
      first_row <- slice(grp, 1)
      as.character(pull(first_row, Fac_Map))
    })
    #[1] "Fac_1"              "Fac_2"              "Fac_3_ProblemOrder" "Fac_3_ProblemOrder" "Fac_4"
    #[6] "Fac_5"              "Fac_6"              "Fac_7"              "Fac_8_ProblemOrder" "Fac_9"
    #[11] "Fac_10"
    

    Or, using base R: order the rows by 'Order', then convert the columns used for splitting to factors whose levels are the unique values in their order of appearance

    # Sort rows by 'Order'; order() leaves ties in their original relative
    # position.
    data <- data[order(data$Order), ]
    # Re-level the split columns so their levels follow first appearance in
    # the sorted data instead of the stored alphabetical order.
    data[c("Fac_Map", "Calendar")] <- lapply(
      data[c("Fac_Map", "Calendar")],
      function(x) factor(x, levels = unique(x))
    )
    # lex.order = TRUE makes 'Fac_Map' the slowest-varying factor, so the list
    # follows the 'Fac_Map' levels with 'Calendar' varying within each one.
    # With the default (FALSE) every "Std" group would come before any
    # "Non-std" group. drop = TRUE removes combinations that have no rows.
    split(data, data[c("Fac_Map", "Calendar")], drop = TRUE, lex.order = TRUE)