I am trying to maintain a specific order of a dataset when creating a list.
What I want to do: order the dataset by a column and maintain this order when creating a list. This should be simple, but none of the solutions I found work correctly.
Data (apologies for the structure of this one; I will skill up on reprex):
# Example data frame (40 rows) used throughout:
#   Fac_Map, Calendar, S_Residency, Period -- factor columns
#   Sum_A, Sum_S                           -- numeric columns
#   Order                                  -- integer column (presumably the
#                                             intended sort key; see question)
# Note that the Fac_Map levels are stored as "Fac_1", "Fac_10", "Fac_2", ...,
# i.e. "Fac_10" comes before "Fac_2" in the level order.
data <- structure(list(Fac_Map = structure(c(1L, 1L, 1L, 1L, 3L, 3L,
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 4L, 4L, 4L, 4L, 10L, 10L, 10L, 10L, 2L,
2L, 2L, 2L), .Label = c("Fac_1", "Fac_10", "Fac_2", "Fac_3_ProblemOrder",
"Fac_4", "Fac_5", "Fac_6", "Fac_7", "Fac_8_ProblemOrder", "Fac_9"
), class = "factor"), Calendar = structure(c(2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("Non-std", "Std"), class = "factor"),
S_Residency = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L), .Label = c("Int", "Loc"), class = "factor"), Period = structure(c(1L,
2L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2020 P1",
"2020 P2", "2020 S1"), class = "factor"), Sum_A = c(92.2,
91.7, 90.2, 88.6, 96.4, 91.4, 96.4, 91.4, 87.3, 95.3, 82.5,
89.1, 89, 90.1, 87.4, 88.9, 85.1, 89.6, 89.7, 88.1, 87.1,
91.1, 88.2, 87.9, 90.8, 97.9, 91, 88.8, 86.4, 89.5, 86.3,
86.4, 86.7, 90.8, 84.6, 86.8, 82.1, 86.8, 78.4, 80.7), Sum_S = c(75.9,
75.6, 75, 73.5, 78.6, 70.5, 78.6, 70.5, 69, 86, 65.9, 72.6,
69, 69.3, 71.5, 73.8, 65.4, 70.6, 70.6, 70.3, 68, 73, 68.4,
69.1, 69.7, 80.9, 70.7, 68, 66.3, 69.9, 67.6, 68, 65.5, 68.6,
63, 64.6, 60.4, 68.6, 59.1, 63.5), Order = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L)), class = "data.frame", row.names = c(NA,
-40L))
Code with partially working attempts:
#Reorder attempts
#1
# NOTE(review): the first argument passed here is the factor column, not the
# data frame; dplyr::arrange() expects the data frame first -- confirm intent.
# The result is not assigned either way.
arrange(data$Fac_Map, data,data$Order)
#2
# Converts Order to numeric, then sorts the rows by Fac_Map and Order. The
# sorted data frame is only printed, not assigned, so `data` keeps its
# original row order.
data$Order <- as.numeric(as.character(data$Order))
data[order(data$Fac_Map,data$Order),]
#3
# reorder() returns a re-levelled factor, but the result is not assigned back
# to data$Fac_Map, so the levels seen by split() below are unchanged.
reorder(data$Fac_Map, data$Order)
#Create list
#List is not in order
# split() arranges the list elements by the factor levels of Fac_Map and
# Calendar, not by the current row order of `data`.
Fac <- split(data, list(data$Fac_Map, data$Calendar))
Fac
The order will just not stick correctly when I create the list. Any ideas?
We could change the columns to `factor` with `levels` specified as the `unique` values after the `arrange` step. Then, we do the `split` or `group_split`. Note that 'Fac_Map' contains both strings and numbers, so it may be useful to `arrange` by `match`ing against the `mixedsort`ed `levels` of 'Fac_Map':
library(dplyr)

# 1. Sort the rows by the natural ("mixed") order of the Fac_Map labels, so
#    that e.g. "Fac_10" comes after "Fac_9", and then by Order.
# 2. Freeze that row order into the factor levels of the grouping columns.
# 3. Split into a list of data frames, one per Fac_Map/Calendar combination.
outlst <- data %>%
  arrange(
    match(Fac_Map, gtools::mixedsort(levels(Fac_Map))),
    Order
  ) %>%
  mutate(
    across(
      c(Fac_Map, Order, Calendar),
      function(col) factor(col, levels = unique(col))
    )
  ) %>%
  group_split(Fac_Map, Calendar)
The values of 'Fac_Map' in the `list` are now in the expected order:
library(purrr)

# Check the result: take the first Fac_Map label of every list element.
map_chr(outlst, function(grp) {
  first_row <- slice(select(grp, Fac_Map), 1)
  as.character(pull(first_row))
})
#[1] "Fac_1" "Fac_2" "Fac_3_ProblemOrder" "Fac_3_ProblemOrder" "Fac_4"
#[6] "Fac_5" "Fac_6" "Fac_7" "Fac_8_ProblemOrder" "Fac_9"
#[11] "Fac_10"
Or, using base R: `order` the rows by 'Order', then convert the columns used for the split to `factor` with `levels` set to their `unique` values:
# Base R alternative.
# Sort the rows by 'Order'; ties keep their original relative sequence.
data <- data[order(data$Order), ]
# Re-level the grouping columns by first appearance, so that the factor
# levels follow the sorted row order instead of the alphabetical order.
data[c("Fac_Map", "Calendar")] <- lapply(
  data[c("Fac_Map", "Calendar")],
  function(x) factor(x, levels = unique(x))
)
# lex.order = TRUE makes the first factor (Fac_Map) vary slowest, so the list
# is ordered by Fac_Map first; with the default (FALSE) the elements would be
# grouped by Calendar first. drop = TRUE removes the empty
# Fac_Map/Calendar combinations.
split(data, data[c("Fac_Map", "Calendar")], drop = TRUE, lex.order = TRUE)