This question is very similar to this one and this one but it combines elements of both in a way that I can't figure out on my own.
I have the following list.
original_groups <- list(group_1 = as.character(1:6), group_2 = as.character(7:12), group_3 = as.character(13:20))
I want to create new groups based on these original groups. There is a constraint - each new group must contain an equal number of items from each original group. Furthermore, items cannot be used more than once. For example, if we take one item from each original group, we may get the following new groups.
Number_of_Items_From_Each_Original_Group_to_End_up_in_Each_New_Group <- 1
Number_of_New_Groups <- 3
# option 1
new_groups <- list(group_1 = as.character(c(1, 7, 13)), group_2 = as.character(c(2, 8, 14)), group_3 = as.character(c(3, 9, 15)))
# option 2
new_groups <- list(group_1 = as.character(c(1, 7, 13)), group_2 = as.character(c(2, 8, 14)), group_3 = as.character(c(3, 9, 16)))
# option 3
new_groups <- list(group_1 = as.character(c(1, 8, 13)), group_2 = as.character(c(2, 7, 14)), group_3 = as.character(c(3, 9, 15)))
There are three things that make what I'm hoping to do really tricky. First, I want to generate all possible combinations since this operation is part of a larger function. Second, I want to have the option to have multiple items from each original group end up in each new group. Third, I also want to have the option to choose how many new groups there will be. Here is another example.
Number_of_Items_From_Each_Original_Group_to_End_up_in_Each_New_Group <- 2
Number_of_New_Groups <- 3
# option 1
new_groups <- list(group_1 = as.character(c(1, 2, 7, 8, 13, 14)), group_2 = as.character(c(3, 4, 9, 10, 15, 16)), group_3 = as.character(c(5, 6, 11, 12, 17, 18)))
# option 2
new_groups <- list(group_1 = as.character(c(1, 2, 7, 8, 13, 14)), group_2 = as.character(c(3, 4, 9, 10, 15, 16)), group_3 = as.character(c(5, 6, 11, 12, 17, 19)))
# option 3
new_groups <- list(group_1 = as.character(c(1, 3, 7, 8, 13, 14)), group_2 = as.character(c(2, 4, 9, 10, 15, 16)), group_3 = as.character(c(5, 6, 11, 12, 17, 18)))
If each original group contained at least 9 items, I could even make new groups that each contain 3 items from each original group. Alternatively, if each original group contained at least 9 items, I could also increase the number of new groups to 4 if only 2 items from each original group end up in each new group.
Note that the original groups don't all need to contain the same amount of items for this process to work - the third original group contains more items than the other two original groups.
Also, note that item order doesn't matter within new groups. In other words, new_groups <- list(group_1 = as.character(c(1, 2, 7, 8, 13, 14)), group_2 = as.character(c(3, 4, 9, 10, 15, 16)), group_3 = as.character(c(5, 6, 11, 12, 17, 18)))
is the same as new_groups <- list(group_1 = as.character(c(2, 1, 8, 7, 14, 13)), group_2 = as.character(c(3, 4, 9, 10, 15, 16)), group_3 = as.character(c(5, 6, 11, 12, 17, 18)))
, so in my final output, I would only want one of these options reported.
Finally, note that the number of original groups won’t always equal the number of new groups - they just happen to in this example.
All solutions are welcome but I'd be especially curious to see one using only base
functions.
Thank you!
I suggest this solution: it does not use any other library than base
.
permutations
function, to compute all possibile combinations of elements of a vector (or list) vec
taken sublen
elements at a time (likewise the combn
function)permutations <- function(vec, sublen, prev_vec=NULL){
out_list <- list()
if(sublen==1){
for(v in vec){
out_list <- append(out_list,list(append(prev_vec,list(v))))
}
} else {
for (i in 1:(length(vec)-sublen+1)){
v <- vec[1]
prev_vec0 <- c(prev_vec,vec[1])
vec <- vec[2:length(vec)]
perm_list <- permutations(
vec=vec,
sublen=sublen-1,
prev_vec=prev_vec0
)
out_list <- append(out_list,perm_list)
}
}
return(out_list)
}
find_matrix
function that unlists matrices from deeply nested lists (source)find_matrix <- function(x) {
if (is.matrix(x))
return(list(x))
if (!is.list(x))
return(NULL)
unlist(lapply(x, find_matrix), FALSE)
}
compatible_rows
function, that extract from a dataframe a subset of rows which can be used to create the other output vectors, given an output vectorcompatible_rows <- function(df,row_value){
row_ids <- c()
if(is.null(nrow(df))){
return(NULL)
} else {
for (row_id in 1:nrow(df)){
row_ids <- c(row_ids,!any(row_value %in% df[row_id,]))
}
return(df[which(row_ids),])
}
}
new_groups_list
function, that computes all possible output matricesnew_groups_list <- function(df, prev_df=NULL, lvl=-1, verbose=F){
lvl <- lvl+1
results_list <- list()
if(is.null(nrow(df))){
if(verbose==T) cat(paste0("--BRANCH END (BEGIN lvl ",lvl,")--\n"))
prev_df0 <- rbind(prev_df,df)
rownames(prev_df0) <- NULL
if(verbose==T) cat("returned\n")
if(verbose==T) print(prev_df0)
if(verbose==T) cat("\n")
return(prev_df0)
}
if(nrow(df)==0){
if(verbose==T) cat(paste0("--BRANCH END (BEGIN lvl ",lvl,")--\n"))
prev_df0 <- rbind(prev_df,df)
rownames(prev_df0) <- NULL
if(verbose==T) cat("returned\n")
if(verbose==T) print(prev_df0)
if(verbose==T) cat("\n")
return(prev_df0)
}
for(row_id in 1:nrow(df)){
if(verbose==T) cat(paste("-- lvl",lvl,"cycle",row_id,"--\n"))
if(verbose==T) cat("initial results list\n")
if(verbose==T) print(results_list)
if(verbose==T) cat("\n")
if(verbose==T) cat("df in\n")
if(verbose==T) assign("last_df",df,envir = .GlobalEnv)
if(verbose==T) print(df)
if(verbose==T) cat("\n")
if(is.null(nrow(df))){
prev_df0 <- rbind(prev_df,df)
rownames(prev_df0) <- NULL
if(verbose==T) cat(paste0("--BRANCH END (MID lvl ",lvl,")--\n"))
if(verbose==T) cat("returned\n")
results_list <- append(results_list,list(prev_df0))
if(verbose==T) print(results_list)
if(verbose==T) cat("\n")
return(results_list)
}
considered_row <- df[1,]
if(verbose==T) assign("last_considered_row",considered_row,envir = .GlobalEnv)
if(verbose==T) cat("considered rows\n")
if(verbose==T) print(considered_row)
if(verbose==T) cat("\n")
df <- df[2:nrow(df),]
if(verbose==T) assign("last_df",df,envir = .GlobalEnv)
if(verbose==T) cat("df without considered rows\n")
if(verbose==T) print(df)
if(verbose==T) cat("\n")
prev_df0 <- rbind(prev_df,considered_row)
rownames(prev_df0) <- NULL
if(verbose==T) assign("last_prev0",prev_df0,envir = .GlobalEnv)
if(verbose==T) cat("collected considered rows\n")
if(verbose==T) print(prev_df0)
if(verbose==T) cat("\n")
comp_df <- compatible_rows(df,considered_row)
if(verbose==T) assign("last_comp_df",comp_df,envir = .GlobalEnv)
if(verbose==T) cat("compatible rows in df\n")
if(verbose==T) print(comp_df)
if(verbose==T) cat("\n")
if(verbose==T) cat(paste(">>> GOING TO LVL",lvl+1,"\n\n"))
new_rows <- new_groups_list(
comp_df,
prev_df=prev_df0,
lvl=lvl,
verbose=verbose
)
if(verbose==T) cat(paste0("--ROOT (lvl ",lvl,")--\n"))
if(verbose==T) cat("result received from branch\n")
if(verbose==T) print(new_rows)
if(verbose==T) cat("\n")
results_list <- append(results_list,list(new_rows))
if(verbose==T) cat("results list\n")
if(verbose==T) print(results_list)
if(verbose==T) cat("\n")
}
return(results_list)
}
create_new_groups
, which wraps-up all the other functions, and outputs the whole list of possible solutionscreate_new_groups <- function(original_groups, max_output = NULL){
min_len_original_groups = min(lengths(original_groups))
num_original_groups = length(original_groups)
max_len_subgroup <- floor(min_len_original_groups/2)
if(min_len_original_groups<2){
return("Not possible to populate new groups: at least one original group has less than 2 elements")
}
NewGroups_subLen_len_num <- list()
for (len_subgroup in 1:max_len_subgroup){
new_group_params <- c(
len_subgroup,
len_subgroup*num_original_groups,
floor(min_len_original_groups/len_subgroup)
)
NewGroups_subLen_len_num[[len_subgroup]] <- new_group_params
}
out_list <- list()
ind <- 1
for (e in 1:length(NewGroups_subLen_len_num)){
NewGroup_subLen_len_num <- NewGroups_subLen_len_num[[e]]
elem_list <- list()
ind <- 1
# print(ind)
for (o in 1:length(original_groups)){
original_group <- original_groups[[o]]
elem_list[[paste("group",ind)]] <- permutations(original_group,NewGroup_subLen_len_num[1])
ind <- ind+1
}
out_list[[paste(c("subLen","len","numGroups"), NewGroup_subLen_len_num, collapse = " ")]] <- elem_list
}
results_list <- list()
config_test <<- NewGroups_subLen_len_num
for (config_id in 1:length(NewGroups_subLen_len_num)){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
# config_test <<- config
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
# out_test <<- new_groups
results_list[[config_id]] <- new_groups
}
return(results_list)
}
Given a simple input, like
original_groups <- list(
group_1 = as.character(1:2),
group_2 = as.character(3:4),
group_3 = as.character(5:7)
)
The output of create_new_groups(original_groups)
is
> create_new_groups_modified(original_groups)
[[1]]
[[1]][[1]]
[,1] [,2] [,3]
[1,] "1" "3" "5"
[2,] "2" "4" "6"
[[1]][[2]]
[,1] [,2] [,3]
[1,] "1" "3" "5"
[2,] "2" "4" "7"
[[1]][[3]]
[,1] [,2] [,3]
[1,] "2" "3" "5"
[2,] "1" "4" "6"
[[1]][[4]]
[,1] [,2] [,3]
[1,] "2" "3" "5"
[2,] "1" "4" "7"
[[1]][[5]]
[,1] [,2] [,3]
[1,] "1" "4" "5"
[2,] "2" "3" "6"
[[1]][[6]]
[,1] [,2] [,3]
[1,] "1" "4" "5"
[2,] "2" "3" "7"
[[1]][[7]]
[,1] [,2] [,3]
[1,] "2" "4" "5"
[2,] "1" "3" "6"
[[1]][[8]]
[,1] [,2] [,3]
[1,] "2" "4" "5"
[2,] "1" "3" "7"
[[1]][[9]]
[,1] [,2] [,3]
[1,] "1" "3" "6"
[2,] "2" "4" "7"
[[1]][[10]]
[,1] [,2] [,3]
[1,] "2" "3" "6"
[2,] "1" "4" "7"
[[1]][[11]]
[,1] [,2] [,3]
[1,] "1" "4" "6"
[2,] "2" "3" "7"
[[1]][[12]]
[,1] [,2] [,3]
[1,] "2" "4" "6"
[2,] "1" "3" "7"
Moreover, the create_new_groups
function also creates a global variable config_test
where all possible configurations, for a given list of groups (i.e., original_groups
), are stored. For example, for the previous problem, config_test
is equal to
> config_test
[[1]]
[1] 1 3 2
So, for this problem only one output configuration is possible, having this structure:
Given a slightly more complex example
original_groups <- list(
group_1 = as.character(1:4),
group_2 = as.character(5:8),
group_3 = as.character(9:13)
)
config_test
would be equal to
> config_test
[[1]]
[1] 1 3 4
[[2]]
[1] 2 6 2
I made some tests, this method should work for any number of groups, of any length, and the output should always be composed of not-duplicated matrices.
I'm sorry if the explanation is short, if I have time in the following days I'll try to add some notes.
EDIT
A simple way to output only the configurations characterized by a specific number of elements from the original groups is to change the create_new_groups
as follows
create_new_groups_modified <- function(original_groups, max_output = NULL, elements_from_original = NULL){
min_len_original_groups = min(lengths(original_groups))
num_original_groups = length(original_groups)
max_len_subgroup <- floor(min_len_original_groups/2)
if(min_len_original_groups<2){
stop("Not possible to populate new groups: at least one original group has less than 2 elements")
}
NewGroups_subLen_len_num <- list()
for (len_subgroup in 1:max_len_subgroup){
new_group_params <- c(
len_subgroup,
len_subgroup*num_original_groups,
floor(min_len_original_groups/len_subgroup)
)
NewGroups_subLen_len_num[[len_subgroup]] <- new_group_params
}
out_list <- list()
ind <- 1
for (e in 1:length(NewGroups_subLen_len_num)){
NewGroup_subLen_len_num <- NewGroups_subLen_len_num[[e]]
elem_list <- list()
ind <- 1
# print(ind)
for (o in 1:length(original_groups)){
original_group <- original_groups[[o]]
elem_list[[paste("group",ind)]] <- permutations(original_group,NewGroup_subLen_len_num[1])
ind <- ind+1
}
out_list[[paste(c("subLen","len","numGroups"), NewGroup_subLen_len_num, collapse = " ")]] <- elem_list
}
results_list <- list()
config_test <<- NewGroups_subLen_len_num
# if `elements_from_original` is not set, output all possible combinations
if(is.null(elements_from_original)){
for (config_id in 1:length(NewGroups_subLen_len_num)){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
# config_test <<- config
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
# out_test <<- new_groups
results_list[[config_id]] <- new_groups
}
} else {
# if `elements_from_original` is set, check if this is a valid configuration, then output only the matrix having this configuration
config_id <- which(sapply(NewGroups_subLen_len_num,function(x) x[1]==elements_from_original))
if (length(config_id)!=0){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
results_list[[1]] <- new_groups
} else {
stop("value of elements_from_original not available: check config_test to see available configurations")
}
}
return(results_list)
}
The elements_from_original
parameter of the function allows to set the number elements from the original groups to consider, and, if applicable, the output will include only matrices following that configuration.
EDIT 2
To output the matrices composed by a specific number of groups
select_matrices_by_number_output_groups
, that outputs only the matrices with n_output_groups
rowsselect_matrices_by_number_output_groups <- function(l,n_output_groups){
# Filter out matrices having less rows than `n_output_groups`
out_l <- l[which(
sapply(
l,
# function(x) check_matrix_by_number_output_groups(x,n_output_groups)
function(mtr){
if(nrow(mtr)<n_output_groups) return(F)
else return(T)
}
)
)]
# Cut-off rows from matrices having more rows than `n_output_groups`
out_l <- lapply(
out_l,
function(x) head(x,n_output_groups)
)
# Keep only unique elements (i.e., matrices)
out_l <- unique(out_l)
return(out_l)
}
create_new_groups
so that it includes the select_matrices_by_number_output_groups
functioncreate_new_groups_modified_2 <- function(original_groups, max_output = NULL, elements_from_original = NULL, n_output_groups = NULL){
min_len_original_groups = min(lengths(original_groups))
num_original_groups = length(original_groups)
max_len_subgroup <- floor(min_len_original_groups/2)
if(min_len_original_groups<2){
stop("Not possible to populate new groups: at least one original group has less than 2 elements")
}
NewGroups_subLen_len_num <- list()
for (len_subgroup in 1:max_len_subgroup){
new_group_params <- c(
len_subgroup,
len_subgroup*num_original_groups,
floor(min_len_original_groups/len_subgroup)
)
NewGroups_subLen_len_num[[len_subgroup]] <- new_group_params
}
out_list <- list()
ind <- 1
for (e in 1:length(NewGroups_subLen_len_num)){
NewGroup_subLen_len_num <- NewGroups_subLen_len_num[[e]]
elem_list <- list()
ind <- 1
# print(ind)
for (o in 1:length(original_groups)){
original_group <- original_groups[[o]]
elem_list[[paste("group",ind)]] <- permutations(original_group,NewGroup_subLen_len_num[1])
ind <- ind+1
}
out_list[[paste(c("subLen","len","numGroups"), NewGroup_subLen_len_num, collapse = " ")]] <- elem_list
}
results_list <- list()
config_test <<- NewGroups_subLen_len_num
# if `elements_from_original` is not set, output all possible combinations
if(is.null(elements_from_original)){
for (config_id in 1:length(NewGroups_subLen_len_num)){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
results_list[[config_id]] <- new_groups
}
} else {
# if `elements_from_original` is set, check if this is a valid configuration, then output only the matrix having this configuration
config_id <- which(sapply(NewGroups_subLen_len_num,function(x) x[1]==elements_from_original))
if (length(config_id)!=0){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
new_groups <- lapply(
new_groups,
function(x) {
dimnames(x) <- NULL
return(x)
}
)
if(is.null(n_output_groups)){
new_groups <- new_groups[which(sapply(new_groups, nrow) == config[3])]
} else if (n_output_groups > config[3]){
stop("value n_output_groups higher than max number of new groups for this configuration: check config_test to see available configurations")
} else {
new_groups <- select_matrices_by_number_output_groups(new_groups,n_output_groups)
}
# results_list[[1]] <- new_groups
results_list <- new_groups
} else {
stop("value of elements_from_original not available: check config_test to see available configurations")
}
}
return(results_list)
}