I have a table like this:
set.seed(123)
random_table <- data.frame(
Column1 = sample(1:10, 5, replace = TRUE),
Column2 = sample(1:10, 5, replace = TRUE),
Column3 = sample(1:10, 5, replace = TRUE),
Column4 = sample(1:10, 5, replace = TRUE),
Column5 = sample(1:10, 5, replace = TRUE)
)
Column1 Column2 Column3 Column4 Column5
3 5 5 3 9
3 4 3 8 3
10 6 9 10 4
2 9 9 7 1
6 10 9 10 7
I want to make a function that checks if "n" numbers from this table can sum to some value "m". When this is possible, I want to record all such combinations (else NULL).
I tried to write a function to do this using the combinat library:
library(combinat)
find_combinations <- function(table, num, target_sum) {
combinations <- combn(as.vector(as.matrix(table)), num)
valid_combinations <- list()
valid_cells <- list()
for (i in 1:ncol(combinations)) {
if (sum(combinations[, i]) == target_sum) {
valid_combinations <- append(valid_combinations, list(combinations[, i]))
cells <- c()
for (value in combinations[, i]) {
cell <- which(table == value, arr.ind = TRUE)[1, ]
cells <- c(cells, paste0(LETTERS[cell[2]], cell[1]))
}
valid_cells <- append(valid_cells, list(cells))
}
}
if (length(valid_combinations) > 0) {
result <- data.frame(
id = seq_along(valid_combinations),
sum = rep(target_sum, length(valid_combinations)),
numbers_selected = sapply(valid_combinations, function(x) paste(x, collapse = ",")),
cells = sapply(valid_cells, function(x) paste(x, collapse = ","))
)
} else {
result <- data.frame(
id = NA,
sum = NA,
numbers_selected = NA,
cells = NA
)
}
return(result)
}
I then called the function for a specific example and removed all duplicates (relative to the cells column):
result <- find_combinations(random_table, num = 4, target_sum = 19)
result$sorted_cells <- sapply(strsplit(result$cells, ","), function(x) paste(sort(x), collapse = ","))
result <- result[!duplicated(result$sorted_cells), ]
result$sorted_cells <- NULL
result$id <- seq_len(nrow(result))
The output looks like this:
id sum numbers_selected cells
1 19 3,3,10,3 A1,A1,A3,A1
2 19 3,3,6,7 A1,A1,A5,D4
3 19 3,3,5,8 A1,A1,B1,D2
4 19 3,3,4,9 A1,A1,B2,B4
5 19 3,10,2,4 A1,A3,A4,B2
Are there any standard ways to do this in R (ex: Finding all possible combinations of numbers from a vector to reach a given sum (No repetitions), Getting all the combination of numbers from a list that would sum to a specific number)? Or do we really have to write a function?
This is a restricted subset sum problem. Take a look at RcppAlgos::partitionsGeneral
(also see this vignette).
library(RcppAlgos)
result <- partitionsGeneral(unlist(random_table), 4, target = 19)
dim(result)
#> [1] 328 4
all(rowSums(result) == 19)
#> [1] TRUE
A function to get indices:
f <- function(df, num, target_sum, linear = TRUE) {
x <- unlist(df, 1, 1)
vals <- unique(partitionsGeneral(x, num, target = target_sum))
idx <- array(split(seq_along(x), x)[as.character(vals)], dim(vals))
out <- do.call(rbind, lapply(asplit(idx, 1), comboGrid, repetition = FALSE))
if (!(linear || is.vector(df))){
out[] <- outer(1:nrow(df), LETTERS[1:ncol(df)], \(a, b) paste0(b, a))[out]
}
out
}
Testing:
result_idx <- f(random_table, 4, 19)
dim(result_idx)
#> [1] 552 4
# check that each row corresponds to a set of indices of values that sum correctly
x <- result_idx
x[] <- unlist(df, 1, 1)[result_idx]
all(rowSums(x) == 19)
#> [1] TRUE
f(random_table, 4, 19, FALSE)[1:5,]
#> Var1 Var2 Var3 Var4
#> [1,] "E4" "A4" "A5" "A3"
#> [2,] "E4" "A4" "A5" "B5"
#> [3,] "E4" "A4" "A5" "D3"
#> [4,] "E4" "A4" "A5" "D5"
#> [5,] "E4" "A4" "B3" "A3"
Note that for this example, result
and result_idx
don't have the same number of rows. partitionsGeneral
stops early once it has identified all unique outputs (see the comment below from the package's author, Joseph Wood).