Constrained matrices

I need to create all the possible matrix of dimension 5 (5x5), where all elements are integers from 0 to 100 and its sum is 100.

I'm not sure how to do it, or how to start... Any suggestion?

Despite I program in R, I'm looking for an idea of how to do it. Pseucode is fine.

My firs approach was getting all the permutations of 100 elements 25 times (one for each element in the matrix) and then take only those that sum 100. But that is 100^25 permutations... no way to do it in this by this approach.

I will thanks any idea and/or help!

Solution

The OP is looking for all integer partitions of the number 100 of maximal length of 25. The package partitions is equipped with a function exactly for this purpose called restrictedparts. E.g.:

library(partitions)

## Keep the output tidy
options(digits = 4)
options(width = 90)

## all integer partitions of 10 of maximal length = 4
restrictedparts(10, 4)
#>                                                    
#> [1,] 10 9 8 7 6 5 8 7 6 5 6 5 4 4 7 6 5 4 5 4 3 4 3
#> [2,]  0 1 2 3 4 5 1 2 3 4 2 3 4 3 1 2 3 4 2 3 3 2 3
#> [3,]  0 0 0 0 0 0 1 1 1 1 2 2 2 3 1 1 1 1 2 2 3 2 2
#> [4,]  0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2

Once all of the them have been generated, simply create a 5x5 matrix of each combinations (restrictedparts doesn’t differentiate between 0 0 3 and 0 3 0). The only problem is that there are so many possible combinations (partitions::R(25, 100, TRUE) = 139620591) the function throws an error when you call restrictedparts(100, 25).

test <- restrictedparts(100, 25)
#> Warning in restrictedparts(100, 25): NAs introduced by coercion to integer range
#> Error in restrictedparts(100, 25): NAs in foreign function call (arg 3)

Since we can’t generate them all via restrictedparts, we can generate them individually using firstrestrictedpart along with nextrestrictedpart like so:

funPartition <- function(p, n) {
    mat <- matrix(nrow = 25, ncol = n)
    mat[, 1] <- p

    for (i in 2:n) {
        p <- nextrestrictedpart(p)
        mat[, i] <- p
    }

    mat
}

head(funPartition(firstrestrictedpart(100, 25), 5))
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]  100   99   98   97   96
#> [2,]    0    1    2    3    4
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0    0
#> [6,]    0    0    0    0    0

The only problem here is that the iterators aren’t as efficient due continuously copying.

Enter RcppAlgos

There is a faster way using the package RcppAlgos (I am the author). Similar to the partitions package, there is a function, partitionsGeneral, for generating all of partitions.

library(RcppAlgos)
## Target is implicitly set to 100 below. For different targets, explicitly
## set the target parameter. E.g.:
##
##     partitionsGeneral(0:100, 25, TRUE, target = 200, upper = 10^5)
##
## Will generate the first 10^5 partitions of 200 using the vector 0:100

matrixParts <- apply(
    partitionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
    1, \(x) matrix(x, ncol = 5), simplify = FALSE
)

all(sapply(matrixParts, sum) == 100)
#> [1] TRUE


matrixParts[c(1, 90, 10^5)]
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0  100
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    1
#> [4,]    0    0    0    0   39
#> [5,]    0    0    0    0   60
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    5
#> [2,]    0    0    0    0   13
#> [3,]    0    0    0    0   17
#> [4,]    0    0    0    0   27
#> [5,]    0    0    0    2   36

A Better Approach: Iterators

There are memory efficient iterators available as well for many topics in combinatorics including integer partitions (E.g. partitionsIter).

Using iterators, we could create a helper function that could transform each result to our desired matrix.

matFromIter <- function(it, ncol = 5L) {
    matrix(it@nextIter(), ncol = ncol)
}

## Initialize partitions iterator
it <- partitionsIter(0:100, 25, repetition = TRUE)
## Get the first 3 results
replicate(3, matFromIter(it))
#> , , 1
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0  100
#> 
#> , , 2
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    1
#> [5,]    0    0    0    0   99
#> 
#> , , 3
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    2
#> [5,]    0    0    0    0   98

## Get 2 more picking up where we left off above
replicate(2, matFromIter(it))
#> , , 1
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    3
#> [5,]    0    0    0    0   97
#> 
#> , , 2
#> 
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    4
#> [5,]    0    0    0    0   96

## Reset iterator
it@startOver()
## Get random lexicographical result using the method: `[[`
matrix(it[[1e6]], ncol = 5)
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    7
#> [2,]    0    0    0    0   10
#> [3,]    0    0    0    2   11
#> [4,]    0    0    0    2   22
#> [5,]    0    0    0    2   44

## Get the last one
matrix(it@back(), ncol = 5)
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    4    4    4    4    4
#> [2,]    4    4    4    4    4
#> [3,]    4    4    4    4    4
#> [4,]    4    4    4    4    4
#> [5,]    4    4    4    4    4

Need Permutations?

If you really want permutations, no problem, just call compositionsGeneral:

matrixComps <- apply(
    compositionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
    1, \(x) matrix(x, ncol = 5), simplify = FALSE
)

all(sapply(matrixComps, sum) == 100)
#> [1] TRUE


## Compare to the output of matrixCombs
matrixComps[c(1, 90, 10^5)]
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0    0
#> [5,]    0    0    0    0  100
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0    0
#> [3,]    0    0    0    0    0
#> [4,]    0    0    0    0   89
#> [5,]    0    0    0    0   11
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    0    0    0
#> [2,]    0    0    0    0   27
#> [3,]    0    0    0    0    6
#> [4,]    0    0    0    0   51
#> [5,]    0    0    0    0   16

Random Sampling

Since the number of results is so massive, sampling may be our best option. Consider how many total results we are dealing with:

partitionsCount(0:100, 25, TRUE)
#> [1] 139620591


compositionsCount(0:100, 25, TRUE)
#> Big Integer ('bigz') :
#> [1] 87676181447775191489836

We can use either partitionsSample or compositionsSample to quickly generate a candidates that can be transformed into the desired matrix output.

## Optional, use seed parameter for reproducibility
apply(partitionsSample(0:100, 25, TRUE, n = 3, seed = 42), 1, \(x) {
    matrix(x, ncol = 5)
}, simplify = FALSE)
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    0    4    7    7
#> [2,]    0    0    4    7    7
#> [3,]    0    1    4    7    8
#> [4,]    0    1    5    7    8
#> [5,]    0    1    5    7   10
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    1    1    4    4    5
#> [2,]    1    1    4    5    5
#> [3,]    1    2    4    5    5
#> [4,]    1    2    4    5   11
#> [5,]    1    3    4    5   16
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    1    1    1    8
#> [2,]    0    1    1    1   11
#> [3,]    0    1    1    2   16
#> [4,]    0    1    1    6   17
#> [5,]    0    1    1    8   20


apply(compositionsSample(0:100, 25, TRUE, n = 3, seed = 28), 1, \(x) {
    matrix(x, ncol = 5)
}, simplify = FALSE)
#> [[1]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    2    6    1    2
#> [2,]    0    2    1    6    2
#> [3,]   12    2    3    1    1
#> [4,]    3    2    3   24    1
#> [5,]    7    4    4    5    6
#> 
#> [[2]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    0    1    9    4    5
#> [2,]    6    2    1    4    7
#> [3,]    1    4   24    4    2
#> [4,]    3    2    2    1    6
#> [5,]    1    7    2    1    1
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,]    1    1    3    9    3
#> [2,]    3    2    8    1    3
#> [3,]    8    5    6    2    6
#> [4,]    3    3   11    1    2
#> [5,]    1    3    4    5    6

Efficiency

All of the functions are written in C++ for ultimate efficiency. Consider iterating over 10,000 partitions.

library(microbenchmark)
pkg_partitions <- function(n, k, total) {
    a <- firstrestrictedpart(n, k)
    for (i in 1:(total - 1)) a <- nextrestrictedpart(a)
}

pkg_RcppAlgos <- function(n, k, total) {
    a <- partitionsIter(0:n, k, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}

microbenchmark(cbRcppAlgos  = pkg_RcppAlgos(100, 25, 10^4),
               cbPartitions = pkg_partitions(100, 25, 10^4),
               times = 25, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgos = pkg_RcppAlgos(100, 25, 10^4), cbPartitions =
#> pkg_partitions(100, : less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#>          expr   min    lq  mean median    uq   max neval
#>   cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    25
#>  cbPartitions 23.94 23.45 23.17  23.31 22.22 32.84    25

And generating 10^5 random samples takes no time, especially when using multiple threads:

system.time(partitionsSample(0:100, 25, TRUE, nThreads = 6,
                             n = 1e5, seed = 42))
#>    user  system elapsed 
#>   1.973   0.004   0.348


system.time(compositionsSample(0:100, 25, TRUE, nThreads = 6,
                               n = 1e5, seed = 28))
#>    user  system elapsed 
#>   0.300   0.001   0.062