Search code examples
rconstraintspermutation

Constrained matrices


I need to create all the possible matrix of dimension 5 (5x5), where all elements are integers from 0 to 100 and its sum is 100.

I'm not sure how to do it, or how to start... Any suggestion?

Despite I program in R, I'm looking for an idea of how to do it. Pseucode is fine.

My firs approach was getting all the permutations of 100 elements 25 times (one for each element in the matrix) and then take only those that sum 100. But that is 100^25 permutations... no way to do it in this by this approach.

I will thanks any idea and/or help!


Solution

  • The OP is looking for all integer partitions of the number 100 of maximal length of 25. The package partitions is equipped with a function exactly for this purpose called restrictedparts. E.g.:

    library(partitions)
    
    ## Keep the output tidy
    options(digits = 4)
    options(width = 90)
    
    ## all integer partitions of 10 of maximal length = 4
    restrictedparts(10, 4)
    #>                                                    
    #> [1,] 10 9 8 7 6 5 8 7 6 5 6 5 4 4 7 6 5 4 5 4 3 4 3
    #> [2,]  0 1 2 3 4 5 1 2 3 4 2 3 4 3 1 2 3 4 2 3 3 2 3
    #> [3,]  0 0 0 0 0 0 1 1 1 1 2 2 2 3 1 1 1 1 2 2 3 2 2
    #> [4,]  0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2
    

    Once all of the them have been generated, simply create a 5x5 matrix of each combinations (restrictedparts doesn’t differentiate between 0 0 3 and 0 3 0). The only problem is that there are so many possible combinations (partitions::R(25, 100, TRUE) = 139620591) the function throws an error when you call restrictedparts(100, 25).

    test <- restrictedparts(100, 25)
    #> Warning in restrictedparts(100, 25): NAs introduced by coercion to integer range
    #> Error in restrictedparts(100, 25): NAs in foreign function call (arg 3)
    

    Since we can’t generate them all via restrictedparts, we can generate them individually using firstrestrictedpart along with nextrestrictedpart like so:

    funPartition <- function(p, n) {
        mat <- matrix(nrow = 25, ncol = n)
        mat[, 1] <- p
    
        for (i in 2:n) {
            p <- nextrestrictedpart(p)
            mat[, i] <- p
        }
    
        mat
    }
    
    head(funPartition(firstrestrictedpart(100, 25), 5))
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]  100   99   98   97   96
    #> [2,]    0    1    2    3    4
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    0
    #> [5,]    0    0    0    0    0
    #> [6,]    0    0    0    0    0
    

    The only problem here is that the iterators aren’t as efficient due continuously copying.

    Enter RcppAlgos

    There is a faster way using the package RcppAlgos (I am the author). Similar to the partitions package, there is a function, partitionsGeneral, for generating all of partitions.

    library(RcppAlgos)
    ## Target is implicitly set to 100 below. For different targets, explicitly
    ## set the target parameter. E.g.:
    ##
    ##     partitionsGeneral(0:100, 25, TRUE, target = 200, upper = 10^5)
    ##
    ## Will generate the first 10^5 partitions of 200 using the vector 0:100
    
    matrixParts <- apply(
        partitionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
        1, \(x) matrix(x, ncol = 5), simplify = FALSE
    )
    
    all(sapply(matrixParts, sum) == 100)
    #> [1] TRUE
    
    
    matrixParts[c(1, 90, 10^5)]
    #> [[1]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    0
    #> [5,]    0    0    0    0  100
    #> 
    #> [[2]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    1
    #> [4,]    0    0    0    0   39
    #> [5,]    0    0    0    0   60
    #> 
    #> [[3]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    5
    #> [2,]    0    0    0    0   13
    #> [3,]    0    0    0    0   17
    #> [4,]    0    0    0    0   27
    #> [5,]    0    0    0    2   36
    

    A Better Approach: Iterators

    There are memory efficient iterators available as well for many topics in combinatorics including integer partitions (E.g. partitionsIter).

    Using iterators, we could create a helper function that could transform each result to our desired matrix.

    matFromIter <- function(it, ncol = 5L) {
        matrix(it@nextIter(), ncol = ncol)
    }
    
    ## Initialize partitions iterator
    it <- partitionsIter(0:100, 25, repetition = TRUE)
    ## Get the first 3 results
    replicate(3, matFromIter(it))
    #> , , 1
    #> 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    0
    #> [5,]    0    0    0    0  100
    #> 
    #> , , 2
    #> 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    1
    #> [5,]    0    0    0    0   99
    #> 
    #> , , 3
    #> 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    2
    #> [5,]    0    0    0    0   98
    
    ## Get 2 more picking up where we left off above
    replicate(2, matFromIter(it))
    #> , , 1
    #> 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    3
    #> [5,]    0    0    0    0   97
    #> 
    #> , , 2
    #> 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    4
    #> [5,]    0    0    0    0   96
    
    ## Reset iterator
    it@startOver()
    ## Get random lexicographical result using the method: `[[`
    matrix(it[[1e6]], ncol = 5)
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    7
    #> [2,]    0    0    0    0   10
    #> [3,]    0    0    0    2   11
    #> [4,]    0    0    0    2   22
    #> [5,]    0    0    0    2   44
    
    ## Get the last one
    matrix(it@back(), ncol = 5)
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    4    4    4    4    4
    #> [2,]    4    4    4    4    4
    #> [3,]    4    4    4    4    4
    #> [4,]    4    4    4    4    4
    #> [5,]    4    4    4    4    4
    

    Need Permutations?

    If you really want permutations, no problem, just call compositionsGeneral:

    matrixComps <- apply(
        compositionsGeneral(0:100, 25, repetition = TRUE, upper = 10^5),
        1, \(x) matrix(x, ncol = 5), simplify = FALSE
    )
    
    all(sapply(matrixComps, sum) == 100)
    #> [1] TRUE
    
    
    ## Compare to the output of matrixCombs
    matrixComps[c(1, 90, 10^5)]
    #> [[1]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0    0
    #> [5,]    0    0    0    0  100
    #> 
    #> [[2]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0    0
    #> [3,]    0    0    0    0    0
    #> [4,]    0    0    0    0   89
    #> [5,]    0    0    0    0   11
    #> 
    #> [[3]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    0    0    0
    #> [2,]    0    0    0    0   27
    #> [3,]    0    0    0    0    6
    #> [4,]    0    0    0    0   51
    #> [5,]    0    0    0    0   16
    

    Random Sampling

    Since the number of results is so massive, sampling may be our best option. Consider how many total results we are dealing with:

    partitionsCount(0:100, 25, TRUE)
    #> [1] 139620591
    
    
    compositionsCount(0:100, 25, TRUE)
    #> Big Integer ('bigz') :
    #> [1] 87676181447775191489836
    

    We can use either partitionsSample or compositionsSample to quickly generate a candidates that can be transformed into the desired matrix output.

    ## Optional, use seed parameter for reproducibility
    apply(partitionsSample(0:100, 25, TRUE, n = 3, seed = 42), 1, \(x) {
        matrix(x, ncol = 5)
    }, simplify = FALSE)
    #> [[1]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    0    4    7    7
    #> [2,]    0    0    4    7    7
    #> [3,]    0    1    4    7    8
    #> [4,]    0    1    5    7    8
    #> [5,]    0    1    5    7   10
    #> 
    #> [[2]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    1    1    4    4    5
    #> [2,]    1    1    4    5    5
    #> [3,]    1    2    4    5    5
    #> [4,]    1    2    4    5   11
    #> [5,]    1    3    4    5   16
    #> 
    #> [[3]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    1    1    1    8
    #> [2,]    0    1    1    1   11
    #> [3,]    0    1    1    2   16
    #> [4,]    0    1    1    6   17
    #> [5,]    0    1    1    8   20
    
    
    apply(compositionsSample(0:100, 25, TRUE, n = 3, seed = 28), 1, \(x) {
        matrix(x, ncol = 5)
    }, simplify = FALSE)
    #> [[1]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    2    6    1    2
    #> [2,]    0    2    1    6    2
    #> [3,]   12    2    3    1    1
    #> [4,]    3    2    3   24    1
    #> [5,]    7    4    4    5    6
    #> 
    #> [[2]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    0    1    9    4    5
    #> [2,]    6    2    1    4    7
    #> [3,]    1    4   24    4    2
    #> [4,]    3    2    2    1    6
    #> [5,]    1    7    2    1    1
    #> 
    #> [[3]]
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    1    1    3    9    3
    #> [2,]    3    2    8    1    3
    #> [3,]    8    5    6    2    6
    #> [4,]    3    3   11    1    2
    #> [5,]    1    3    4    5    6
    

    Efficiency

    All of the functions are written in C++ for ultimate efficiency. Consider iterating over 10,000 partitions.

    library(microbenchmark)
    pkg_partitions <- function(n, k, total) {
        a <- firstrestrictedpart(n, k)
        for (i in 1:(total - 1)) a <- nextrestrictedpart(a)
    }
    
    pkg_RcppAlgos <- function(n, k, total) {
        a <- partitionsIter(0:n, k, repetition = TRUE)
        for (i in 1:total) a@nextIter()
    }
    
    microbenchmark(cbRcppAlgos  = pkg_RcppAlgos(100, 25, 10^4),
                   cbPartitions = pkg_partitions(100, 25, 10^4),
                   times = 25, unit = "relative")
    #> Warning in microbenchmark(cbRcppAlgos = pkg_RcppAlgos(100, 25, 10^4), cbPartitions =
    #> pkg_partitions(100, : less accurate nanosecond times to avoid potential integer overflows
    #> Unit: relative
    #>          expr   min    lq  mean median    uq   max neval
    #>   cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    25
    #>  cbPartitions 23.94 23.45 23.17  23.31 22.22 32.84    25
    

    And generating 10^5 random samples takes no time, especially when using multiple threads:

    system.time(partitionsSample(0:100, 25, TRUE, nThreads = 6,
                                 n = 1e5, seed = 42))
    #>    user  system elapsed 
    #>   1.973   0.004   0.348
    
    
    system.time(compositionsSample(0:100, 25, TRUE, nThreads = 6,
                                   n = 1e5, seed = 28))
    #>    user  system elapsed 
    #>   0.300   0.001   0.062