Search code examples
rmatrixapplyfill

In R Program, want to fill a matrix from vector values across rows at a a specific column with specific value


In R Program, I have a matrix containing 0 and 1's. See Below

0 1 0 1 0 0 
0 1 1 0 0 0 
0 0 0 1 0 1
0 1 1 0 1 0 
0 0 0 0 0 1

I want to fill the matrix byrow from values listed (c("J" "J" "A" "A" "A" "A" "... continue "A" until end of matrix") but begin at the first 1 in each row. See below:

0 J J A A A
0 J J A A A 
0 0 0 J J A
0 J J A A A
0 0 0 0 0 J

As of now, I have created a values list and a function to determine where the first 1 is. I'm lost on how to apply this to get the matrix I want.

pattern<- c("A","A","A","A","A") pattern <- c("J","J", rep(pattern, length.out = ncol(Matrix)-2)) indices<- apply(Matrix, 1, function(row) min(which(row == 1)))


Solution

  • A few vectorized options:

    m <- max.col(cbind(mat, 1L), "f")
    m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
    matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
    #>      [,1] [,2] [,3] [,4] [,5] [,6]
    #> [1,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [2,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [3,] "0"  "0"  "0"  "J"  "J"  "A" 
    #> [4,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [5,] "0"  "0"  "0"  "0"  "0"  "J"
    

    Or

    m <- max.col(cbind(mat, 1L), "f")
    array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
    #>      [,1] [,2] [,3] [,4] [,5] [,6]
    #> [1,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [2,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [3,] "0"  "0"  "0"  "J"  "J"  "A" 
    #> [4,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [5,] "0"  "0"  "0"  "0"  "0"  "J"
    

    Or

    matrix(
      rep.int(c("0", "J", "A"), c(ncol(mat), 2L, ncol(mat) - 2L))[
        sequence(rep(ncol(mat), nrow(mat)), ncol(mat) - max.col(cbind(mat, 1L), "f") + 2L)
      ], nrow(mat), ncol(mat), 1
    )
    #>      [,1] [,2] [,3] [,4] [,5] [,6]
    #> [1,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [2,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [3,] "0"  "0"  "0"  "J"  "J"  "A" 
    #> [4,] "0"  "J"  "J"  "A"  "A"  "A" 
    #> [5,] "0"  "0"  "0"  "0"  "0"  "J"
    

    Benchmarking shows the advantage of using a vectorized approach. Define various approaches as functions:

    f1 <- function(mat) {
      m <- max.col(cbind(mat, 1L), "f")
      m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
      matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
    }
    
    f2 <- function(mat) {
      m <- max.col(cbind(mat, 1L), "f")
      array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
    }
    
    f3 <- function(mat) {
      d <- dim(mat)
      matrix(
        rep.int(c("0", "J", "A"), c(d[2], 2L, d[2] - 2L))[
          sequence(rep(d[2], d[1]), d[2] - max.col(cbind(mat, 1L), "f") + 2L)
        ], d[1], d[2], 1
      )
    }
    
    Andre <- function(mat) {
      # from Andre Wildberg
      t(apply(mat, 1, function(x){
        res <- which(x == 1)[1] - 1
        res <- replace(res, is.na(res), length(x))
        c(x[0:res], 
          rep("J", min(c(2, (length(x) - res)))),
          rep("A", max(c(0, (length(x) - res) - 2))))}))
    }
    

    Benchmark on a large-ish matrix.

    mat <- matrix(sample(0:1, 1e5, 1, c(0.75, 0.25)), 1e4)
    
    microbenchmark::microbenchmark(
      f1 = f1(mat),
      f2 = f2(mat),
      f3 = f3(mat),
      Andre = Andre(mat),
      check = "equal"
    )
    #> Unit: milliseconds
    #>   expr       min        lq      mean    median        uq        max neval
    #>     f1  1.391700  1.894001  2.127106  1.967701  2.097001   7.966101   100
    #>     f2  1.616000  2.240750  2.691387  2.361451  2.590051   7.142301   100
    #>     f3  1.118401  1.570600  1.745991  1.619800  1.739251   5.924802   100
    #>  Andre 68.022601 70.696101 73.181934 72.200000 73.931750 117.784401   100