In R Program, want to fill a matrix from vector values across rows at a a specific column with specific value

In R Program, I have a matrix containing 0 and 1's. See Below

0 1 0 1 0 0 
0 1 1 0 0 0 
0 0 0 1 0 1
0 1 1 0 1 0 
0 0 0 0 0 1

I want to fill the matrix byrow from values listed (c("J" "J" "A" "A" "A" "A" "... continue "A" until end of matrix") but begin at the first 1 in each row. See below:

0 J J A A A
0 J J A A A 
0 0 0 J J A
0 J J A A A
0 0 0 0 0 J

As of now, I have created a values list and a function to determine where the first 1 is. I'm lost on how to apply this to get the matrix I want.

pattern<- c("A","A","A","A","A") pattern <- c("J","J", rep(pattern, length.out = ncol(Matrix)-2)) indices<- apply(Matrix, 1, function(row) min(which(row == 1)))

Solution

A few vectorized options:

m <- max.col(cbind(mat, 1L), "f")
m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
#>      [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [2,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [3,] "0"  "0"  "0"  "J"  "J"  "A" 
#> [4,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [5,] "0"  "0"  "0"  "0"  "0"  "J"

m <- max.col(cbind(mat, 1L), "f")
array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
#>      [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [2,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [3,] "0"  "0"  "0"  "J"  "J"  "A" 
#> [4,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [5,] "0"  "0"  "0"  "0"  "0"  "J"

matrix(
  rep.int(c("0", "J", "A"), c(ncol(mat), 2L, ncol(mat) - 2L))[
    sequence(rep(ncol(mat), nrow(mat)), ncol(mat) - max.col(cbind(mat, 1L), "f") + 2L)
  ], nrow(mat), ncol(mat), 1
)
#>      [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [2,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [3,] "0"  "0"  "0"  "J"  "J"  "A" 
#> [4,] "0"  "J"  "J"  "A"  "A"  "A" 
#> [5,] "0"  "0"  "0"  "0"  "0"  "J"

Benchmarking shows the advantage of using a vectorized approach. Define various approaches as functions:

f1 <- function(mat) {
  m <- max.col(cbind(mat, 1L), "f")
  m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
  matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
}

f2 <- function(mat) {
  m <- max.col(cbind(mat, 1L), "f")
  array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
}

f3 <- function(mat) {
  d <- dim(mat)
  matrix(
    rep.int(c("0", "J", "A"), c(d[2], 2L, d[2] - 2L))[
      sequence(rep(d[2], d[1]), d[2] - max.col(cbind(mat, 1L), "f") + 2L)
    ], d[1], d[2], 1
  )
}

Andre <- function(mat) {
  # from Andre Wildberg
  t(apply(mat, 1, function(x){
    res <- which(x == 1)[1] - 1
    res <- replace(res, is.na(res), length(x))
    c(x[0:res], 
      rep("J", min(c(2, (length(x) - res)))),
      rep("A", max(c(0, (length(x) - res) - 2))))}))
}

Benchmark on a large-ish matrix.

mat <- matrix(sample(0:1, 1e5, 1, c(0.75, 0.25)), 1e4)

microbenchmark::microbenchmark(
  f1 = f1(mat),
  f2 = f2(mat),
  f3 = f3(mat),
  Andre = Andre(mat),
  check = "equal"
)
#> Unit: milliseconds
#>   expr       min        lq      mean    median        uq        max neval
#>     f1  1.391700  1.894001  2.127106  1.967701  2.097001   7.966101   100
#>     f2  1.616000  2.240750  2.691387  2.361451  2.590051   7.142301   100
#>     f3  1.118401  1.570600  1.745991  1.619800  1.739251   5.924802   100
#>  Andre 68.022601 70.696101 73.181934 72.200000 73.931750 117.784401   100