In R Program, I have a matrix containing 0 and 1's. See Below
0 1 0 1 0 0
0 1 1 0 0 0
0 0 0 1 0 1
0 1 1 0 1 0
0 0 0 0 0 1
I want to fill the matrix byrow from values listed (c("J" "J" "A" "A" "A" "A" "... continue "A" until end of matrix") but begin at the first 1 in each row. See below:
0 J J A A A
0 J J A A A
0 0 0 J J A
0 J J A A A
0 0 0 0 0 J
As of now, I have created a values list and a function to determine where the first 1 is. I'm lost on how to apply this to get the matrix I want.
pattern<- c("A","A","A","A","A")
pattern <- c("J","J", rep(pattern, length.out = ncol(Matrix)-2))
indices<- apply(Matrix, 1, function(row) min(which(row == 1)))
A few vectorized options:
m <- max.col(cbind(mat, 1L), "f")
m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] "0" "J" "J" "A" "A" "A"
#> [2,] "0" "J" "J" "A" "A" "A"
#> [3,] "0" "0" "0" "J" "J" "A"
#> [4,] "0" "J" "J" "A" "A" "A"
#> [5,] "0" "0" "0" "0" "0" "J"
Or
m <- max.col(cbind(mat, 1L), "f")
array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] "0" "J" "J" "A" "A" "A"
#> [2,] "0" "J" "J" "A" "A" "A"
#> [3,] "0" "0" "0" "J" "J" "A"
#> [4,] "0" "J" "J" "A" "A" "A"
#> [5,] "0" "0" "0" "0" "0" "J"
Or
matrix(
rep.int(c("0", "J", "A"), c(ncol(mat), 2L, ncol(mat) - 2L))[
sequence(rep(ncol(mat), nrow(mat)), ncol(mat) - max.col(cbind(mat, 1L), "f") + 2L)
], nrow(mat), ncol(mat), 1
)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] "0" "J" "J" "A" "A" "A"
#> [2,] "0" "J" "J" "A" "A" "A"
#> [3,] "0" "0" "0" "J" "J" "A"
#> [4,] "0" "J" "J" "A" "A" "A"
#> [5,] "0" "0" "0" "0" "0" "J"
Benchmarking shows the advantage of using a vectorized approach. Define various approaches as functions:
f1 <- function(mat) {
m <- max.col(cbind(mat, 1L), "f")
m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
}
f2 <- function(mat) {
m <- max.col(cbind(mat, 1L), "f")
array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
}
f3 <- function(mat) {
d <- dim(mat)
matrix(
rep.int(c("0", "J", "A"), c(d[2], 2L, d[2] - 2L))[
sequence(rep(d[2], d[1]), d[2] - max.col(cbind(mat, 1L), "f") + 2L)
], d[1], d[2], 1
)
}
Andre <- function(mat) {
# from Andre Wildberg
t(apply(mat, 1, function(x){
res <- which(x == 1)[1] - 1
res <- replace(res, is.na(res), length(x))
c(x[0:res],
rep("J", min(c(2, (length(x) - res)))),
rep("A", max(c(0, (length(x) - res) - 2))))}))
}
Benchmark on a large-ish matrix.
mat <- matrix(sample(0:1, 1e5, 1, c(0.75, 0.25)), 1e4)
microbenchmark::microbenchmark(
f1 = f1(mat),
f2 = f2(mat),
f3 = f3(mat),
Andre = Andre(mat),
check = "equal"
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> f1 1.391700 1.894001 2.127106 1.967701 2.097001 7.966101 100
#> f2 1.616000 2.240750 2.691387 2.361451 2.590051 7.142301 100
#> f3 1.118401 1.570600 1.745991 1.619800 1.739251 5.924802 100
#> Andre 68.022601 70.696101 73.181934 72.200000 73.931750 117.784401 100