I have the following matrix:
# Example data: a 53 x 5 character matrix (values fill column by column).
# Column 1 mixes Procedure_A/B/C and NA; columns 2-4 are constant
# (Procedure_A, Procedure_B and Procedure_C respectively); column 5 is all NA.
Mat1 <- structure(c("Procedure_B", "Procedure_C", "Procedure_B", NA,
"Procedure_B", "Procedure_A", "Procedure_C", "Procedure_B", NA,
"Procedure_B", NA, "Procedure_B", NA, NA, "Procedure_A", "Procedure_A",
"Procedure_C", "Procedure_A", "Procedure_A", "Procedure_B", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_B", "Procedure_A", "Procedure_A",
NA, NA, "Procedure_C", NA, "Procedure_C", NA, "Procedure_A",
"Procedure_B", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_B",
"Procedure_A", "Procedure_B", "Procedure_C", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_C", "Procedure_C", "Procedure_A", NA,
NA, NA, NA, NA, NA, "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A", "Procedure_A",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B", "Procedure_B",
"Procedure_B", "Procedure_B", "Procedure_B", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C", "Procedure_C",
"Procedure_C", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), dim = c(53L, 5L))
I want to sample one value from each row, with the following constant probabilities:
# Selection probabilities, one per column of Mat1 (P[j] is the chance of
# picking the value in column j of a row); the five weights sum to 1.
P <- c(0.99, 0.002992, 0.003186, 0.003018, 0.000804)
That is, for each row, draw one of its 5 values, where the value in column j is chosen with probability P[j].
The expected output is 53 values.
I tried:
# Attempt under discussion: draws nrow(Mat1) cells from the matrix taken as one
# long vector (not one cell per row), with P repeated over the cells in
# storage order.
sample(Mat1, size = nrow(Mat1), prob = rep(P, nrow(Mat1)), replace = TRUE)
However, the outcome doesn't fit the expected distribution. I don't want to do this in a loop/apply, as my matrix can have many rows.
What is wrong with this command?
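Your command treats `Mat1` as one long vector, so it draws 53 cells from the whole matrix rather than one cell per row. In addition, a matrix is stored column by column, so `rep(P, nrow(Mat1))` cycles the five weights down each column instead of assigning `P[j]` to column j.
Instead, you can sample column indices, and extract values from the matrix according to the sampled column numbers.
This method is vectorized.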
# Draw one column index per row according to P, then extract the cell at each
# (row, sampled column) pair using a two-column index matrix -- no loop needed.
set.seed(1) # fixed seed so the printed result is reproducible
# seq_len() rather than 1:n, so an empty dimension never yields c(1, 0).
cols <- sample(
  seq_len(ncol(Mat1)),
  size = nrow(Mat1),
  replace = TRUE,
  prob = P
)
Mat1[cbind(seq_len(nrow(Mat1)), cols)]
# [1] "Procedure_B" "Procedure_C" "Procedure_B" NA "Procedure_B"
# [6] "Procedure_A" "Procedure_C" "Procedure_B" NA "Procedure_B"
# [11] NA "Procedure_B" NA NA "Procedure_A"
# [16] "Procedure_A" "Procedure_C" "Procedure_B" "Procedure_A" "Procedure_B"
# [21] "Procedure_C" "Procedure_C" "Procedure_C" "Procedure_B" "Procedure_A"
# [26] "Procedure_A" NA NA "Procedure_C" NA
# [31] "Procedure_C" NA "Procedure_A" "Procedure_B" "Procedure_A"
# [36] "Procedure_A" "Procedure_A" "Procedure_B" "Procedure_A" "Procedure_B"
# [41] "Procedure_C" "Procedure_B" "Procedure_B" "Procedure_B" "Procedure_C"
# [46] "Procedure_C" "Procedure_A" NA NA NA
# [51] NA NA NA