So... I'm trying to optimize for loppings in R.
My problem is: I have a multidimensional array (6 layers) and I want to get a matrix as answer. For each cell position (i,j) I need to apply a function that uses the 6 values in the position i,j and returns for the probability for each pixel.
My first idea was to create two nested for-loops scanning each pixel position and applying this function. However, as my matrix has many elements (approximately 2000 x 2000 x 6), it is taking TOO long to calculate the function at each pixel position. I would like to know if there is any way to optimize this calculation. I tried using the "apply" function using both dimensions as parameters of MARGIN {MARGIN = c(1,2)
} but apparently I'm computing something wrong.
Below is an example of the first version of my code:
data <- c(
8247, 9240, 8875, 20534, 14334, 10797,
8090, 9081, 8364, 21301, 13496, 9599,
8089, 8922, 8620, 19769, 13601, 9749,
8477, 9554, 9383, 18693, 14649, 10498,
8476, 9711, 9637, 17616, 14649, 10348,
8164, 8921, 8873, 16844, 12763, 9449,
8008, 8285, 8106, 17309, 12239, 8999,
7929, 8445, 7977, 19003, 12449, 8999,
7850, 8284, 7977, 18080, 12345, 8850,
8083, 8443, 7976, 19158, 12449, 8999,
8401, 9713, 9638, 18691, 14858, 10947,
8400, 9555, 9257, 18846, 14544, 10498,
8399, 9239, 9256, 18077, 15068, 10947,
8477, 9554, 9510, 17924, 15382, 11096,
8397, 9711, 9637, 17770, 14859, 10648,
8164, 9238, 8873, 18079, 13183, 9749,
7852, 8603, 8106, 18695, 12449, 9150,
7774, 8603, 7977, 18850, 12449, 8999,
7773, 8444, 8105, 18235, 12345, 8999,
7849, 8443, 8104, 18235, 12449, 9300,
8556, 9713, 9893, 18230, 15277, 11246,
8632, 9712, 9893, 17769, 15591, 10947,
8631, 9397, 9638, 17461, 16324, 11546,
8476, 9396, 9383, 17924, 15591, 11397,
8553, 9553, 9637, 18078, 14649, 10648,
8163, 8920, 8873, 17771, 13078, 9898,
7852, 8603, 8105, 18542, 12555, 8999,
7851, 8444, 8105, 19311, 12659, 8999,
7772, 8444, 8104, 18851, 12555, 8999,
7771, 8443, 8104, 19005, 12555, 8999,
8478, 9871, 10020, 17615, 15068, 10948,
8632, 9712, 10020, 17615, 15381, 11246,
8554, 9554, 9766, 17461, 15382, 11096,
8476, 9554, 9510, 17924, 15172, 11097,
8397, 9553, 9383, 17462, 15278, 11097,
8086, 9078, 8745, 18849, 13812, 10199,
7695, 8761, 8233, 20233, 13079, 9150,
7851, 8602, 8233, 20387, 12869, 9450,
7927, 8602, 8104, 20388, 12869, 9150,
7926, 8760, 8232, 20849, 13393, 9599,
8633, 9871, 9893, 17461, 15278, 11247,
8633, 9713, 10020, 17308, 15278, 11247,
8477, 9870, 9893, 17615, 15172, 11246,
8708, 9711, 9637, 17616, 15172, 10947,
8397, 9238, 9382, 17462, 15068, 10797,
8086, 8920, 8745, 18695, 14126, 10199,
7929, 8919, 8233, 20847, 13393, 9150,
7850, 8919, 8360, 20847, 13393, 8999,
7927, 8760, 8232, 20848, 13183, 9450,
7926, 8759, 8231, 20849, 13289, 9450,
8710, 9713, 10020, 17770, 15278, 11097,
8632, 9713, 9893, 17770, 15173, 11098,
8476, 9870, 9893, 18079, 15069, 11247,
8397, 9711, 9765, 17770, 15068, 10947,
8164, 9238, 9128, 17771, 14545, 10648,
8085, 8920, 8361, 19003, 13602, 9898,
8007, 8919, 8361, 20847, 13393, 9150,
7927, 8919, 8360, 20848, 13497, 9300,
7927, 8760, 8360, 21155, 13393, 9450,
7848, 8759, 8231, 20543, 13393, 9450,
8556, 9871, 9893, 17462, 14963, 11097,
8555, 9713, 9893, 17462, 14964, 11098,
8632, 9712, 9766, 17771, 15279, 11098,
8554, 9712, 9511, 18388, 14650, 10648,
8319, 9395, 9128, 20386, 13916, 10199,
8085, 9078, 8489, 20693, 13602, 9599,
8084, 8919, 8233, 21307, 13497, 9300,
8006, 8919, 8232, 21614, 13393, 9450,
8005, 8760, 8232, 21155, 13393, 8999,
8004, 8759, 8103, 20543, 13079, 9450,
8633, 9870, 9893, 17308, 15278, 11247,
8555, 9870, 10020, 17462, 15173, 11098,
8554, 9555, 9511, 18233, 14860, 10948,
8009, 8922, 8618, 19926, 13917, 10199,
7852, 8604, 8106, 21766, 13498, 9599,
8007, 8761, 8233, 21306, 13497, 9300,
7928, 8761, 8232, 21001, 13497, 9150,
7849, 8602, 8232, 20388, 13183, 9300,
7926, 8601, 8231, 20235, 13183, 9150,
7770, 8601, 8103, 20083, 13184, 9150,
8323, 9397, 9257, 19156, 14963, 10797,
8399, 9396, 9257, 18849, 15279, 10797,
8243, 9238, 8873, 20079, 14336, 10049,
7853, 8604, 8234, 20234, 13288, 9150,
8008, 8445, 8106, 20235, 13078, 9300,
7928, 8444, 8105, 19927, 12973, 9300,
7850, 8444, 8105, 18389, 12869, 9300,
7849, 8602, 8232, 18389, 12764, 9300,
7848, 8442, 8231, 20082, 12659, 9450,
7847, 8442, 8103, 20390, 12869, 9150,
8090, 8764, 8492, 20232, 13183, 9300,
8011, 8605, 8364, 19925, 13183, 9300,
7932, 8446, 8107, 19926, 13078, 9150,
8009, 8445, 7978, 19158, 12764, 9000,
7930, 8603, 8105, 19313, 12555, 9150,
7929, 8603, 8233, 19928, 12660, 9300,
7927, 8444, 7976, 19312, 12555, 8999,
7849, 8601, 8104, 19159, 12555, 8850,
7848, 8442, 8103, 20082, 13079, 9300,
7925, 8600, 8103, 20390, 13289, 9300
)
image <- array(data, dim = c(10, 10, 6))
num_classes = 2
params<- vector("list", num_classes)
mean_vector1 = c(7889.362, 8412.398, 8046.333, 17008.465, 11883.484, 8895.631)
mean_vector2 = c(8055.359, 8979.759, 8421.408, 21060.805, 13810.918, 9664.242)
covariance_matrix1 = matrix(c(
9004.794, 3393.195, 3512.241, 11185.344, 8613.778, 4262.351,
3393.195, 12405.309, 6722.598, 33555.189, 15467.398, 6200.901,
3512.241, 6722.598, 14015.815, 21658.040, 15597.915, 8170.201,
11185.344, 33555.189, 21658.040, 567199.44, 161168.165, 46471.445,
8613.778, 15467.398, 15597.915, 161168.16, 137379.407, 45141.111,
4262.351, 6200.901, 8170.201, 46471.44, 45141.111, 35420.010
), nrow = 6, byrow = TRUE)
covariance_matrix2 = matrix(c(
13137.14, 16641.66, 13388.65, 53306.99, 42323.70, 24993.38,
16641.66, 50107.41, 32027.36, 136436.34, 96649.78, 54599.03,
13388.65, 32027.36, 33801.94, 66746.02, 75361.02, 45798.90,
53306.99, 136436.34, 66746.02, 1037747.24, 418523.62, 203725.86,
42323.70, 96649.78, 75361.02, 418523.62, 306097.86, 167889.17,
24993.38, 54599.03, 45798.90, 203725.86, 167889.17, 115735.97
), nrow = 6, byrow = TRUE)
params[[1]] <- list(mean = mean_vector1, covariance_matrix = covariance_matrix1)
params[[2]] <- list(mean = mean_vector2, covariance_matrix = covariance_matrix2)
classified_image <- matrix(0, nrow(image), ncol(image))
for (row in 1:nrow(image)) {
for (col in 1:ncol(image)) {
pixel <- image[row, col,]
class_probabilities <- numeric(num_classes)
# Calcular probabilidades para cada classe
for (i in 1:num_classes) {
parameters <- params[[i]]
class_probabilities[i] <- gaussian_probability(pixel,parameters)
}
# Atribuir a classe com maior probabilidade
max_prob_class <- which.max(class_probabilities)
classified_image[row, col] <- max_prob_class
}
}
gaussian_probability <- function(pixel,parameters){
mean_vector = parameters$mean
d=6
covariance_matrix=parameters$covariance_matrix
det_cov <- det(covariance_matrix)
inv_cov <- solve(covariance_matrix)
exponent = -0.5 * (pixel - mean_vector) %*% inv_cov %*% as.vector(t(pixel - mean_vector))
coef = 1 /sqrt(((2*pi)^d)*det_cov)
probability = coef * exp(exponent)
return(probability)}
In my second version, I'm trying to use something like this (just to show what I'm trying to do):
image_probabilities <- array(0, dim = c(10, 10, num_classes))
for (i in 1:num_classes){
parameters <- params[[i]]
image_probabilities[[i]] <- gaussian_class_likelihood(image,parameters)}
gaussian_class_likelihood <- function(image, parameters){
image.array = as.array(image)
probability_image <- apply(image.array, MARGIN = c(1,2), function(pixel) {
class_probabilities <- gaussian_probability(pixel, parameters)
return(class_probabilities)
})
return(probability_image)}
The first approach is impractical because of the amount of time it takes to calculate each pixel...Could you help me to solve this problem?
It appears that the final product is classified_image
, rather than the probabilities for each pixel. The solution below uses three strategies:
sqrt((2*pi)^6)
does not affect the result of a comparison to determine which one is larger, and neither does taking the square root of both probabilities). If the actual probabilities are needed for some other portion of the code not shown in the question, completing the calculations is straightforward.Applying the two solutions as functions to compare timings:
Proposed solution:
f2 <- function() {
image2 <- matrix(image, 6, 100, 1)
mean_vector <- list(mean_vector1, mean_vector2)
covariance_matrix <- list(covariance_matrix1, covariance_matrix2)
image_centered <- lapply(mean_vector, \(v) image2 - v)
det_cov <- lapply(covariance_matrix, det)
inv_cov <- lapply(covariance_matrix, solve)
# get the image classifications
matrix(
max.col(
-mapply(
function(i) {
rowSums(
crossprod(image_centered[[i]], inv_cov[[i]])*t(image_centered[[i]])
) + log(det_cov[[i]])
},
1:num_classes
), "f"
), 10, 10
)
}
Original solution (modified to use log probabilities):
f1 <- function() {
params <- vector("list", num_classes)
params[[1]] <- list(mean = mean_vector1, covariance_matrix = covariance_matrix1)
params[[2]] <- list(mean = mean_vector2, covariance_matrix = covariance_matrix2)
classified_image <- matrix(0L, nrow(image), ncol(image))
gaussian_probability <- function(pixel,parameters){
mean_vector = parameters$mean
d=6
covariance_matrix=parameters$covariance_matrix
det_cov <- det(covariance_matrix)
inv_cov <- solve(covariance_matrix)
return(-(pixel - mean_vector) %*% inv_cov %*% as.vector(t(pixel - mean_vector)) - log(det_cov))
}
for (row in 1:nrow(image)) {
for (col in 1:ncol(image)) {
pixel <- image[row, col,]
class_probabilities <- numeric(num_classes)
# Calcular probabilidades para cada classe
for (i in 1:num_classes) {
parameters <- params[[i]]
class_probabilities[i] <- gaussian_probability(pixel,parameters)
}
# Atribuir a classe com maior probabilidade
max_prob_class <- which.max(class_probabilities)
classified_image[row, col] <- max_prob_class
}
}
classified_image
}
Timing (and checking that the results are identical):
microbenchmark::microbenchmark(
f1(),
f2(),
check = "identical"
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> f1() 4874.0 5122.95 5697.924 5341.0 5706.90 8474.6 100
#> f2() 104.5 120.35 156.873 147.1 184.05 299.9 100