Search code examples
rfor-loopgeospatialrasterterra

How may one iterate over a SpatRaster's cell values?


I am coming from a Python background, but have been working with R's terra package lately in order to better deal with geospatial raster data.

I have been wanting to create a for loop that allows one to iterate over a SpatRaster's cells' values, in order to populate another SpatRaster of the same size, resolution and coord. ref. depending on some conditions.

My goal is to create a loop similar to the one below; this would work for a Python list-of-lists, and I am used to data frames having this structure in Python, but I have been unable to create something equivalent in R:

for row in raster_1:
 for column in row:
  if value_1 <= [row, column] < value_2:
   raster_2[row, column] = 5

#---------------------------------------------------------------------------------------------

Edit: I have been able to create a loop similar to the one below, however, I am not sure whether this could be the most efficient solution when dealing with rasters with over 1 000 000 cells each and 24 different "if".

# Load required libraries
library(terra)

# define rast_1 where values in each row are constant 
repeated_values_1 <- rep(71:75, each = 5)
m_1 <- matrix(repeated_values_1, nrow = 5, byrow = TRUE)
rast_1 <- rast(m_1) |> `names<-`("raster_1")

# define rast_2 where values in each column are constant 
repeated_values_2 <- rep(1:5, each = 5)
m_2 <- matrix(repeated_values_2, ncol = 5)
rast_2 <- rast(m_2) |> `names<-`("raster_2")

# define rast_3 which has the same resolution and extent as rast_1 and rast_2
rast_3 <- rast(rast_1, names = "raster_3", vals = NA)

# iterate over every cell of both rast_1 and rast_2
for (row in 1:nrow(rast_1)) {
  for (col in 1:ncol(rast_1)) {
    value_1 <- (rast_1)[row, col]
    value_2 <- (rast_2)[row, col]
    
    # check whether the values of rast_1 and rast_2 in the same position
    # satisfy certain conditional statements
    if (1 <= value_2 && value_2 < 3 && 70 <= value_1 && value_1 < 73) {
      rast_3[row, col] <- 0.1
    } else if (3 <= value_2 && value_1 >= 75){
      rast_3[row, col] <- 0.2
    }
  }
}

# plotting for comparison
layout(matrix(1:3, nrow = 1, ncol = 3))
plot(rast_1, main="rast_1")
plot(rast_2, main="rast_2")
plot(rast_3, main="rast_3")

Solution

  • You should generally avoid looping through raster cells as much as possible and use subsetting, raster algbera, terra::ifel() and other {terra} methods - https://rspatial.github.io/terra/reference/terra-package.html .

    It's not just some 1 .. 5% performance gain, even for tiny rasters looping through every single cell, applying some logic and reading/writing a single cell value at a time can easily be 1000+ times slower compared to more suitable methods.

    E.g. consider something like this to move over only specific values and to set a range to a fixed value:

    library(terra)
    #> terra 1.7.39
    par(mfrow = c(2, 2))
    
    # input matrix 
    m <- matrix(1:25, ncol = 5, byrow = TRUE)
    m
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    1    2    3    4    5
    #> [2,]    6    7    8    9   10
    #> [3,]   11   12   13   14   15
    #> [4,]   16   17   18   19   20
    #> [5,]   21   22   23   24   25
    
    # raster 1
    rast_1 <- rast(m) |> `names<-`("raster_1")
    rast_1
    #> class       : SpatRaster 
    #> dimensions  : 5, 5, 1  (nrow, ncol, nlyr)
    #> resolution  : 1, 1  (x, y)
    #> extent      : 0, 5, 0, 5  (xmin, xmax, ymin, ymax)
    #> coord. ref. :  
    #> source(s)   : memory
    #> name        : raster_1 
    #> min value   :        1 
    #> max value   :       25
    plot(rast_1, main = "rast_1")
    
    # raster 2, filled with NA values
    rast_2 <- rast(rast_1, names = "raster_2", vals = NA)
    
    # creating masks (TRUE / FALSE values):
    (rast_1 %% 2 == 0) |> plot(main = "rast_1 %% 2 == 0")
    (rast_1 > 10 & rast_1 <= 15) |> plot(main = "rast_1 > 10 & rast_1 <= 15")
    
    ## using those for submitting
    # copy certain values from one raster to another:
    rast_2[rast_1 %% 2 == 0] <- rast_1[rast_1 %% 2 == 0]
    
    # set certain target values based on source values:
    rast_2[rast_1 > 10 & rast_1 <= 15] <- 25
    plot(rast_2, main = "rast_2[...] <- ...")
    

    If you absolutely do need to loop through cells, this should get you started:

    rast_3 <- rast(rast_1, names = "raster_3", vals = NA)
    for (cid in 1:ncell(rast_1)){
      if(rast_1[cid] > 10 && rast_1[cid] <= 15){
        rast_3[cid] <- 5
      }
    }
    
    bench::mark() subset vs loop with a simple condition vs ifel()

    Though here's a simple benchmark with a tiny 100x100 raster just to illustrate what you should be prepared for, results from all three methods are comparable ( identical(rast_x[], rast_y[]) is TRUE ).

    # 100x100 input rater for benchmark, 
    # random values in range of 1..10, uniform distribution, 
    # each value is used for ~10% of cells
    set.seed(123)
    rast_uniform <- 
      sample(1:10, size = 10000, replace = TRUE) |> 
      matrix(nrow = 100) |>
      rast()
    rast_uniform
    #> class       : SpatRaster 
    #> dimensions  : 100, 100, 1  (nrow, ncol, nlyr)
    #> resolution  : 1, 1  (x, y)
    #> extent      : 0, 100, 0, 100  (xmin, xmax, ymin, ymax)
    #> coord. ref. :  
    #> source(s)   : memory
    #> name        : lyr.1 
    #> min value   :     1 
    #> max value   :    10
    summary(rast_uniform)
    #>      lyr.1       
    #>  Min.   : 1.000  
    #>  1st Qu.: 3.000  
    #>  Median : 6.000  
    #>  Mean   : 5.507  
    #>  3rd Qu.: 8.000  
    #>  Max.   :10.000
    table(rast_uniform[])
    #> 
    #>    1    2    3    4    5    6    7    8    9   10 
    #> 1034 1003 1021  941  987  966  980 1028 1009 1031
    
    # NA rasters for benchmark
    rast_na_1 <- rast_na_2 <- rast_na_3 <- rast(rast_uniform, vals = NA)
    
    # banchmark sets ~10% of cell values, subset vs loop vs ifel()
    bnch <- bench::mark(
      subset = { rast_na_1[rast_uniform == 5] <- 0; rast_na_1 },
      loop   = { for (cid in 1:ncell(rast_uniform)) {if (rast_uniform[cid] == 5) rast_na_2[cid] <- 0}; rast_na_2 },
      ifel   = { rast_na_3 <- ifel(rast_uniform == 5, 0, NA); rast_na_3 },
      filter_gc = FALSE
    )
    

    Note the units, milliseconds vs seconds, tested loop is ~1700 times(!) slower when compared against subsetting.

    bnch
    #> # A tibble: 3 × 6
    #>   expression      min   median `itr/sec` mem_alloc `gc/sec`
    #>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
    #> 1 subset       4.68ms   4.85ms  191.        6.26KB     5.97
    #> 2 loop         10.26s   10.26s    0.0975    3.23MB     6.34
    #> 3 ifel         7.33ms   8.71ms   94.9       8.84KB     5.93
    
    Edit: applying the same approach on provided example rasters

    With provided example where conditions are exclusive, subsetting by boolean SpatRasters still works, though be aware of 1 <= rast_2 vs rast_2 >= 1:

    ## example input rasters
    # rast_1, values in each row are constant
    as.matrix(rast_1, wide = TRUE) 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]   71   71   71   71   71
    #> [2,]   72   72   72   72   72
    #> [3,]   73   73   73   73   73
    #> [4,]   74   74   74   74   74
    #> [5,]   75   75   75   75   75
    # rast_2, values in each column are constant 
    as.matrix(rast_2, wide = TRUE) 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]    1    2    3    4    5
    #> [2,]    1    2    3    4    5
    #> [3,]    1    2    3    4    5
    #> [4,]    1    2    3    4    5
    #> [5,]    1    2    3    4    5
    
    rast_4 <- rast(rast_1, names = "raster_4", vals = NA)
    rast_4[rast_2 >= 1 & rast_2 < 3 & rast_1 >= 70 & rast_1 < 73] <- 0.1
    rast_4[rast_2 >= 3 & rast_1 >= 75] <- 0.2
    as.matrix(rast_4, wide = TRUE)
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,]  0.1  0.1   NA   NA   NA
    #> [2,]  0.1  0.1   NA   NA   NA
    #> [3,]   NA   NA   NA   NA   NA
    #> [4,]   NA   NA   NA   NA   NA
    #> [5,]   NA   NA  0.2  0.2  0.2
    
    # in somewhat unituitive way, `1 <= rast_2` and `rast_2 >= 1` results are different, 
    # definetly something to be aware of:
    as.matrix(rast_2 >= 1, wide = TRUE) 
    #>      [,1] [,2] [,3] [,4] [,5]
    #> [1,] TRUE TRUE TRUE TRUE TRUE
    #> [2,] TRUE TRUE TRUE TRUE TRUE
    #> [3,] TRUE TRUE TRUE TRUE TRUE
    #> [4,] TRUE TRUE TRUE TRUE TRUE
    #> [5,] TRUE TRUE TRUE TRUE TRUE
    as.matrix(1 <= rast_2, wide = TRUE) 
    #>       [,1] [,2] [,3] [,4] [,5]
    #> [1,] FALSE TRUE TRUE TRUE TRUE
    #> [2,] FALSE TRUE TRUE TRUE TRUE
    #> [3,] FALSE TRUE TRUE TRUE TRUE
    #> [4,] FALSE TRUE TRUE TRUE TRUE
    #> [5,] FALSE TRUE TRUE TRUE TRUE
    

    For more complicated scenarios I'd first go through https://rspatial.org/spatial/8-rastermanip.html few times along with https://rspatial.github.io/terra/reference/terra-package.html , there's a good chance that the real world problem is more elegantly solvable through (a combination of) apply-like methods, classification, factor layers, cross-tabulation, ...