Select values from a 4-dimensional R stars array using 2-dimensional array containing the dimension values to select

Maybe somebody can help me with this:

I have a 4-dimensional spatiotemporal stars dataset, which contains annual values of a variable for four different tree species:

library(stars)
# 4-dimensional stars data: x,y,time, species
dat <- st_as_stars(array(data = runif(2000), dim = c(10, 10, 5, 4))) |> 
  setNames("data_val") |>
  st_set_dimensions(names= c("x","y", "year", "tree_species")) |>
  st_set_dimensions("year", values = 2018:2022) |>
  st_set_dimensions("tree_species",values = c("bu", "fi", "ki", "ei"))
plot(dat)  # (only values of the first slice of tree_speciess ("bu") displayed)

As a second object I have a species identity map, which contains information on which pixel belongs to which species:

# 2-d species map (x,y)
species_map <- st_as_stars(matrix(sample(c("bu", "fi", "ki", "ei"),size = 100, replace = TRUE), 
                                  ncol = 10, byrow=T)) |> 
  setNames("tree_species") |> 
  st_set_dimensions(c(1,2), names= c("x","y"))
species_map$tree_species <- factor(species_map$tree_species)
plot(species_map, col = c("#fdc086","#ffff99","#7fc97f","#beaed4"))

Based on this species_map, I want to extract pixel by pixel (x/y) the timeseries from the data that matches tree_species. The resulting stars-object would be a 3-d array (as the 4th data dimension (tree_species) was collapsed), with the data-values in (x,y,year) coming from the respective species entry. So, in the upper-left corner of the resulting stars-array (species ei,ki,ei,bu) there would be the data-values from the 4th-dimension indexes (4,3,4,1).

All my attempts so far failed... I tried to use ifelse(), which resulted in the right dimensionality, however it did not obtain the right results. I also tried to match values, but this also failed:

dat$data_val[, , , species_map$tree_species]

I guess my problem can be solved easily with some base R commands.

Thank you for your help.

All the best Paul

Solution

Based on the previous posts, I managed to solve this myself. Thank you for your help!

First we need to translate tree_species to the 4th dimension index in dat (like in Allan's for loop, see above), and then use JMenezes' indexmatrix, that just needed some little adjustments:

# create tree_species index grid
# (-> where to find the right tree_species in dat)
species_map$tree_species_ind <- matrix(match(species_map$tree_species,
                                             st_get_dimension_values(dat, 4)), 
                                       nrow= 10, ncol = 10)

# create index matrix 
indexmatrix <- expand.grid(x=1:10, y = 1:10, year = 1:5) # Quickway to generate all time-x-y combinations, recycling by time first
indexmatrix$tree_species <- rep(species_map$tree_species_ind,5) # Add species
indexmatrix <- as.matrix(indexmatrix)

# prepare stars object to hold the results
res <- dat[,,,,1,drop =TRUE] # copy one slice of tree_species from dat-array, and drop dimension

# extract results
res$data_val <- dat$data_val[indexmatrix]
plot(res)

Verification

It's diffcult to check wether everything works as expected with my initial example of random data. Therefore I create a second example of 'stars'-data with a fixed set of numbers varying along dimension 4. In the result, one pixel should finally hold the same value for each year. Also, I change from a square to rectangle map, to check if the rows and columns dimensions are alright.

# data with 4 fixed numbers varying along tree_species-dimension:
sample_numbers <- sample(1:10, size = 4)
dat <- st_as_stars(array(data = rep(sample_numbers,each = 5*7*3), 
                          dim = c(5, 7, 3, 4))) |> 
  setNames("data_val") |>
  st_set_dimensions(names= c("x","y", "year", "tree_species")) |>
  st_set_dimensions("year", values = 2018:2020) |>
  st_set_dimensions("tree_species",values = c("bu", "fi", "ki", "ei"))


# 2-d species map (x,y)
species_map <- st_as_stars(array(sample(c("bu", "fi", "ki", "ei"),
                                         size = 5*7, 
                                         replace = TRUE), 
                                  dim = c(5,7))) |> 
  setNames("tree_species") |> 
  st_set_dimensions(c(1,2), names= c("x","y"))
species_map$tree_species_f <- factor(species_map$tree_species)
plot(species_map["tree_species_f"], col = c("#fdc086","#ffff99","#7fc97f","#beaed4"))

# create index grid
species_map$tree_species_ind <- matrix(match(species_map$tree_species,
                                             st_get_dimension_values(dat, 4)), 
                                       nrow= 5, ncol = 7)

# create index matrix
indexmatrix = expand.grid(x=1:5, y = 1:7, year = 1:3) # Quickway to generate all time-x-y combinations, recycling by time first
indexmatrix$tree_species = rep(species_map$tree_species_ind,3) # Add species
indexmatrix = as.matrix(indexmatrix)

res <- dat[,,,,1,drop =T] # copy one tree_species-slice from dat-array, and drop dimension
res$data_val <- dat$data_val[indexmatrix]
res$data_val_f <- as.factor(res$data_val) # convert to factor for plotting

# Verification
plot(res["data_val_f"]) # results plot: same pattern for each year, as expected
rbind(sample_numbers, st_get_dimension_values(dat, 4)) # Check which number maps to  which tree_species
plot(species_map["tree_species_f"], col = c("#fdc086","#ffff99","#7fc97f","#beaed4")) # plot species map to compare