Search code examples
rdata.tablepolygonspatialr-sf

R filling in a matrix with values from spatial data


I have a matrix in R called “habitat” with latitude as the rownames and longitude as the column names. The cells are filled with NA right now. My end goal is to have the whole matrix filled with values - either 0s or values from another data frame.

For the cells that are on land, I want the cell value to be 0.

I created a simple feature collection using the sf package in R with a bounding box that matches the lats and lons of the habitat matrix and 1 feature that is a polygon of the land.

land<-st_read(paste0(nc_data_path, land))
land <- st_transform(land, crs = "EPSG:4326")
vertices <- matrix(c(bbox["xmin"], bbox["ymin"], 
                     bbox["xmax"], bbox["ymin"], 
                     bbox["xmax"], bbox["ymax"], 
                     bbox["xmin"], bbox["ymax"], 
                     bbox["xmin"], bbox["ymin"]), ncol = 2, byrow = TRUE)
polygon <- st_polygon(list(vertices))
polygon <- st_sfc(polygon)
st_crs(polygon) <- st_crs(st_crs("EPSG:4326"))
land=st_crop(land, polygon) 

I can't figure out how to use the land simple feature to fill in the habitat matrix with 0s where there is land. I tried turning the habitat matrix into a spatial points data frame, but I'm not sure where to go from there

latitudes <- as.numeric(rownames(habitat))
longitudes <- as.numeric(colnames(habitat))
habitat_dt <- data.table(latitude = rep(latitudes, each = ncol(habitat)),
                         longitude = rep(longitudes, times = nrow(habitat)),
                         value = as.vector(habitat))
habitat_points_sf <- st_as_sf(habitat_dt, coords = c("longitude", "latitude"), crs = 4326)

For the cells not on land (in the ocean), I want the values to be filled values from a dataframe called "sdm" based on the closest lat/lon (the sdm data frame columns are lat, lon, and lbs). I want to avoid using a for loop to do this.


Solution

  • Solution and illustration below (where habitats_dt is a data.table with a geometry column, and sdm_sf are land_sf are sf dataframes).

    Flag the ocean habitats using sf::st_within:

    habitats_dt[, ocean := is.na(as.logical(st_within(geometry, land_sf)))]
    

    Add row ID of sdm nearest neighbour to each ocean habitat, using nngeo::st_nn:

    habitats_dt[ocean==TRUE, sdm_rowid := transpose(st_nn(geometry, sdm_sf))]
    

    Add corresponding lbs attribute from the sdm data (0 for land cells)

    habitats_dt[, lbs := sdm_sf$lbs[c(habitats_dt$sdm_rowid)]][is.na(lbs), lbs := 0]
    

    Convert to matrix:

    result <- as.matrix(dcast(habitats_dt, lat ~ lon, value.var="lbs"), rownames=T)
    

    Illustration: enter image description here

    library(data.table)
    library(sf)
    library(nngeo)
    
    # DUMMY DATA
    # habitats
    min_lat <- -4; max_lat <- min_lat + 10; min_lon <- 40; max_lon <- min_lon + 10
    lat <- seq(min_lat, max_lat, 1) # as.numeric(rownames(habitat))
    lon <- seq(min_lon, max_lon, 2) # as.numeric(colnames(habitat))
    habitats_dt <- data.frame(lat = rep(lat, each = length(lon)), lon = rep(lon, times = length(lat))) |>
      st_as_sf(coords = c("lon", "lat"), crs = 4326, remove=F) |>
      setDT()
    # land
    land_sf <- st_polygon(list(cbind(min_lon+c(1.5,3.5,9.5,1.5), min_lat+c(1.5,9.5,6.5,1.5))))
    land_sf <- st_sf(geometry = st_sfc(land_sf), crs = 4326)
    # sdm
    set.seed(123)
    n <- 15
    sdm_sf <- data.frame(
      lat = runif(n, min = min_lat, max = max_lat),
      lon = runif(n, min = min_lon, max = max_lon),
      lbs = seq(100,999,length.out=n)) |>
      st_as_sf(coords = c("lon", "lat"), crs = 4326)
    sdm_sf <- setDT(sdm_sf)[is.na(as.logical(st_within(geometry, land_sf)))] |> st_as_sf()
    
    class(habitats_dt)
    class(land_sf)
    class(sdm_sf)
    
    # SOLUTION
    habitats_dt[, ocean := is.na(as.logical(st_within(geometry, land_sf)))]
    habitats_dt[ocean==TRUE, sdm_rowid := transpose(st_nn(geometry, sdm_sf))]
    habitats_dt[, lbs := sdm_sf$lbs[c(habitats_dt$sdm_rowid)]][ocean==FALSE, lbs := 0]
    result <- as.matrix(dcast(habitats_dt, lat ~ lon, value.var="lbs"), rownames=T)   
    
    # VISUAL CHECK
    print(result)
    library(ggplot2)
    ggplot() + 
      geom_sf(data=land_sf, fill="grey") +
      geom_sf(data=st_as_sf(habitats_dt), aes(colour=lbs, size=lbs)) +
      geom_sf(data=sdm_sf, aes(colour=lbs, size=lbs), shape="X") +
      guides(size="none")