I have a matrix in R called “habitat” with latitude as the rownames and longitude as the column names. The cells are filled with NA right now. My end goal is to have the whole matrix filled with values - either 0s or values from another data frame.
For the cells that are on land, I want the cell value to be 0.
I created a simple feature collection using the sf package in R with a bounding box that matches the lats and lons of the habitat matrix and 1 feature that is a polygon of the land.
land<-st_read(paste0(nc_data_path, land))
land <- st_transform(land, crs = "EPSG:4326")
vertices <- matrix(c(bbox["xmin"], bbox["ymin"],
bbox["xmax"], bbox["ymin"],
bbox["xmax"], bbox["ymax"],
bbox["xmin"], bbox["ymax"],
bbox["xmin"], bbox["ymin"]), ncol = 2, byrow = TRUE)
polygon <- st_polygon(list(vertices))
polygon <- st_sfc(polygon)
st_crs(polygon) <- st_crs(st_crs("EPSG:4326"))
land=st_crop(land, polygon)
I can't figure out how to use the land simple feature to fill in the habitat matrix with 0s where there is land. I tried turning the habitat matrix into a spatial points data frame, but I'm not sure where to go from there
latitudes <- as.numeric(rownames(habitat))
longitudes <- as.numeric(colnames(habitat))
habitat_dt <- data.table(latitude = rep(latitudes, each = ncol(habitat)),
longitude = rep(longitudes, times = nrow(habitat)),
value = as.vector(habitat))
habitat_points_sf <- st_as_sf(habitat_dt, coords = c("longitude", "latitude"), crs = 4326)
For the cells not on land (in the ocean), I want the values to be filled values from a dataframe called "sdm" based on the closest lat/lon (the sdm data frame columns are lat, lon, and lbs). I want to avoid using a for loop to do this.
Solution and illustration below (where habitats_dt
is a data.table with a geometry column, and sdm_sf
are land_sf
are sf dataframes).
Flag the ocean habitats using sf::st_within
:
habitats_dt[, ocean := is.na(as.logical(st_within(geometry, land_sf)))]
Add row ID of sdm nearest neighbour to each ocean habitat, using nngeo::st_nn
:
habitats_dt[ocean==TRUE, sdm_rowid := transpose(st_nn(geometry, sdm_sf))]
Add corresponding lbs attribute from the sdm data (0 for land cells)
habitats_dt[, lbs := sdm_sf$lbs[c(habitats_dt$sdm_rowid)]][is.na(lbs), lbs := 0]
Convert to matrix:
result <- as.matrix(dcast(habitats_dt, lat ~ lon, value.var="lbs"), rownames=T)
library(data.table)
library(sf)
library(nngeo)
# DUMMY DATA
# habitats
min_lat <- -4; max_lat <- min_lat + 10; min_lon <- 40; max_lon <- min_lon + 10
lat <- seq(min_lat, max_lat, 1) # as.numeric(rownames(habitat))
lon <- seq(min_lon, max_lon, 2) # as.numeric(colnames(habitat))
habitats_dt <- data.frame(lat = rep(lat, each = length(lon)), lon = rep(lon, times = length(lat))) |>
st_as_sf(coords = c("lon", "lat"), crs = 4326, remove=F) |>
setDT()
# land
land_sf <- st_polygon(list(cbind(min_lon+c(1.5,3.5,9.5,1.5), min_lat+c(1.5,9.5,6.5,1.5))))
land_sf <- st_sf(geometry = st_sfc(land_sf), crs = 4326)
# sdm
set.seed(123)
n <- 15
sdm_sf <- data.frame(
lat = runif(n, min = min_lat, max = max_lat),
lon = runif(n, min = min_lon, max = max_lon),
lbs = seq(100,999,length.out=n)) |>
st_as_sf(coords = c("lon", "lat"), crs = 4326)
sdm_sf <- setDT(sdm_sf)[is.na(as.logical(st_within(geometry, land_sf)))] |> st_as_sf()
class(habitats_dt)
class(land_sf)
class(sdm_sf)
# SOLUTION
habitats_dt[, ocean := is.na(as.logical(st_within(geometry, land_sf)))]
habitats_dt[ocean==TRUE, sdm_rowid := transpose(st_nn(geometry, sdm_sf))]
habitats_dt[, lbs := sdm_sf$lbs[c(habitats_dt$sdm_rowid)]][ocean==FALSE, lbs := 0]
result <- as.matrix(dcast(habitats_dt, lat ~ lon, value.var="lbs"), rownames=T)
# VISUAL CHECK
print(result)
library(ggplot2)
ggplot() +
geom_sf(data=land_sf, fill="grey") +
geom_sf(data=st_as_sf(habitats_dt), aes(colour=lbs, size=lbs)) +
geom_sf(data=sdm_sf, aes(colour=lbs, size=lbs), shape="X") +
guides(size="none")