Search code examples
rspatial

remove spatial points outside of polyton


I am trying to remove data points from outside of a pre-defined polygon using the code below.

However, when I plot what was removed vs retained, there are a few spots that look like some points were removed incorrectly (i.e., red points within blue polygon) and several that were not removed (i.e., grey points outside of polygon near Iceland). How do I change this code to that those areas are correctly removed or retained?

enter image description here

library(tidyverse)
library(openxlsx)
library(sp)
library(sf)
library(rnaturalearth)

# download data
zooWide <- read.xlsx("https://dassh.ac.uk/downloads/mba_rdm/NWAtl_CPR%20data.xlsx", 
                     cols = c(1:6, 8:20)) %>% rename(YEAR = Year)

# create list of spatial points for samples
sp <- SpatialPoints(dplyr::select(zooWide, Longitude, Latitude),
                    proj4string = CRS("+proj=longlat +datum=WGS84")) %>% st_as_sf()

# set boundaries for retaining samples
poly <- st_polygon(list(as.matrix(data.frame(lon = c(-6, -6, -50, -50, -64, -64, -54, -54, -6),
                                              lat = c(45, 66, 66, 70, 70, 51, 51, 45, 45))))) %>%
  st_sfc(crs = st_crs("+proj=longlat +datum=WGS84"))

# determine if sample locations fall within polygon
samples = apply(st_intersects(sp, poly, sparse = TRUE), 1, any)

# filter all samples to those within polygon
zooRetained <- filter(zooWide, samples == TRUE)
zooRemoved <- filter(zooWide, samples == FALSE)

# basemap
world <- ne_countries(scale = "medium", returnclass = "sf")

# map
ggplot() +
  # samples
  geom_point(data = zooRemoved, aes(x = Longitude, y = Latitude), alpha = 0.1, col = "red") +
  geom_point(data = zooRetained, aes(x = Longitude, y = Latitude), alpha = 0.1) +
  # region
  geom_sf(data = poly, aes(geometry = geometry), col = "navy", fill = "transparent", linewidth = 2) + 
  # basemap
  geom_sf(data = world, colour = NA, fill = "grey75") +
  scale_y_continuous(labels = ~.x, limits = c(43, 72), breaks = seq(45, 70, 5)) +
  scale_x_continuous(labels = ~.x, limits = c(-68, -2), breaks = seq(-70, 0, 10)) +
  labs(x = NULL, y = NULL)

Solution

  • As poly is defined only by corners, shortest paths for horizontal edges on a sphere will not follow parallels, at least not at those latitudes. Though this is easy to fix by adding more points to polygon edges, we can use st_segmentize() for that.

    library(dplyr, warn.conflicts = FALSE)
    library(openxlsx)
    library(sf)
    #> Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
    library(rnaturalearth)
    #> Support for Spatial objects (`sp`) will be deprecated in {rnaturalearth} and will be removed in a future release of the package. Please use `sf` objects with {rnaturalearth}. For example: `ne_download(returnclass = 'sf')`
    library(ggplot2)
    
    # by default sf uses spherical geometires for shapes with geographic coordinates,
    # if you just define corners, edges will follow the spehere in a way you probably 
    # did not expect; to keep vertiacl edges aligned with parallels, we could add more points
    # to poly with st_segmentize()
    poly <- 
      st_polygon(list(cbind(lon = c(-6, -6, -50, -50, -64, -64, -54, -54, -6),
                            lat = c(45, 66, 66, 70, 70, 51, 51, 45, 45)))) %>%
      st_segmentize(dfMaxLength = .1) %>% 
      st_sfc(crs = "WGS84")
    
    zooWide <- read.xlsx("https://dassh.ac.uk/downloads/mba_rdm/NWAtl_CPR%20data.xlsx", 
                         cols = c(1:6, 8:20)) %>% rename(YEAR = Year)
    
    # frame to sf object, add a feature indicating point intersactions
    zooWide_sf <- zooWide %>% 
      st_as_sf(coords = c("Longitude", "Latitude"), crs = "WGS84") %>% 
      mutate(retained = st_intersects(geometry, poly, sparse = FALSE)[,1])
    
    # some Natural Earth polygons are bit broken, one workaround is disabling s2;
    # though you might consider some other source for shapes instead, 
    # i.e. giscoR::gisco_get_countries()
    sf_use_s2(FALSE)
    #> Spherical geometry (s2) switched off
    world <- ne_countries(scale = "medium", returnclass = "sf")$geometry %>% 
      st_crop(c(xmin=-68, ymin=43, xmax=-2, ymax=72))
    #> although coordinates are longitude/latitude, st_intersection assumes that they
    #> are planar
    
    ggplot() +
      geom_sf(data = world, colour = NA, fill = "grey75") +
      geom_sf(data = zooWide_sf, aes(colour = retained), alpha = .1, show.legend = FALSE) +
      geom_sf(data = poly, fill = NA, colour = "navy", linewidth = 2) +
      scale_color_manual(values = c("FALSE" = "red", "TRUE" = "black")) +
      coord_sf(expand = FALSE)
    

    Created on 2023-11-18 with reprex v2.0.2