I have a shpfile from the entire world that I converted using the as_Spatial()
function for compatibility with the sp package.
set.seed(27)
shp <- sf::st_read("earth_gadm.shp")
shape <- as_Spatial(shp)
As I am not working with any particular region, I assigned the +proj=longlat +ellps=WGS84 +datum=WGS84
crs to my shpfile.
crs <- "+proj=longlat +ellps=WGS84 +datum=WGS84"
proj4string(shape) = crs
Following the Matt Strimas-Mackei workflow I used spsample()
and HexPoints2SpatialPolygons()
to create a hexagonal grid based on my shape object, and then I intersected the grid and the polygon.
size <- 2.5 #2.5 degrees as i am working with a latlong projection (correct?)
hex_points <- spsample(shape, type = "hexagonal", cellsize = size)
hex_grid <- HexPoints2SpatialPolygons(hex_points, dx = size)
shape.grid <- gIntersection(shape, hex_grid, byid = T)
I plotted some points on my new shapefile and overlaid them onto my shape.grid object.
library(rgbif)
gbif_data <- occ_data(scientificName = 'Lestes sponsa',
hasCoordinate = TRUE, limit = 60)
gbif_data <- gbif_data$data
coords <- gbif_data[ , c("decimalLongitude", "decimalLatitude")]
coords$decimalLatitude <- as.numeric(coords$decimalLatitude)
coords$decimalLongitude <- as.numeric(coords$decimalLongitude)
coordinates(coords) <- ~decimalLongitude + decimalLatitude
coords <- data.frame(x = coords$decimalLongitude, y = coords$decimalLatitude)
coords <- SpatialPointsDataFrame(coords= coords, data = gbif_data)
proj4string(coords) = crs
x11()
plot(shape.grid, col = "grey50", bg = "light blue", axes = TRUE, cex = 20)
points(coords, col = 'blue', pch=20, cex = 0.75)
overlaid <- over(shape.grid, coords, returnList = T)
overlaid <- data.frame(matrix(unlist(overlaid), nrow=60,
byrow=TRUE),stringsAsFactors=FALSE)
Now I am trying to extract mean bioclimatic variables from the grid cells which have points plotted on. I also have 19 .bil rasters which I downloaded from Wordclim. I was thinking in use these rasters to extract the bioclimatic variables. However, I am stuck at this step.
I tried:
bioclim_data <- extract(x=stackrasters, c(overlaid$decimalLongitude, overlaid$decimalLatitude))
However, I am not sure if I am extracting mean values from the grid cells, and besides that only NA values are being returned with the above command line.
Excuse me if I'll only use similar datasets to demonstrate the workflow (gadm_410 is ~1.4 GB, wc2.1_30s_bio is ~ 9.7 GB). I'll also try to stick to sf
and terra
as much as possible:
library(sf)
#> Linking to GEOS 3.9.1, GDAL 3.3.2, PROJ 7.2.1; sf_use_s2() is TRUE
library(rgbif)
# I used the Admin 0 - Countries (1:10) dataset from Natural Earth
shp <- sf::st_read("ne_10m_admin_0_countries.shp")
#> Reading layer `ne_10m_admin_0_countries' from data source
#> `ne_10m_admin_0_countries.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 258 features and 168 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 83.6341
#> Geodetic CRS: WGS 84
# make hexagonal grid with res = 2.5° in WGS 84
grid <- sf::st_make_grid(shp,
cellsize = 2.5,
crs = 4326,
square = FALSE) |> sf::st_as_sf()
# get data
gbif_data <- occ_data(scientificName = 'Lestes sponsa',
hasCoordinate = TRUE,
limit = 60)
gbif_data <- gbif_data$data
# create a simple features object from your data
data_sf <- sf::st_as_sf(gbif_data,
coords = c("decimalLongitude", "decimalLatitude"),
crs = sf::st_crs(4326))
# select objects from grid (= cells) containing points from data_sf (= locations)
grid_subset <- sf::st_filter(grid, data_sf)
Almost finished, you only have to import your raster data and make use of terra::extract()
to get your desired values:
library(terra)
# I used wc2.1_30s_prec from WorldClim, read using `rast()`
files <- list.files(pattern = "*.tif")
prec <- terra::rast(files)
# note that the resulting SpatRast object has 12 layers
prec
#> class : SpatRaster
#> dimensions : 21600, 43200, 12 (nrow, ncol, nlyr)
#> resolution : 0.008333333, 0.008333333 (x, y)
#> extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
#> coord. ref. : lon/lat WGS 84 (EPSG:4326)
#> sources : wc2.1_30s_prec_01.tif
#> wc2.1_30s_prec_02.tif
#> wc2.1_30s_prec_03.tif
#> ... and 9 more source(s)
#> names : wc2.1~ec_01, wc2.1~ec_02, wc2.1~ec_03, wc2.1~ec_04, wc2.1~ec_05, wc2.1~ec_06, ...
#> min values : 0, 0, 0, 0, 0, 0, ...
#> max values : 973, 1309, 1145, 1049, 2081, 2226, ...
# extract values from prec by polygons from grid_subset using mean as aggregate
results <- terra::extract(prec, terra::vect(grid_subset), fun = mean)
# prec has 12 layers, grid_subset consists of 60 polygons,
# c.f. dimensions below (+ID column containing an identifier of the related polygon)
str(results)
#> 'data.frame': 60 obs. of 13 variables:
#> $ ID : num 1 2 3 4 5 6 7 8 9 10 ...
#> $ wc2.1_30s_prec_01: num 74 70 70 70 70 70 70 67 67 67 ...
#> $ wc2.1_30s_prec_02: num 65 51 51 51 51 51 51 52 52 52 ...
#> $ wc2.1_30s_prec_03: num 52 67 67 67 67 67 67 67 67 67 ...
#> $ wc2.1_30s_prec_04: num 51 46 46 46 46 46 46 45 45 45 ...
#> $ wc2.1_30s_prec_05: num 61 62 62 62 62 62 62 61 61 61 ...
#> $ wc2.1_30s_prec_06: num 46 70 70 70 70 70 70 70 70 70 ...
#> $ wc2.1_30s_prec_07: num 42 70 70 70 70 70 70 66 66 66 ...
#> $ wc2.1_30s_prec_08: num 43 60 60 60 60 60 60 57 57 57 ...
#> $ wc2.1_30s_prec_09: num 62 72 72 72 72 72 72 67 67 67 ...
#> $ wc2.1_30s_prec_10: num 68 71 71 71 71 71 71 66 66 66 ...
#> $ wc2.1_30s_prec_11: num 75 79 79 79 79 79 79 72 72 72 ...
#> $ wc2.1_30s_prec_12: num 77 77 77 77 77 77 77 73 73 73 ...
Sorry if I did not really answer your question and dared to project your grid cells.. ;-)