So, I have a dataset of many points across of Europe. For illustration purposes see this image below:
Each of these points has one of four categories attached to it, so the dataset I am working with has points with X and Y coordinates, and these categories:
df <- data.frame(
category = c("A","A","B","A","A","B","A","A","C","B","B","C","D","B","C","B","D","C","B","D"),
X = c(4599984.10552057,4831788.90207249,4840009.6065062,4832038.78315722,4930825.32065239,4416629.65604407,4446435.30204568,4446438.1996941,4446638.22211161,4446635.32438607,4442550.72821205,4442553.62813824,4442750.75801933,4422604.29347903,4422602.42523751,4422805.49222056,4418632.41506483,4418832.47128213,4408657.25277069,4408857.25256069),
Y = c(2881156.36674124,2862062.90168895,3020826.90515788,2864693.62543509,2955978.25574188,3502994.58644907,3502550.9888811,3502750.90387191,3502747.90897273,3502547.99413997,3510607.49600397,3510807.41086243,3510604.49974218,3514904.75866658,3515102.48878262,3515100.46862153,3516963.23902827,3516960.26748617,3519109.88091922,3519310.80154912))
What I would like to do then is to overlay a grid of a certain dimension over these points like here:
To then count the share of each of the points in one of the grid cells and map them out, to create four different maps for the four different categories that I have showing for each category what is the share in each of the grid cells. So for example if in one of the grid cells we have 10 points, 5 belong to category A
and 5 to category B
the share for each category in that grid cell would be 0.5. In the end I would like to end up with four raster maps for each of the categories, where each pixel shows the share of the points of that category in that grid cell.
I found similiar questions like this: https://gis.stackexchange.com/questions/309407/computing-number-of-points-in-a-raster-grid-cell-in-r but usually it is just abotu number of points per grid cell, whereas I need the share of points for each of the four categories. Hope I explained myself well enough.
Here's one approach with {sf}
package. First we'll convert data.frame
to sf
object, generate a grid with unique cell_id
s and then use spatial join to match every point and cell_id
. From there we can find shares and join those to matching grid cell geometries.
library(sf)
#> Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(dplyr)
library(ggplot2)
df <- data.frame(
category = c("A","A","B","A","A","B","A","A","C","B","B","C","D","B","C","B","D","C","B","D"),
X = c(4599984.10552057,4831788.90207249,4840009.6065062,4832038.78315722,4930825.32065239,4416629.65604407,4446435.30204568,4446438.1996941,4446638.22211161,4446635.32438607,4442550.72821205,4442553.62813824,4442750.75801933,4422604.29347903,4422602.42523751,4422805.49222056,4418632.41506483,4418832.47128213,4408657.25277069,4408857.25256069),
Y = c(2881156.36674124,2862062.90168895,3020826.90515788,2864693.62543509,2955978.25574188,3502994.58644907,3502550.9888811,3502750.90387191,3502747.90897273,3502547.99413997,3510607.49600397,3510807.41086243,3510604.49974218,3514904.75866658,3515102.48878262,3515100.46862153,3516963.23902827,3516960.26748617,3519109.88091922,3519310.80154912))
# create sf object from point
points_sf <- st_as_sf(df, coords = c("X", "Y"))
# create a grid covering all points, cell size 100000 units,
# add unique cell_id
grid_sf <- st_make_grid(points_sf, cellsize = 100000) |>
st_sf(geometry = _) |>
mutate(cell_id = row_number(), .before = 1)
print(grid_sf, n = 3)
#> Simple feature collection with 42 features and 1 field
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 4408657 ymin: 2862063 xmax: 5008657 ymax: 3562063
#> CRS: NA
#> First 3 features:
#> cell_id geometry
#> 1 1 POLYGON ((4408657 2862063, ...
#> 2 2 POLYGON ((4508657 2862063, ...
#> 3 3 POLYGON ((4608657 2862063, ...
# spatial join to metch cell_id-s to points,
shares_df <- st_join(points_sf, grid_sf) |>
st_drop_geometry() |>
# number of points per cell (mutate, row count remains the same)
group_by(cell_id) |>
mutate(points_per_cell = n()) |>
# share of each category in every cell,
# points_per_cell[1] to pick just a single value
group_by(cell_id, category) |>
summarise(share = n() / points_per_cell[1]) |>
ungroup()
#> `summarise()` has grouped output by 'cell_id'. You can override using the
#> `.groups` argument.
shares_df
#> # A tibble: 8 × 3
#> cell_id category share
#> <int> <chr> <dbl>
#> 1 2 A 1
#> 2 5 A 1
#> 3 6 A 1
#> 4 11 B 1
#> 5 37 A 0.133
#> 6 37 B 0.4
#> 7 37 C 0.267
#> 8 37 D 0.2
# join grid to shares for ploting
gird_shares_sf <- right_join(grid_sf, shares_df)
#> Joining with `by = join_by(cell_id)`
gird_shares_sf
#> Simple feature collection with 8 features and 3 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 4408657 ymin: 2862063 xmax: 5008657 ymax: 3562063
#> CRS: NA
#> cell_id category share geometry
#> 1 2 A 1.0000000 POLYGON ((4508657 2862063, ...
#> 2 5 A 1.0000000 POLYGON ((4808657 2862063, ...
#> 3 6 A 1.0000000 POLYGON ((4908657 2862063, ...
#> 4 11 B 1.0000000 POLYGON ((4808657 2962063, ...
#> 5 37 A 0.1333333 POLYGON ((4408657 3462063, ...
#> 6 37 B 0.4000000 POLYGON ((4408657 3462063, ...
#> 7 37 C 0.2666667 POLYGON ((4408657 3462063, ...
#> 8 37 D 0.2000000 POLYGON ((4408657 3462063, ...
gird_shares_sf |>
ggplot() +
# draw full grid
geom_sf(data = grid_sf, fill = NA, color = "grey") +
# draw polygons with shares from gird_shares_sf
geom_sf(aes(fill = share)) +
# add points to all facets, renaming category column so it would not match
# faceting variable
geom_sf(data = rename(points_sf, cat = category), aes(color = cat)) +
facet_wrap(~category) +
scale_fill_binned() +
theme_minimal() +
theme(axis.text = element_blank(), panel.grid = element_blank())
Created on 2023-10-31 with reprex v2.0.2
Note that if you plan to plot it over some other shapes / map layers or do any other kind of spatial operation that involves other datasets, you should not ignore projection as I did here. If (if!) those coordinates happen to be for EPSG:3035 , you should state this when creating sf
object from data.frame:
st_as_sf(df, coords = c("X", "Y"), crs = 3035)