Search code examples
r

How to produce a gridded map in R where each grid cell shows the share of points of a certain category?


So, I have a dataset of many points across of Europe. For illustration purposes see this image below:

enter image description here

Each of these points has one of four categories attached to it, so the dataset I am working with has points with X and Y coordinates, and these categories:

df <- data.frame(
category = c("A","A","B","A","A","B","A","A","C","B","B","C","D","B","C","B","D","C","B","D"),
X = c(4599984.10552057,4831788.90207249,4840009.6065062,4832038.78315722,4930825.32065239,4416629.65604407,4446435.30204568,4446438.1996941,4446638.22211161,4446635.32438607,4442550.72821205,4442553.62813824,4442750.75801933,4422604.29347903,4422602.42523751,4422805.49222056,4418632.41506483,4418832.47128213,4408657.25277069,4408857.25256069),
Y = c(2881156.36674124,2862062.90168895,3020826.90515788,2864693.62543509,2955978.25574188,3502994.58644907,3502550.9888811,3502750.90387191,3502747.90897273,3502547.99413997,3510607.49600397,3510807.41086243,3510604.49974218,3514904.75866658,3515102.48878262,3515100.46862153,3516963.23902827,3516960.26748617,3519109.88091922,3519310.80154912))

What I would like to do then is to overlay a grid of a certain dimension over these points like here:

enter image description here

To then count the share of each of the points in one of the grid cells and map them out, to create four different maps for the four different categories that I have showing for each category what is the share in each of the grid cells. So for example if in one of the grid cells we have 10 points, 5 belong to category A and 5 to category B the share for each category in that grid cell would be 0.5. In the end I would like to end up with four raster maps for each of the categories, where each pixel shows the share of the points of that category in that grid cell.

Illustration here: enter image description here

I found similiar questions like this: https://gis.stackexchange.com/questions/309407/computing-number-of-points-in-a-raster-grid-cell-in-r but usually it is just abotu number of points per grid cell, whereas I need the share of points for each of the four categories. Hope I explained myself well enough.


Solution

  • Here's one approach with {sf} package. First we'll convert data.frame to sf object, generate a grid with unique cell_ids and then use spatial join to match every point and cell_id. From there we can find shares and join those to matching grid cell geometries.

    library(sf)
    #> Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
    library(dplyr)
    library(ggplot2)
    
    df <- data.frame(
      category = c("A","A","B","A","A","B","A","A","C","B","B","C","D","B","C","B","D","C","B","D"),
      X = c(4599984.10552057,4831788.90207249,4840009.6065062,4832038.78315722,4930825.32065239,4416629.65604407,4446435.30204568,4446438.1996941,4446638.22211161,4446635.32438607,4442550.72821205,4442553.62813824,4442750.75801933,4422604.29347903,4422602.42523751,4422805.49222056,4418632.41506483,4418832.47128213,4408657.25277069,4408857.25256069),
      Y = c(2881156.36674124,2862062.90168895,3020826.90515788,2864693.62543509,2955978.25574188,3502994.58644907,3502550.9888811,3502750.90387191,3502747.90897273,3502547.99413997,3510607.49600397,3510807.41086243,3510604.49974218,3514904.75866658,3515102.48878262,3515100.46862153,3516963.23902827,3516960.26748617,3519109.88091922,3519310.80154912))
    
    # create sf object from point
    points_sf <- st_as_sf(df, coords = c("X", "Y"))
    
    # create a grid covering all points, cell size 100000 units,
    # add unique cell_id
    grid_sf <- st_make_grid(points_sf, cellsize = 100000) |> 
      st_sf(geometry = _) |>
      mutate(cell_id = row_number(), .before = 1)
    print(grid_sf, n = 3)
    #> Simple feature collection with 42 features and 1 field
    #> Geometry type: POLYGON
    #> Dimension:     XY
    #> Bounding box:  xmin: 4408657 ymin: 2862063 xmax: 5008657 ymax: 3562063
    #> CRS:           NA
    #> First 3 features:
    #>   cell_id                       geometry
    #> 1       1 POLYGON ((4408657 2862063, ...
    #> 2       2 POLYGON ((4508657 2862063, ...
    #> 3       3 POLYGON ((4608657 2862063, ...
    
    # spatial join to metch cell_id-s to points,
    shares_df <- st_join(points_sf, grid_sf) |>
      st_drop_geometry() |>
      # number of points per cell (mutate, row count remains the same)
      group_by(cell_id) |>
      mutate(points_per_cell = n()) |>
      # share of each category in every cell, 
      # points_per_cell[1] to pick just a single value
      group_by(cell_id, category) |>
      summarise(share = n() / points_per_cell[1]) |>
      ungroup()
    #> `summarise()` has grouped output by 'cell_id'. You can override using the
    #> `.groups` argument.
    shares_df
    #> # A tibble: 8 × 3
    #>   cell_id category share
    #>     <int> <chr>    <dbl>
    #> 1       2 A        1    
    #> 2       5 A        1    
    #> 3       6 A        1    
    #> 4      11 B        1    
    #> 5      37 A        0.133
    #> 6      37 B        0.4  
    #> 7      37 C        0.267
    #> 8      37 D        0.2
    
    # join grid to shares for ploting
    gird_shares_sf <- right_join(grid_sf, shares_df)
    #> Joining with `by = join_by(cell_id)`
    gird_shares_sf
    #> Simple feature collection with 8 features and 3 fields
    #> Geometry type: POLYGON
    #> Dimension:     XY
    #> Bounding box:  xmin: 4408657 ymin: 2862063 xmax: 5008657 ymax: 3562063
    #> CRS:           NA
    #>   cell_id category     share                       geometry
    #> 1       2        A 1.0000000 POLYGON ((4508657 2862063, ...
    #> 2       5        A 1.0000000 POLYGON ((4808657 2862063, ...
    #> 3       6        A 1.0000000 POLYGON ((4908657 2862063, ...
    #> 4      11        B 1.0000000 POLYGON ((4808657 2962063, ...
    #> 5      37        A 0.1333333 POLYGON ((4408657 3462063, ...
    #> 6      37        B 0.4000000 POLYGON ((4408657 3462063, ...
    #> 7      37        C 0.2666667 POLYGON ((4408657 3462063, ...
    #> 8      37        D 0.2000000 POLYGON ((4408657 3462063, ...
    
    gird_shares_sf |>
      ggplot() +
      # draw full grid
      geom_sf(data = grid_sf, fill = NA, color = "grey") +
      # draw polygons with shares from gird_shares_sf
      geom_sf(aes(fill = share)) +
      # add points to all facets, renaming category column so it would not match 
      # faceting variable
      geom_sf(data = rename(points_sf, cat = category), aes(color = cat)) +
      facet_wrap(~category) +
      scale_fill_binned() +
      theme_minimal() +
      theme(axis.text = element_blank(), panel.grid = element_blank())
    

    Created on 2023-10-31 with reprex v2.0.2

    Note that if you plan to plot it over some other shapes / map layers or do any other kind of spatial operation that involves other datasets, you should not ignore projection as I did here. If (if!) those coordinates happen to be for EPSG:3035 , you should state this when creating sf object from data.frame:

    st_as_sf(df, coords = c("X", "Y"), crs = 3035)