Search code examples
rgeometryshapefilearea

Computing the surface area of a variable in a shapefile in r


My shapefile represent a continent. It has many polygons (because of several layers).

I would like to compute the surface area/squarekm for different variables and have the results in a column i.e:

Total squarekm per country (NAME variable): It would give me the square km of each country polygons. Total squarekm per AEZ (AEZ variable): It would give me the square km of each AEZ zone

etc.

I did it in Arcmap but can't figure out how to have the same results in R.

I tried with Areapolygons but it does not work.

> dput(PRIO[2:6,9,12:14, c(1,2)]) structure(list(NAME = c("Mauritania", "Mauritania", "Mauritania", "Mauritania", "Mauritania"), geometry = structure(list(structure(list( list(structure(c(-8.15539750263898, -8.5, -8.5, -8.20444499999996, -8.15539750263898, 27, 27, 27.1964674367602, 27.0274960000002, 27), .Dim = c(5L, 2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-8.5, -8.66722299999986, -8.66722299999986, -8.66722299999986, -8.66717809129804, -8.5, -8.5, 26.5, 26.5, 26.8330540000001, 26.9663889999999, 27, 27, 26.5), .Dim = c(7L, 2L)))), class = c("XY", "MULTIPOLYGON", "sfg" )), structure(list(list(structure(c(-8, -8, -8.5, -8.5, -8.15539750263898, -8.13111099999998, -8, 26.9105346374803, 26.5, 26.5, 27, 27, 26.9863850000001, 26.9105346374803), .Dim = c(7L, 2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-7.50000000000003, -7.50000000000003, -8, -8, -7.71194499999996, -7.69361099999992, -7.50000000000003, 26.6209884313231, 26.5, 26.5, 26.9105346374803, 26.7438890000001, 26.7341649999999, 26.6209884313231), .Dim = c(7L, 2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list( list(structure(c(-7.29302525734133, -7.50000000000003, -7.50000000000003, -7.29302525734133, 26.5, 26.5, 26.6209884313231, 26.5), .Dim = c(4L, 2L)))), class = c("XY", "MULTIPOLYGON", "sfg"))), class = c("sfc_MULTIPOLYGON", "sfc"), precision = 0, bbox = structure(c(xmin = -8.66722299999986, ymin = 26.5, xmax = -7.29302525734133, ymax = 27.1964674367602 ), class = "bbox"), crs = structure(list(input = "WGS 84", wkt = "GEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"latitude\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"longitude\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, -5L), sf_column = "geometry", agr = structure(c(NAME = NA_integer_), .Label = c("constant", "aggregate", "identity"), class = "factor"), class = c("sf", "tbl_df", "tbl", "data.frame"))

Thanks!


Solution

  • Load your multipolygon in R, make sure it has an appropriate coordinate system, then use st_area(), which returns the area of each polygon (row) in your multipolygon.

    library(sf)
    
    # Load multipolygon
    nc = st_read(system.file("shape/nc.shp", package="sf"))
    
    # Check coordinate system
    st_crs(nc)
    #> Coordinate Reference System:
    #>   User input: NAD27 
    #>   wkt:
    #> GEOGCRS["NAD27",
    #>     DATUM["North American Datum 1927",
    #>         ELLIPSOID["Clarke 1866",6378206.4,294.978698213898,
    #>             LENGTHUNIT["metre",1]]],
    #>     PRIMEM["Greenwich",0,
    #>         ANGLEUNIT["degree",0.0174532925199433]],
    #>     CS[ellipsoidal,2],
    #>         AXIS["latitude",north,
    #>             ORDER[1],
    #>             ANGLEUNIT["degree",0.0174532925199433]],
    #>         AXIS["longitude",east,
    #>             ORDER[2],
    #>             ANGLEUNIT["degree",0.0174532925199433]],
    #>     ID["EPSG",4267]]
    
    plot(nc$geometry)
    

    nc

    nrow(nc)
    #> [1] 100
    
    st_area(nc)
    #> Units: [m^2]
    #>   [1] 1137107793  610916077 1423145355  694378925 1520366979  967504822
    #>   [7]  615794941  903423919 1179065710 1232475139 1136017416 1524295167
    #>  [13] 1426763054 1085709751  718024778 1893655681  524303669 1986581059
    #>  [19]  812132036  626215554  439637846  640597398  863142124 1276325061
    #>  [25] 1083947009 1697657775 1109799786 1800353048 1036247721  770426970
    #>  [31] 1422972995  585145178 1311460371 1224942117  800163805 1186288078
    #>  [37] 2194374294 1179004039 1550151186  690514844  665066784 1457728244
    #>  [43] 1340416729 1005633561  988219530 1163804357 2019609428 1810365923
    #>  [49]  944348527 1350014516 1685059736 1068120639 1691385005 2082034143
    #>  [55] 1447025244  943796075 2045470574 1420873777  707648814  653349704
    #>  [61] 1471057561 1436128964 1550970115 1186032312  788508058 1265459073
    #>  [67] 1829451696 1447903974  918204712 1312725482 1043733633  961860879
    #>  [73]  781909574 1046090580  986760532  917758923  601335294 1321974824
    #>  [79] 2438120829  833576485 1210382282 1738664778 1228776807 1648541762
    #>  [85] 1400697543  995179656 1678005426 2072031752 1228366621  519232890
    #>  [91] 1785013769  808690576 1978885855 2439935278 1264198838 2289052992
    #>  [97] 2181566551 2450830549  430798470 2166454052
    Created on 2021-10-12 by the reprex package (v2.0.1)
    

    Edit: To calculate area within groups in your data

    library(dplyr)
    library(sf)
    
    # I've loaded the data in your question as `df`
    #
    # I'll show how to calculate total areas for your group NAME,
    # like you say in your question, but since there's only one
    # unique value in your example data, I'll also make a dummy
    # grouping variable to show the difference:
    
    # Define dummy groups
    df$id <- c(1,1,2,2,3)
    
    # First, calculate the area of each polygon in your multipolygon
    df$area <- st_area(df)
    
    # Group by NAME and calculate a total area for each group.
    # We expect this to return one area value, because there is only one group.
    
    df %>% group_by(NAME) %>% summarize(st_union(geometry), area_NAME = sum(area))
    #> Simple feature collection with 1 feature and 2 fields
    #> Geometry type: POLYGON
    #> Dimension:     XY
    #> Bounding box:  xmin: -8.667223 ymin: 26.5 xmax: -7.293025 ymax: 27.19647
    #> Geodetic CRS:  WGS 84
    #> # A tibble: 1 x 3
    #>   NAME                                           `st_union(geometry)`  area_NAME
    #>   <chr>                                                 <POLYGON [°]>      [m^2]
    #> 1 Mauritania ((-7.5 26.62099, -7.693611 26.73416, -7.711945 26.74389~     5.59e9
    
    # Now group by the dummy variable and calculate a total area for each group.
    # In this case, we have three groups (1,2,3), so we expect three area values.
    
    df %>% group_by(id) %>% summarize(st_union(geometry), area_id = sum(area))
    #> Simple feature collection with 3 features and 2 fields
    #> Geometry type: GEOMETRY
    #> Dimension:     XY
    #> Bounding box:  xmin: -8.667223 ymin: 26.5 xmax: -7.293025 ymax: 27.19647
    #> Geodetic CRS:  WGS 84
    #> # A tibble: 3 x 3
    #>      id                                           `st_union(geometry)`   area_id
    #>   <dbl>                                                 <GEOMETRY [°]>     [m^2]
    #> 1     1 MULTIPOLYGON (((-8.204445 27.0275, -8.5 27.19647, -8.5 27, -8~    1.30e9
    #> 2     2 POLYGON ((-7.693611 26.73416, -7.711945 26.74389, -8 26.91053~    4.15e9
    #> 3     3 POLYGON ((-7.5 26.62099, -7.5 26.5, -7.293025 26.5, -7.5 26.6~    1.39e8
    Created on 2021-10-12 by the reprex package (v2.0.1)
    

    Edit2: Merge grouped data to original data

    > df2 <- df %>% group_by(id) %>% summarize(st_union(geometry), area_id = sum(area))
    
    > merge(df, st_drop_geometry(df2), by = "id", all.x = TRUE)
    Simple feature collection with 5 features and 4 fields
    Geometry type: MULTIPOLYGON
    Dimension:     XY
    Bounding box:  xmin: -8.667223 ymin: 26.5 xmax: -7.293025 ymax: 27.19647
    Geodetic CRS:  WGS 84
      id       NAME             area          area_id
    1  1 Mauritania  371871356 [m^2] 1295023668 [m^2]
    2  1 Mauritania  923152312 [m^2] 1295023668 [m^2]
    3  2 Mauritania 2683469487 [m^2] 4153042391 [m^2]
    4  2 Mauritania 1469572903 [m^2] 4153042391 [m^2]
    5  3 Mauritania  138546017 [m^2]  138546017 [m^2]
                            geometry
    1 MULTIPOLYGON (((-8.155398 2...
    2 MULTIPOLYGON (((-8.5 26.5, ...
    3 MULTIPOLYGON (((-8 26.91053...
    4 MULTIPOLYGON (((-7.5 26.620...
    5 MULTIPOLYGON (((-7.293025 2...