Search code examples
rrasterterra

median raster value for polygons by attribute that is not unique


Given this this raster and polygon dataset

 f <- system.file("ex/elev.tif", package="terra") 
 r <- rast(f)
 fs <- system.file("ex/lux.shp", package="terra")
 v <- vect(fs)

How can I extract the median value of the raster cells for polygons with the same value of "NAME_1" in v, and add this to the attributes of v?

Please note that "NAME_1" is not unique in v:

v
# class       : SpatVector 
# geometry    : polygons 
# dimensions  : 12, 6  (geometries, attributes)
# extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
# source      : lux.shp
# coord. ref. : lon/lat WGS 84 (EPSG:4326) 
# names       :  ID_1   NAME_1  ID_2   NAME_2  AREA   POP
# type        : <num>    <chr> <num>    <chr> <num> <int>
# values      :     1 Diekirch     1 Clervaux   312 18081
#                   1 Diekirch     2 Diekirch   218 32543
#                   1 Diekirch     3  Redange   259 18664

Solution

  • To compute the median for all geometries, you can do

    library(terra)
    r <- rast(system.file("ex/elev.tif", package="terra") )
    v <- vect(system.file("ex/lux.shp", package="terra"))
    
    v$median = extract(r, v, fun=median, na.rm=TRUE, ID=FALSE)
    v
    # class       : SpatVector 
    # geometry    : polygons 
    # dimensions  : 12, 7  (geometries, attributes)
    # extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
    # source      : lux.shp
    # coord. ref. : lon/lat WGS 84 (EPSG:4326) 
    # names       :  ID_1   NAME_1  ID_2   NAME_2  AREA   POP median
    # type        : <num>    <chr> <num>    <chr> <num> <int>  <num>
    # values      :     1 Diekirch     1 Clervaux   312 18081  467.1
    #                   1 Diekirch     2 Diekirch   218 32543  333.9
    #                   1 Diekirch     3  Redange   259 18664  377.4
    

    Since "NAME_1" has the same value for multiple geometries, we have to do a bit more work. You can do either

    a <- aggregate(v, "NAME_1")
    e <- extract(r, a, fun=median, na.rm=TRUE)
    e$NAME_1 <- a$NAME_1[e$ID]
    e$ID <- NULL
    ve <- merge(v, e, by="NAME_1")
    

    or

    ee <- extract(r, v, na.rm=TRUE)
    ee$NAME_1 <- v$NAME_1[ee$ID]
    ee <- aggregate(ee[,2,drop=FALSE], ee[,"NAME_1",drop=FALSE], median, na.rm=TRUE)
    vee <- merge(v, ee, by="NAME_1")
    
    values(vee)
    #         NAME_1 ID_1 ID_2           NAME_2 AREA    POP elevation
    #1      Diekirch    1    1         Clervaux  312  18081     422.0
    #2      Diekirch    1    2         Diekirch  218  32543     422.0
    #3      Diekirch    1    3          Redange  259  18664     422.0
    #4      Diekirch    1    4          Vianden   76   5163     422.0
    #5      Diekirch    1    5            Wiltz  263  16735     422.0
    #6  Grevenmacher    2    6       Echternach  188  18899     288.5
    #7  Grevenmacher    2    7           Remich  129  22366     288.5
    #8  Grevenmacher    2   12     Grevenmacher  210  29828     288.5
    #9    Luxembourg    3    8         Capellen  185  48187     314.0
    #10   Luxembourg    3    9 Esch-sur-Alzette  251 176820     314.0
    #11   Luxembourg    3   10       Luxembourg  237 182607     314.0
    #12   Luxembourg    3   11           Mersch  233  32112     314.0