Search code examples
rboxplotshapefiler-rasterr-sp

boxplot of raster and shapefile


This example code works

library(raster)
library(sp)
library(rgdal)
library(ggplot2)

# Create some sample raster data
raster_file <- raster(ncol=36, nrow=18)
raster_file[] <- 1:ncell(raster_file)
plot(raster_file)

#Create some sample polygons
cds1 <- rbind(c(-180,-20), c(-160,5), c(-60, 0), c(-160,-60), c(-180,-20))
cds2 <- rbind(c(80,0), c(100,60), c(120,0), c(120,-55), c(80,0)) 
shape_file <-        SpatialPolygons(list(Polygons(list(Polygon(cds1)), 1), 
                          Polygons(list(Polygon(cds2)), 2)))
plot(shape_file)


# Extract raster values within the shapefile extracted_values <- extract(raster_file, shape_file)


# Assuming the shapefile has multiple polygons and you want to create a boxplot for each 
data_list <-   lapply(1:length(extracted_values), function(i) {
  data.frame(value = extracted_values[[i]], polygon = i)
 })
data <- do.call(rbind, data_list)

# Create the boxplot
bp<-ggplot(data, aes(x = factor(polygon), y = value)) +
  geom_boxplot() +
  labs(x = "Polygon", y = "Raster Values") +
  theme_minimal()
 bp

For my own dataset I encountered problems in reading in the polygons.
The error message comes in the boxplot function

Here may shape file

> # load shape file including all layers and print layers
> shape_file<-shapefile("C:/Users/.....BiogeoRegion.shp")
Warning message:
[vect] Z coordinates ignored 
> names(shape_file)
 [1] "RegionNumm" "RegionName" "Unterregio" "Unterreg_1" "ObjNummer"
"Version"    "Shape_Leng" "Shape_Area" "DERegionNa" "FRRegionNa"
[11] "ITRegionNa" "DEBioBedeu" "FRBioBedeu" "ITBioBedeu"
> str(shape_file)
Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
  ..@ data       :'data.frame': 12 obs. of  14 variables:
  .. ..$ RegionNumm: int [1:12] 1 2 2 2 2 3 3 4 5 5 ...
   .. ..$ RegionName: chr [1:12] "R1" "R2" "R2" "R2" ...
   .. ..$ Unterregio: int [1:12] 11 21 22 23 24 31 32 41 51 52 ...
   .. ..$ Unterreg_1: chr [1:12] "U11" "U21" "U22" "U23" ...
   .. ..$ ObjNummer : chr [1:12] "1" "2" "3" "4" ...
  .. ..$ Version   : chr [1:12] "2020/05/08" "2020/05/08" "2020/05/08"
 "2020/05/08" ...


> # select a specific raster file from a list 
> raster_file<-allrasters_pres[[1]]
> names(raster_file)
[1] "Andrena.barbilabris_glo_ensemble"
> 
> # Extract raster values within the shapefile extracted_values <- 
> extract(raster_file, shape_file)
> 
> 
> 
> # Assuming the shapefile has multiple polygons and you want to create 
 > a
 boxplot for each
 > data_list <- lapply(1:length(extracted_values), function(i) {
 +     data.frame(value = extracted_values[[i]], polygon = i)
 + })
> data <- do.call(rbind, data_list)

> names(data)
[1] "value"   "polygon"
> 
> # Create the boxplot
> bp<-ggplot(data, aes(x = factor(polygon), y = value)) +
 +     geom_boxplot() +
 +     labs(x = "Polygon", y = "Raster Values") +
 +     theme_minimal()
 > bp
 Error in UseMethod("depth") : 
 no applicable method for 'depth' applied to an object of class "NULL"
  In addition: Warning message:
Removed 452451 rows containing non-finite outside the scale range (`stat_boxplot()`). 
 >

Solution

  • If it doesn't have to be based on raster and sp packages, you might want to consider terra or sf + stars instead.

    With copy-pastes from terra documentation it might look something like this:

    library(terra)
    #> terra 1.7.78
    library(ggplot2)
    
    # Load shp and raster from example files of terra
    (v <- vect(system.file("ex/lux.shp", package="terra")))
    #>  class       : SpatVector 
    #>  geometry    : polygons 
    #>  dimensions  : 12, 6  (geometries, attributes)
    #>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
    #>  source      : lux.shp
    #>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
    #>  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA   POP
    #>  type        : <num>    <chr> <num>    <chr> <num> <int>
    #>  values      :     1 Diekirch     1 Clervaux   312 18081
    #>                    1 Diekirch     2 Diekirch   218 32543
    #>                    1 Diekirch     3  Redange   259 18664
    (r <- rast(system.file("ex/elev.tif", package="terra")))
    #> class       : SpatRaster 
    #> dimensions  : 90, 95, 1  (nrow, ncol, nlyr)
    #> resolution  : 0.008333333, 0.008333333  (x, y)
    #> extent      : 5.741667, 6.533333, 49.44167, 50.19167  (xmin, xmax, ymin, ymax)
    #> coord. ref. : lon/lat WGS 84 (EPSG:4326) 
    #> source      : elev.tif 
    #> name        : elevation 
    #> min value   :       141 
    #> max value   :       547
    plot(r); plot(v, add = TRUE); text(v, labels = "NAME_2", col=c("white"))
    

    # extract, include names from vector attributes
    e <- extract(r,v)
    e$NAME_2 <- v$NAME_2[e$ID]
    str(e)
    #> 'data.frame':    4606 obs. of  3 variables:
    #>  $ ID       : num  1 1 1 1 1 1 1 1 1 1 ...
    #>  $ elevation: int  547 485 497 515 515 515 518 531 540 NA ...
    #>  $ NAME_2   : chr  "Clervaux" "Clervaux" "Clervaux" "Clervaux" ...
    
    # plot
    ggplot(e, aes(x = NAME_2, y = elevation)) +
      geom_boxplot() +
      theme_minimal() +
      theme(axis.text.x = element_text(angle = 45))
    #> Warning: Removed 51 rows containing non-finite outside the scale range
    #> (`stat_boxplot()`).
    

    # or just use terra::rasterize() + terra::boxplot():
    y <- rasterize(v, r, "NAME_2")
    boxplot(r,y)
    

    Created on 2024-08-27 with reprex v2.1.1