Search code examples
rggplot2dplyrr-sf

I would like to identify the primary administrative unit from the latitude and longitude coded in the dataset


I want to identify the primary administrative unit from the latitude and longitude coded in the dataset, and fill that administrative unit with red

What I tried:

library(rnaturalearth)
library(sf)
library(dplyr)
library(ggplot2)


find_admin_areas <- function(lon, lat) {
  # 
  all_states <- ne_states(returnclass = "sf")
  
  # 
  point_df <- st_as_sf(data.frame(lon = lon, lat = lat), coords = c("lon", "lat"), crs = st_crs(all_states))
  
  # 
  admin_areas <- st_join(point_df, all_states)
  
  # 
  admin_areas <- st_make_valid(admin_areas)
  
  return(admin_areas)
}

# 
locations_df <- data.frame(
  location = c("Kampong Chhnang", "Prey Veng", "Bokeo", "Xieng Khouang", "Phatthalung", "Chachoengsao",
               "Kamphaeng Phet", "Maha Sarakham", "Nonthaburi", "Phitsanulok", "Roi Et", "Si Sa Ket",
               "Surin", "Uthai Thani", "Lao Cai", "Thua Thien-Hue", "Phang Nga", "An Giang"),
  lon = c(104.5598351, 105.4249716, 101.8603, 103.1700, 100.0694874, 101.4314805,
          99.53470511, 103.1683362, 100.394886, 100.5448427, 103.8151837, 104.3711179,
          103.658002, 99.47934782, 103.9768, 107.501161, 98.42208133, 105.182631),
  lat = c(12.16634032, 11.39794884, 20.2672, 19.4326, 7.510663288, 13.60579712,
          16.33080535, 15.9978543, 13.92069527, 16.98237825, 15.91654777, 14.85512615,
          14.88487277, 15.34854839, 22.3820, 16.32788631, 8.4500, 10.5143)
)

# 
admin_areas <- locations_df %>%
  rowwise() %>%
  mutate(admin_area = list(find_admin_areas(lon, lat))) %>%
  select(location, admin_area)
## <- Error code 

# 
ggplot() +
  geom_sf(data = ne_states(), fill = "lightgray", color = "white") +  
  geom_sf(data = admin_areas$admin_area, fill = "red", color = "red", alpha = 0.5) + 
  labs(title = "Administrative Areas in Southeast Asia") +  
  theme_minimal()  

Solution

  • You can achieve this simply by using sf::st_join(). Note that as you data have a geographical coordinate system (GCS), you will need to tell the sf package to treat the coordinates as if they were projected by using sf_use_s2(FALSE). Once you are done, you can turn it back on using sf_use_s2(TRUE):

    library(rnaturalearth)
    library(sf)
    library(dplyr)
    library(ggplot2)
    
    # Load state sf
    all_states <- ne_states(returnclass = "sf")
    
    # Location data
    locations_df <- data.frame(
      location = c("Kampong Chhnang", "Prey Veng", "Bokeo", "Xieng Khouang", "Phatthalung", "Chachoengsao",
                   "Kamphaeng Phet", "Maha Sarakham", "Nonthaburi", "Phitsanulok", "Roi Et", "Si Sa Ket",
                   "Surin", "Uthai Thani", "Lao Cai", "Thua Thien-Hue", "Phang Nga", "An Giang"),
      lon = c(104.5598351, 105.4249716, 101.8603, 103.1700, 100.0694874, 101.4314805,
              99.53470511, 103.1683362, 100.394886, 100.5448427, 103.8151837, 104.3711179,
              103.658002, 99.47934782, 103.9768, 107.501161, 98.42208133, 105.182631),
      lat = c(12.16634032, 11.39794884, 20.2672, 19.4326, 7.510663288, 13.60579712,
              16.33080535, 15.9978543, 13.92069527, 16.98237825, 15.91654777, 14.85512615,
              14.88487277, 15.34854839, 22.3820, 16.32788631, 8.4500, 10.5143))
    
    # Create sf from location data
    point_df <- st_as_sf(locations_df, coords = c("lon", "lat"), crs = st_crs(all_states))
    
    # Tell sf to treat spherical coordinates as planar
    sf_use_s2(FALSE)
    
    # Return admin boundaries that share geometries with point_sf
    admin_areas <- st_join(all_states, point_df) |>
      filter(!is.na(location))
    
    # Plot (zoomed to area of interest)
    ggplot() +
      geom_sf(data = all_states , fill = "lightgray", color = "white") +  
      geom_sf(data = admin_areas, fill = "red", color = "red", alpha = 0.5) + 
      labs(title = "Administrative Areas in Southeast Asia") +  
      coord_sf(xlim = c(97, 109),
               ylim = c(7, 23)) +
      theme_minimal()
    

    1