Search code examples
r geospatial latitude-longitude spatial haversine

R - For a dataframe of locations, find the nearest member from another dataframe of locations


I need to take the locations (Lat/Lon) in DF1 (3000 locations) and find the Haversine distance to the nearest location in DF2 (1500 locations) using R.

I can get this to work by hardcoding locations from DF2 or by creating a column for each item in DF2, which is very cumbersome.

Any ideas on how to accomplish this efficiently?

Data structures
DF1
Lat   Lon

DF2
Lat   Lon

Solution

  • Try this:

    # load packages
    install.packages("tidyverse")
    install.packages("rjson")
    library(tidyverse)
    library(geosphere)
    library(rjson)
    
    # make a function to retrieve test data
    
    
    #' Look up the coordinates of a place name via the geo.admin.ch SearchServer.
    #'
    #' Only the first search result is used.
    #'
    #' @param x Search text, e.g. a city name.
    #' @return A tibble with the columns `detail`, `lat` and `lon` taken from
    #'   the `attrs` element of the first result.
    get_latlon <- function(x) {
      # Percent-encode the search text itself so that reserved characters
      # (e.g. "&" or "#") cannot break the query string.
      url <- paste0(
        "https://api3.geo.admin.ch/rest/services/api/SearchServer?searchText=",
        URLencode(as.character(x), reserved = TRUE),
        "&type=locations"
      )
      result <- rjson::fromJSON(file = url)

      # Fail with a readable message instead of "subscript out of bounds".
      if (length(result$results) == 0) {
        stop("No location found for '", x, "'", call. = FALSE)
      }

      # Pull the three fields we need directly; this does not depend on the
      # full set of attributes the service happens to return.
      attrs <- result$results[[1]]$attrs
      tibble::tibble(
        detail = attrs$detail,
        lat = attrs$lat,
        lon = attrs$lon
      )
    }
    
    
    # fetch coordinates for two small sets of example cities
    cities1 <- c("spiez", "zumikon", "winterthur", "neuenburg")
    cities2 <- c("zurich", "bern", "lausanne")

    DF1 <- bind_rows(map(cities1, get_latlon))
    DF2 <- bind_rows(map(cities2, get_latlon))

    # suffix the column names so both tables can sit side by side, then
    # build every DF1 x DF2 combination
    DF1 <- setNames(DF1, paste0(names(DF1), ".a"))
    DF2 <- setNames(DF2, paste0(names(DF2), ".b"))
    data <- crossing(DF1, DF2)
    
    # function to calculate the Haversine distance

    #' Great-circle (haversine) distance between two sets of points.
    #'
    #' Vectorised: each argument may be a vector; the usual R recycling rules
    #' apply and one distance is returned per pair of points.
    #'
    #' @param lon1,lat1 Longitude and latitude of the first point(s), in degrees.
    #' @param lon2,lat2 Longitude and latitude of the second point(s), in degrees.
    #' @param r Sphere radius; the result is in the same unit. The default,
    #'   6378137, is the Earth's equatorial radius in metres.
    #' @return Numeric vector of distances.
    haversine <- function(lon1, lat1, lon2, lat2, r = 6378137) {

      if (!is.numeric(c(lon1, lat1, lon2, lat2))) {
        stop("Inputs must be numeric")
      }

      # Convert degrees to radians
      to_rad <- pi / 180
      lon1 <- lon1 * to_rad
      lat1 <- lat1 * to_rad
      lon2 <- lon2 * to_rad
      lat2 <- lat2 * to_rad

      delta_lon <- lon2 - lon1
      delta_lat <- lat2 - lat1
      a <- sin(delta_lat / 2)^2 +
        cos(lat1) * cos(lat2) * sin(delta_lon / 2)^2

      # pmin() clamps element-wise (guarding asin() against rounding above 1);
      # min() would collapse vector input to a single value.
      central_angle <- 2 * asin(pmin(1, sqrt(a)))

      r * central_angle
    }
    
    
    # find the smallest distances for locations in DF1
    data %>%
      # one group per (DF1, DF2) pair, so the distance is computed pair by pair
      group_by(detail.a, detail.b) %>%
      # pass coordinates by name: haversine() takes longitude before latitude
      mutate(
        haversine = haversine(
          lon1 = lon.a, lat1 = lat.a,
          lon2 = lon.b, lat2 = lat.b
        )
      ) %>%
      # keep, for every DF1 location, the DF2 location with the shortest distance
      group_by(detail.a) %>%
      slice(which.min(haversine)) %>%
      ungroup()