Search code examples
r, latitude-longitude, r-sf

Finding the shortest distance between two sets of points (latitude and longitude) in R


I have two sets of geolocation points, i.e., latitude and longitude coordinates.

# Control units: one row per site, identified by `id`, with its position
# given as latitude / longitude in decimal degrees.
control <- data.frame(
  id = c(
    "110000308033", "110000308042", "110000308060", "110000308346",
    "110000308505", "110000308541", "110000308612", "110000309684",
    "110000309773", "110000309835"
  ),
  latitude = c(
    42.20227, 42.19802, 42.19251, 42.13690, 42.65253,
    42.24066, 42.60008, 42.62743, 42.72361, 42.70060
  ),
  longitude = c(
    -72.606052, -72.600913, -72.609814, -72.542205, -73.110606,
    -73.358297, -72.378388, -71.156079, -70.9629610, -71.16304
  )
)


# Treatment units: same column layout as `control`
# (id, latitude, longitude in decimal degrees).
treatment <- data.frame(
  id = c(
    "110000308881", "110000310556", "110000314570",
    "110000316024", "110000324845"
  ),
  latitude = c(42.61366, 42.16657, 45.36801, 41.62371, 43.30851),
  longitude = c(-71.633782, -71.212503, -68.510184, -72.043135, -73.63481)
)

For each control unit, I want to find the shortest distance to a treatment unit. Also, how can I show the points on a US map? I would appreciate any help.


Solution

  • Please find below one possible solution using the sf, units, dplyr and tmap libraries.

    Reprex

    • Computing the distance matrix (distance in km)
    library(sf)
    library(units)
    library(dplyr)
    library(tmap)
    
    # Turn both data frames into 'sf' point layers.
    # Note the coordinate order: x = longitude first, then y = latitude.
    # crs = 4326 declares the coordinates as WGS 84 longitude/latitude.

    control_sf <- control %>%
      st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

    treatment_sf <- treatment %>%
      st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
    
    
    # Compute a distance matrix (distance in km)
    # rows = control
    # columns = treatment
    #
    # st_distance() returns a units-tagged matrix with one row per control
    # point and one column per treatment point; it is converted to km,
    # stripped of its units, rounded to one decimal place and turned into a
    # data frame whose first column holds the control id.
    Distances_df <- control_sf %>%
      st_distance(treatment_sf) %>%
      set_units(km) %>%
      drop_units() %>%
      round(1) %>%
      as.data.frame() %>%
      setNames(treatment$id) %>%
      mutate(control = control$id) %>%
      relocate(control)

    # Shortest distance from each control unit to a treatment unit.
    # For every row of the distance matrix, pick the column (treatment id)
    # with the smallest value; ties go to the first such column.
    # drop = FALSE keeps a matrix even when there is a single treatment unit.
    dist_km <- as.matrix(Distances_df[, -1, drop = FALSE])
    nearest_idx <- max.col(-dist_km, ties.method = "first")
    Nearest_df <- data.frame(
      control = Distances_df$control,
      nearest_treatment = colnames(dist_km)[nearest_idx],
      distance_km = dist_km[cbind(seq_len(nrow(dist_km)), nearest_idx)]
    )

    Distances_df
    #>         control 110000308881 110000310556 110000314570 110000316024
    #> 1  110000308033         92.0        114.9        481.6         79.4
    #> 2  110000308042         91.9        114.5        481.6         78.8
    #> 3  110000308060         92.8        115.2        482.6         78.7
    #> 4  110000308346         91.5        109.7        483.6         70.5
    #> 5  110000308505        120.9        164.9        475.8        144.3
    #> 6  110000308541        147.5        176.9        521.6        128.6
    #> 7  110000308612         61.0        107.2        436.4        112.0
    #> 8  110000309684         39.1         51.5        371.0        133.4
    #> 9  110000309773         56.2         65.2        353.4        151.3
    #> 10 110000309835         39.7         59.5        364.6        140.0
    #>    110000324845
    #> 1         148.9
    #> 2         149.6
    #> 3         149.7
    #> 4         157.9
    #> 5          84.5
    #> 6         120.9
    #> 7         129.1
    #> 8         215.4
    #> 9         226.7
    #> 10        212.0
    
    • Visualization (control in blue and treatment in red)
    # Use tmap's interactive viewing mode for the map
    tmap_mode("view")
    #> tmap mode set to interactive viewing

    # One dot layer per group: control in blue, treatment in red
    control_layer <- tm_shape(control_sf) + tm_dots(col = "blue")
    treatment_layer <- tm_shape(treatment_sf) + tm_dots(col = "red")

    # Stack the two layers; auto-printing at top level renders the map
    control_layer + treatment_layer
    

    enter image description here

    Created on 2022-01-14 by the reprex package (v2.0.1)