I want to connect ALL the states drew from a sample of states of USA with a thin line exactly how it is done in this post. I did manage to draw some lines between some polygons but I did not manage to draw lines so all polygons are connected at the end. How can I force a polygon already connected to look for making a connexion with a polygon that is not already linked with itself?
Here is my code :
library(dplyr)
library(sf)
library(ggplot2)
library(nngeo)
nc <- st_read(system.file("shape/nc.shp", package="sf"))
set.seed(2806)
nc_sample=dplyr::sample_n(nc,10)
ggplot() + geom_sf(data = nc_sample)
co=st_connect(nc_sample[1,], nc_sample %>% slice(st_distance(nc_sample[1,],nc_sample) %>%
units::drop_units() %>%
as.data.frame() %>%
mutate_if(is.numeric, ~na_if(., 0)) %>%
rowwise() %>% which.min()))
all_connexion=st_sf(co)
for(i in 2:length(nc_sample$AREA)){
co=st_connect(nc_sample[i,], nc_sample %>% slice(st_distance(nc_sample[i,],nc_sample) %>%
units::drop_units() %>%
as.data.frame() %>%
mutate_if(is.numeric, ~na_if(., 0)) %>%
rowwise() %>% which.min()))
all_connexion = rbind(all_connexion,co)
}
ggplot() +
geom_sf(data = all_connexion) +
geom_sf(data = nc_sample)
To use nngeo
, you could iterate through polygons, each time finding the closest neighbour from the set that has not been linked yet. It's basically the nearest neighbour algorithm for the travelling salesman problem. Also note that nc
dataset contains multipolygons, to connect each individual polygon, you would need something like st_cast(nc_sample, "POLYGON")
first.
library(dplyr)
library(sf)
library(ggplot2)
library(nngeo)
nc <- st_read(system.file("shape/nc.shp", package="sf"))
set.seed(2806)
nc_sample=dplyr::sample_n(nc,10) %>%
# add index and next_hop columns
mutate(id = row_number(),
next_hop = NA_integer_)
# iterate though list of ploygons,
# find the closest neighbour from set of polygons
# that are not yet linked
# starting polygon index:
idx <- 1
# mask for unlinked polygons:
nn_mask <- rep_len(TRUE, nrow(nc_sample))
nn_mask[idx] <- FALSE
while (any(nn_mask)) {
# index of the nearest neighbour in masked dataset
masked_idx <- st_nn(nc_sample[idx,],
nc_sample[nn_mask,],
k = 1,
progress = FALSE)[[1]]
# get / store matching id value
idx <- nc_sample$next_hop[idx] <- nc_sample[nn_mask,]$id[masked_idx]
# exclude latest match from the set of available polygons
nn_mask[idx] <- FALSE
}
# resulting connections:
st_drop_geometry(nc_sample)[,c("id", "next_hop")]
#> id next_hop
#> 1 1 6
#> 2 2 5
#> 3 3 NA
#> 4 4 2
#> 5 5 3
#> 6 6 10
#> 7 7 9
#> 8 8 7
#> 9 9 4
#> 10 10 8
connections <- st_connect(nc_sample, nc_sample, ids = nc_sample$next_hop)
ggplot(nc_sample) +
geom_sf() +
geom_sf_label(aes(label = id)) +
geom_sf(data = connections)
Created on 2023-06-29 with reprex v2.0.2
In most cases you'd probably want solve this through graphs and more advanced TSP methods, so perhaps check igraph
and TSP
packages.