I have two datasets of Point coordinates (representing electrical installations) and would like to find the closest match between the two datasets following a given set of LineStrings (representing the cables). As an example let's take the following situation:
points1 = gpd.GeoDataFrame({'geometry' : [Point(0,1), Point(3,3)]})
points3 = gpd.GeoDataFrame({'geometry' : [Point(1,0), Point(4,4)]})
lines = gpd.GeoDataFrame({'geometry' : [LineString([Point(0,0),Point(0,4)]),LineString([Point(0,4),Point(4,4)]), LineString([Point(2,4),Point(2,0)]),LineString([Point(1,0),Point(3,0), Point(3,3)])]})
Here we can see that red point (0,1) is closest to green point (4,4) when following the lines (distance 7) and red point (3,3) is closest to green point (1,0).
How can I automatically calculate the closest green point for each red point? The lines can sometimes be split into smaller segments like in the example and not all red/green points are necessarily on the endpoint of one of the linestrings.
Building the graph is a bit tricky, I may be missing some edge cases. Considering intersections between lines is needed. The rest is using A*.
import matplotlib.pyplot as plt
import networkx as nx
from shapely.geometry import LineString, Point
from itertools import combinations
INF = 10**6
roads = [
LineString([(0, 0), (0, 4)]),
LineString([(0, 4), (4, 4)]),
LineString([(2, 4), (2, 0)]),
LineString([(1, 0), (3, 0), (3, 3)]),
]
coords = []
for road in roads:
c = list(road.coords)
for i in range(len(c) - 1):
coords.append(LineString([c[i], c[i + 1]]))
reds = [Point(0, 1), Point(3, 3)]
greens = [Point(1, 0), Point(4, 4)]
G = nx.Graph()
nodes = set()
for road in roads:
c = list(road.coords)
nodes |= set(c)
for n1, n2 in combinations(nodes, 2):
for c in coords:
if (n1 in c.coords or c.contains(Point(n1))) and (
n2 in c.coords or c.contains(Point(n2))
):
G.add_edge(n1, n2, weight=Point(n1).distance(Point(n2)))
for road in roads:
plt.plot(*road.xy, color="black")
for red_point in reds:
plt.scatter(red_point.x, red_point.y, color="red", s=100)
for green_point in greens:
plt.scatter(green_point.x, green_point.y, color="green", s=100)
def find_shortest(G, p1, p2):
try:
start_node = min(G.nodes, key=lambda node: Point(node).distance(p1))
end_node = min(G.nodes, key=lambda node: Point(node).distance(p2))
start_node_distance = Point(start_node).distance(p1)
end_node_distance = Point(end_node).distance(p2)
path = nx.astar_path(G, start_node, end_node, weight="weight")
path_line = LineString(path)
if start_node_distance != 0:
if path_line.contains(p1):
path[0] = p1
else:
path.insert(0, p1)
if end_node_distance != 0:
if path_line.contains(p2):
path[-1] = p2
else:
path.append(p2)
path_line = LineString(path)
path_length = path_line.length
return path_line, path_length
except nx.NetworkXNoPath:
return None, INF
def draw(path_line, path_length):
s_point = path_line.coords[0]
text_position = (s_point[0] + 0.5, s_point[1])
plt.plot(*path_line.xy, color="c", linestyle="dashed")
plt.text(
*text_position,
f"Length: {path_length:.2f}",
ha="center",
va="center",
color="purple",
)
opt_paths = [
min((find_shortest(G, r_point, g_point) for g_point in greens), key=lambda x: x[1])
for r_point in reds
]
for path in opt_paths:
draw(*path)
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.grid(True)
plt.show()