Search code examples
pythongeopandasshapely

Find the closest distance between points following a set of LineStrings


I have two datasets of Point coordinates (representing electrical installations) and would like to find the closest match between the two datasets following a given set of LineStrings (representing the cables). As an example let's take the following situation:

points1 = gpd.GeoDataFrame({'geometry' : [Point(0,1), Point(3,3)]})
points3 = gpd.GeoDataFrame({'geometry' : [Point(1,0), Point(4,4)]})
lines = gpd.GeoDataFrame({'geometry' : [LineString([Point(0,0),Point(0,4)]),LineString([Point(0,4),Point(4,4)]), LineString([Point(2,4),Point(2,0)]),LineString([Point(1,0),Point(3,0), Point(3,3)])]})

enter image description here

Here we can see that red point (0,1) is closest to green point (4,4) when following the lines (distance 7) and red point (3,3) is closest to green point (1,0).

How can I automatically calculate the closest green point for each red point? The lines can sometimes be split into smaller segments like in the example and not all red/green points are necessarily on the endpoint of one of the linestrings.


Solution

  • Building the graph is a bit tricky, I may be missing some edge cases. Considering intersections between lines is needed. The rest is using A*.

    import matplotlib.pyplot as plt
    import networkx as nx
    from shapely.geometry import LineString, Point
    from itertools import combinations
    
    INF = 10**6
    
    roads = [
        LineString([(0, 0), (0, 4)]),
        LineString([(0, 4), (4, 4)]),
        LineString([(2, 4), (2, 0)]),
        LineString([(1, 0), (3, 0), (3, 3)]),
    ]
    coords = []
    for road in roads:
        c = list(road.coords)
        for i in range(len(c) - 1):
            coords.append(LineString([c[i], c[i + 1]]))
    
    
    reds = [Point(0, 1), Point(3, 3)]
    greens = [Point(1, 0), Point(4, 4)]
    
    G = nx.Graph()
    
    nodes = set()
    for road in roads:
        c = list(road.coords)
        nodes |= set(c)
    
    for n1, n2 in combinations(nodes, 2):
        for c in coords:
            if (n1 in c.coords or c.contains(Point(n1))) and (
                n2 in c.coords or c.contains(Point(n2))
            ):
                G.add_edge(n1, n2, weight=Point(n1).distance(Point(n2)))
    
    for road in roads:
        plt.plot(*road.xy, color="black")
    
    for red_point in reds:
        plt.scatter(red_point.x, red_point.y, color="red", s=100)
    
    for green_point in greens:
        plt.scatter(green_point.x, green_point.y, color="green", s=100)
    
    
    def find_shortest(G, p1, p2):
        try:
            start_node = min(G.nodes, key=lambda node: Point(node).distance(p1))
            end_node = min(G.nodes, key=lambda node: Point(node).distance(p2))
            start_node_distance = Point(start_node).distance(p1)
            end_node_distance = Point(end_node).distance(p2)
    
            path = nx.astar_path(G, start_node, end_node, weight="weight")
            path_line = LineString(path)
            if start_node_distance != 0:
                if path_line.contains(p1):
                    path[0] = p1
                else:
                    path.insert(0, p1)
            if end_node_distance != 0:
                if path_line.contains(p2):
                    path[-1] = p2
                else:
                    path.append(p2)
            path_line = LineString(path)
            path_length = path_line.length
            return path_line, path_length
        except nx.NetworkXNoPath:
            return None, INF
    
    
    def draw(path_line, path_length):
        s_point = path_line.coords[0]
        text_position = (s_point[0] + 0.5, s_point[1])
        plt.plot(*path_line.xy, color="c", linestyle="dashed")
        plt.text(
            *text_position,
            f"Length: {path_length:.2f}",
            ha="center",
            va="center",
            color="purple",
        )
    
    
    opt_paths = [
        min((find_shortest(G, r_point, g_point) for g_point in greens), key=lambda x: x[1])
        for r_point in reds
    ]
    
    for path in opt_paths:
        draw(*path)
    
    
    plt.xlabel("X-axis")
    plt.ylabel("Y-axis")
    plt.grid(True)
    plt.show()
    

    output