Search code examples
pythonopencvimage-processingcomputer-visionmaps

Extract bordering countries from image using Python


I have maps which look like this with colored countries and black borders.

I need a way to extract all neighbouring countries (blue) for every country on the map (red).

The result should look like this:

{
    "country1":{
        "neighbours":["country2","country3","country4","country5","country6","country7","country8"]
    },
    "country2":{
        "neighbours":["country1","country8","country9","country10"]
    },
...
}

So far I have converted it to vector (.svg) using potrace and PIL as suggested to make the map easier to work with this result (An image of the SVG file. I can't upload SVG.)

import os
from PIL import Image

threshold = 620

file_in = "/Users/nils/Documents/map.png"
img = Image.open(file_in)
pixels = list(img.getdata())

newPixels = []
for pixel in pixels:
    if pixel[0]+pixel[1]+pixel[2] <= threshold:
        newPixel = (0, 0, 0)
    else:
        newPixel = (255, 255, 255)
    newPixels.append(newPixel)

newImg = Image.new(img.mode, img.size)
newImg.putdata(newPixels)

file_out = "test.bmp"
print (len(newImg.split()))  # test
if len(newImg.split()) == 4:
    print("converting")
    r, g, b, a = newImg.split()
    newImg = Image.merge("RGB", (r, g, b))
    newImg.save(file_out)
else:
    newImg.save(file_out)
    
    
os.system("potrace test.bmp --svg -o my.svg")

Solution

  • First the source image, second an illustration of the resulting graph (click them):

    enter image description here enter image description here

    The approach:

    1. Thresholding, to get one connected component per country
    2. Connected Components Labeling
    3. For each component:
      1. Dilate enough to reach across border lines and overlap with neighbors
      2. Intersect
      3. Collect the labels in that intersection
      4. Collect those relationships
    import numpy as np
    import cv2 as cv
    import matplotlib.pyplot as plt
    
    im = cv.imread("7oPxiEBe.png")
    (height, width) = im.shape[:2]
    
    gray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
    
    (th, mask) = cv.threshold(gray, 192, 255, cv.THRESH_BINARY)
    

    This picture looks like black lines on white background but you should see it as white blobs on black background:

    raw mask

    # remove small components
    # near egypt, the borders enclose 1-2 pixel sized components of sea
    
    (num_labels, labels, stats, centroids) = cv.connectedComponentsWithStats(mask, connectivity=4)
    for i in range(num_labels):
        if stats[i, cv.CC_STAT_AREA] <= 2:
            mask[labels == i] = 0
    

    cleaned up mask

    # the actual labeling
    (num_labels, labels, stats, centroids) = cv.connectedComponentsWithStats(mask, 8, cv.CV_32S)
    
    # calculate a central point for each component
    # that's just for visualization
    
    # give the "sea" label a border so its central point doesn't stick to a corner of the picture
    modified_mask = mask.copy()
    modified_mask[:,0] = 0
    modified_mask[:,-1] = 0
    modified_mask[0,:] = 0
    modified_mask[-1,:] = 0
    dist = cv.distanceTransform(modified_mask, cv.DIST_L2, cv.DIST_MASK_PRECISE)
    
    central_points = []
    for label in range(num_labels):
        component_dist = np.where(labels == label, dist, 0)
        (minVal, maxVal, minLoc, maxLoc) = cv.minMaxLoc(component_dist)
        central_points.append(maxLoc)
    
    central_points = np.array(central_points)
    

    DT with one peak per component

    # now let's establish neighbor relationships
    # for each component mask,
    #   dilate the component mask by a few pixels,
    #   calculate intersection with (original mask - component mask)
    #   collect the labels in the intersection
    #   own label might show up, shouldn't (assertion). any collected labels are neighbors.
    
    adjacency = np.zeros((num_labels, num_labels), dtype=bool)
    
    # label 0 (borders): neighbor to everyone, that's useless
    # label 1 (sea/background): neighbor to coastal countries
    for label in range(num_labels):
        if label == 0: continue # borders themselves
        # if label == 1: continue # non-continent (sea, other continents)
    
        component = (labels == label)
        noncomponent = mask.astype(bool) & ~component
        dilated_component = cv.dilate(component.astype(np.uint8), kernel=None, iterations=3).astype(bool)
        # NOTE: iterations=1 gives nothing, 2 gives *something* but with obvious missing edges.
        intersection = noncomponent & dilated_component
    
        neighbor_labels = np.unique(labels[intersection])
        adjacency[label, neighbor_labels] = True
        print(f"neighbors of {label:2d}: {set(neighbor_labels)}")
    
    neighbors of  1: {2, 3, 4, 5, 6, 8, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 38, 39, 40, 43, 44, 47, 48, 50, 52}
    neighbors of  2: {1, 3, 5}
    neighbors of  3: {1, 2, 4, 5, 8, 10, 11}
    neighbors of  4: {1, 3, 7, 8, 9}
    neighbors of  5: {1, 2, 3, 6, 11, 12, 13}
    neighbors of  6: {1, 13, 5}
    neighbors of  7: {8, 4}
    neighbors of  8: {1, 3, 4, 7, 9, 10, 15}
    neighbors of  9: {8, 4}
    neighbors of 10: {3, 8, 11, 15, 16, 22, 31}
    neighbors of 11: {3, 5, 10, 12, 16, 19, 20, 24}
    neighbors of 12: {5, 11, 13, 20, 30}
    neighbors of 13: {1, 5, 6, 12, 14, 17, 25, 30}
    neighbors of 14: {1, 13, 17, 23}
    neighbors of 15: {1, 8, 10, 18, 21, 22}
    neighbors of 16: {10, 11, 24, 28, 29, 31}
    neighbors of 17: {34, 13, 14, 23, 25, 26, 27}
    neighbors of 18: {1, 15}
    neighbors of 19: {24, 1, 11, 20}
    neighbors of 20: {1, 37, 38, 39, 11, 12, 19, 30}
    neighbors of 21: {1, 22, 15}
    neighbors of 22: {32, 1, 33, 10, 15, 21, 31}
    neighbors of 23: {1, 27, 14, 17}
    neighbors of 24: {1, 11, 16, 19, 29}
    neighbors of 25: {34, 35, 36, 13, 17, 30}
    neighbors of 26: {1, 34, 27, 17}
    neighbors of 27: {1, 26, 17, 23}
    neighbors of 28: {16, 1, 29, 31}
    neighbors of 29: {16, 1, 28, 24}
    neighbors of 30: {35, 37, 12, 13, 20, 25}
    neighbors of 31: {1, 33, 10, 16, 22, 28}
    neighbors of 32: {1, 22, 33}
    neighbors of 33: {32, 1, 22, 31}
    neighbors of 34: {1, 36, 40, 17, 25, 26}
    neighbors of 35: {1, 36, 37, 40, 41, 42, 43, 44, 45, 25, 30}
    neighbors of 36: {34, 35, 40, 41, 25}
    neighbors of 37: {1, 35, 39, 43, 20, 30}
    neighbors of 38: {1, 20, 39}
    neighbors of 39: {1, 20, 37, 38}
    neighbors of 40: {1, 34, 35, 36, 41, 42, 45, 46, 47}
    neighbors of 41: {40, 42, 35, 36}
    neighbors of 42: {40, 41, 35}
    neighbors of 43: {1, 35, 37}
    neighbors of 44: {1, 50, 35, 45}
    neighbors of 45: {35, 40, 44, 46, 47, 49, 50, 51}
    neighbors of 46: {40, 45, 47}
    neighbors of 47: {1, 40, 45, 46, 49, 52, 53}
    neighbors of 48: {1}
    neighbors of 49: {51, 52, 45, 47}
    neighbors of 50: {1, 44, 45, 51, 52}
    neighbors of 51: {49, 50, 52, 45}
    neighbors of 52: {1, 47, 49, 50, 51, 53, 54}
    neighbors of 53: {52, 47}
    neighbors of 54: {52}
    
    # illustration
    scale = 4
    canvas = cv.resize((im >> 1), dsize=None, fx=scale, fy=scale, interpolation=cv.INTER_NEAREST)
    for i in range(2, num_labels):
        for j in range(i+1, num_labels):
            if adjacency[i, j]:
                #cv.line(canvas, tuple(central_points[i]), tuple(central_points[j]), (0, 255, 0), 1)
                cv.line(
                    img=canvas,
                    pt1=((central_points[i]+0.5) * scale).astype(int),
                    pt2=((central_points[j]+0.5) * scale).astype(int),
                    #color=(186, 186, 186),
                    color=(255, 255, 255),
                    thickness=1,
                    lineType=cv.LINE_AA
                )
    for i in range(2, num_labels):
        cv.putText(
            img=canvas,
            text=f"{i}",
            org=((central_points[i]+0.5) * scale).astype(int),
            fontFace=cv.FONT_HERSHEY_SIMPLEX, fontScale=0.5 * scale, color=(255, 255, 255), thickness=round(1 * scale)
        )
    

    enter image description here