Search code examples
pythonopencvhierarchycontour

Sort associated Hierarchy with already sorted Contours in OpenCv in Python


I am extracting innermost Contours from an Image(input.png) by using the following Code
(I am using Python 3.6.3 and opencv-python==3.4.0.12)

input.png

enter image description here

import copy
import cv2

# Tunable parameters for the extraction.
BLACK_THRESHOLD = 200
THIN_THRESHOLD = 10
ANNOTATION_COLOUR = (0, 0, 255)

# Load the image, keep a copy of it, and build an inverted binary mask:
# with THRESH_BINARY_INV, pixels at or below BLACK_THRESHOLD become 255.
img = cv2.imread('input.png')
orig = copy.copy(img)
gray = cv2.cvtColor(img, 6)  # 6 is the numeric value of cv2.COLOR_BGR2GRAY
_, thresh = cv2.threshold(gray, thresh=BLACK_THRESHOLD, maxval=255, type=cv2.THRESH_BINARY_INV)

# Two-level contour retrieval (OpenCV 3.x returns image, contours, hierarchy)
_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

# findContours wraps the per-contour hierarchy rows in an extra outer list
hierarchy = hierarchy[0]
idx = 0
# Walk each contour together with its hierarchy row and save the accepted ones
for currentContour, currentHierarchy in zip(contours, hierarchy):
    x, y, w, h = cv2.boundingRect(currentContour)
    # Crop 2 px inside the bounding box on every side
    roi = img[y + 2:y + h - 2, x + 2:x + w - 2]
    shorter_side = min(w, h)
    # Thin contours (vertical and horizontal lines)
    if shorter_side < THIN_THRESHOLD:
        continue
    # Contours larger than 300 px in both dimensions
    if shorter_side > 300:
        continue
    # Anything with a side shorter than 40 px
    if shorter_side < 40:
        continue
    # Column 3 of a hierarchy row is the parent index.
    # NOTE(review): 0 is also a valid parent index, so `> 0` skips children
    # of contour 0 -- confirm whether that is intended.
    if currentHierarchy[3] <= 0:
        continue
    # these are the innermost child components
    idx += 1
    cv2.imwrite(str(idx) + '.png', roi)

Result: enter image description here

As you can see, the extracted images are not in any particular order. To fix this, I sorted the contours by the x-coordinate of their bounding boxes. Below is the code:

import copy
import cv2

# Tunable parameters for the extraction.
BLACK_THRESHOLD = 200
THIN_THRESHOLD = 10
ANNOTATION_COLOUR = (0, 0, 255)

# Load the image, keep a copy of it, and build an inverted binary mask:
# with THRESH_BINARY_INV, pixels at or below BLACK_THRESHOLD become 255.
img = cv2.imread('input.png')
orig = copy.copy(img)
gray = cv2.cvtColor(img, 6)  # 6 is the numeric value of cv2.COLOR_BGR2GRAY
_, thresh = cv2.threshold(gray, thresh=BLACK_THRESHOLD, maxval=255, type=cv2.THRESH_BINARY_INV)

# Two-level contour retrieval (OpenCV 3.x returns image, contours, hierarchy)
_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

# Sort Contours on the basis of their x-axis coordinates in ascending order
def sort_contours(cnts, method="left-to-right"):
    """Return the contours ordered by the position of their bounding boxes.

    Args:
        cnts: Iterable of contours accepted by ``cv2.boundingRect``.
        method: One of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value
            behaves like ``"left-to-right"``.

    Returns:
        A tuple holding the contours in the requested order. An empty input
        yields an empty tuple instead of raising ``ValueError``.
    """
    # The two "reversed" directions sort in descending order.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Bounding boxes are unpacked as (x, y, w, h): index 0 sorts on x,
    # index 1 sorts on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0
    # Sorting the contours directly (rather than zipping them with their
    # boxes and unzipping afterwards) keeps the same stable order and also
    # works when there is nothing to sort.
    return tuple(sorted(cnts,
                        key=lambda c: cv2.boundingRect(c)[axis],
                        reverse=reverse))


# Contours in left-to-right order (the default method of sort_contours)
sorted_contours = sort_contours(contours)

idx = 0
# Every contour that passes the size filters is written out; the hierarchy
# is not consulted in this loop.
for currentContour in sorted_contours:
    x, y, w, h = cv2.boundingRect(currentContour)
    # Crop 2 px inside the bounding box on every side
    roi = img[y + 2:y + h - 2, x + 2:x + w - 2]
    shorter_side = min(w, h)
    # Thin contours (vertical and horizontal lines)
    if shorter_side < THIN_THRESHOLD:
        continue
    # Contours larger than 300 px in both dimensions
    if shorter_side > 300:
        continue
    # Anything with a side shorter than 40 px
    if shorter_side < 40:
        continue
    idx += 1
    print(x, idx)
    cv2.imwrite(str(idx) + '.png', roi)

Result:

enter image description here

This sorts the contours perfectly. However, as you can see, I now get all the contours (which is why there are two copies of each digit), because I am no longer using the hierarchy. After some debugging I realized that only the contours are sorted, not their associated hierarchy entries. Can anyone please tell me how to sort the hierarchy along with the contours, so that I get only the innermost contours, in sorted order? Thank you!


Solution

  • Let's start with your first script, since that gave you good results that just were not sorted correctly.

    Observe that the only decision based on the hierarchy (when you decide whether or not to treat a given contour as a digit) is currentHierarchy[3] > 0. Why don't we begin by selecting only the contours that match this criterion, and perform further processing only on this subset (so we don't have to care about the hierarchy any more)?

    # Find the contours. Taking the last two return values works on
    # OpenCV 3.x (image, contours, hierarchy) as well as on 2.x / 4.x
    # (contours, hierarchy).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # Get the actual inner list of hierarchy descriptions; when no contour
    # was found there is no hierarchy to unwrap.
    hierarchy = hierarchy[0] if hierarchy is not None else []

    # Grab only the child components. Column 3 of a hierarchy row is the
    # parent index: -1 means "no parent" and 0 is a valid contour index,
    # so the test is `>= 0` rather than `> 0`.
    inner_contours = [contour for contour, info in zip(contours, hierarchy) if info[3] >= 0]
    

    Now that we're left with only the contours we're interested in, we just have to sort them. We can reuse a simplified version of your original sorting function:

    # Sort Contours on the basis of their x-axis coordinates in ascending order
    def sort_contours(contours):
        """Return the contours ordered left to right.

        Contours are ordered by the x-coordinate (first element) of the
        rectangle returned by ``cv2.boundingRect``, in ascending order.

        Args:
            contours: Iterable of contours accepted by ``cv2.boundingRect``.

        Returns:
            A tuple holding the contours in sorted order. An empty input
            yields an empty tuple instead of raising ``ValueError``.
        """
        # Sorting the contours directly (rather than zipping them with their
        # boxes and unzipping afterwards) keeps the same stable order and
        # also works when there is nothing to sort.
        return tuple(sorted(contours, key=lambda c: cv2.boundingRect(c)[0]))
    

    and get sorted contours:

    # Order the selected contours by the x-coordinate of their bounding boxes
    sorted_contours = sort_contours(inner_contours)
    

    Finally, we want to filter out the garbage and output the good contours correctly labelled:

    # Size limits for an accepted bounding box, and the crop inset
    MIN_SIZE = 40
    MAX_SIZE = 300
    THIN_THRESHOLD = max(10, MIN_SIZE)
    PADDING = 2

    # ...

    idx = 0
    # Save the padded interior of every contour that passes the size checks
    for contour in sorted_contours:
        x, y, w, h = cv2.boundingRect(contour)
        shorter_side = min(w, h)
        # Reject thin contours (either side below THIN_THRESHOLD) and
        # contours that exceed MAX_SIZE in both dimensions
        if shorter_side < THIN_THRESHOLD or shorter_side > MAX_SIZE:
            continue
        # Crop PADDING pixels inside the bounding box on every side
        top, bottom = y + PADDING, y + h - PADDING
        left, right = x + PADDING, x + w - PADDING
        roi = img[top:bottom, left:right]
        idx += 1
        cv2.imwrite(str(idx) + '.png', roi)
    

    Full Script (using Python 2.7.x and OpenCV 3.4.1)

    import cv2
    
    # Threshold value handed to cv2.threshold (used with THRESH_BINARY_INV)
    BLACK_THRESHOLD = 200
    # Lower size bound; only used to derive THIN_THRESHOLD below
    MIN_SIZE = 40
    # Contours whose width AND height both exceed this are skipped
    MAX_SIZE = 300
    # Contours with width OR height below this are skipped
    # (evaluates to MIN_SIZE with the values above)
    THIN_THRESHOLD = max(10, MIN_SIZE)
    # Image file read by the script
    FILE_NAME = "numbers.png"
    # Number of pixels trimmed from each side of a bounding box when cropping
    PADDING = 2
    
    # ============================================================================
    
    # Sort Contours on the basis of their x-axis coordinates in ascending order
    def sort_contours(contours):
        """Return the contours ordered left to right.

        Contours are ordered by the x-coordinate (first element) of the
        rectangle returned by ``cv2.boundingRect``, in ascending order.

        Args:
            contours: Iterable of contours accepted by ``cv2.boundingRect``.

        Returns:
            A tuple holding the contours in sorted order. An empty input
            yields an empty tuple instead of raising ``ValueError``.
        """
        # Sorting the contours directly (rather than zipping them with their
        # boxes and unzipping afterwards) keeps the same stable order and
        # also works when there is nothing to sort.
        return tuple(sorted(contours, key=lambda c: cv2.boundingRect(c)[0]))
    
    # ============================================================================
    
    img = cv2.imread(FILE_NAME)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, which would otherwise surface as an obscure cvtColor error.
        raise IOError("Could not read image file: " + FILE_NAME)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Don't use magic numbers
    thresh = cv2.threshold(gray, thresh=BLACK_THRESHOLD, maxval=255, type=cv2.THRESH_BINARY_INV)[1]

    # Find the contours. Taking the last two return values works on
    # OpenCV 3.x (image, contours, hierarchy) as well as on 2.x / 4.x
    # (contours, hierarchy).
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # Get the actual inner list of hierarchy descriptions; when no contour
    # was found there is no hierarchy to unwrap.
    hierarchy = hierarchy[0] if hierarchy is not None else []

    # Grab only the child components. Column 3 of a hierarchy row is the
    # parent index: -1 means "no parent" and 0 is a valid contour index,
    # so the test is `>= 0` rather than `> 0`.
    inner_contours = [contour for contour, info in zip(contours, hierarchy) if info[3] >= 0]

    # Nothing to sort when no inner contour was selected
    sorted_contours = sort_contours(inner_contours) if inner_contours else ()

    idx = 0
    # For each contour, find the bounding rectangle and extract it
    for contour in sorted_contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Skip thin contours (vertical and horizontal lines)
        if (h < THIN_THRESHOLD) or (w < THIN_THRESHOLD):
            continue
        # Skip contours that are too large in both dimensions
        if (h > MAX_SIZE) and (w > MAX_SIZE):
            continue
        # Crop only the contours that are kept, PADDING pixels inside the box
        roi = img[(y + PADDING):(y + h - PADDING), (x + PADDING):(x + w - PADDING)]
        idx += 1
        cv2.imwrite(str(idx) + '.png', roi)
    

    and the images it produced:

    Screenshot of thumbnails of output images