I have the following image:
I want to extract the boxed diagrams, like so:
Here's what I've attempted:
import cv2
import matplotlib.pyplot as plt
# Load the image (cv2.imread returns None instead of raising when the file cannot be read)
image = cv2.imread('diagram.jpg')
if image is None:
    raise FileNotFoundError("Could not read 'diagram.jpg'")
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply inverse thresholding: pixels at or below 127 become white (255), the rest black
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
# Find the outer contours only
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Draw the contours in red (OpenCV stores colors in BGR order)
cv2.drawContours(image, contours, -1, (0, 0, 255), 2)
# Show the final image; Matplotlib expects RGB, so convert from OpenCV's BGR first
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()
However, I've realized it'll be difficult to extract the diagrams because the contours aren't closed:
I've tried using morphological closing to close the gaps in the box edges:
# Build a 5x5 rectangular structuring element for the closing operation
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(5, 5))
# Morphological closing, intended to bridge the gaps in the box edges
closed = cv2.morphologyEx(src=thresh, op=cv2.MORPH_CLOSE, kernel=kernel)
But this changes almost nothing. How should I approach this problem?
We may replace the morphological closing with an explicit dilate followed by an erode, filling the contours between the two steps.
For filling the gaps, the kernel size should be much larger than 5x5 (I used 51x51).
Assuming the handwritten boxes are colored, we may convert from BGR to HSV, and apply the threshold on the saturation channel of HSV:
# Move from BGR to the HSV color space
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# The saturation plane (channel index 1) stands in for the grayscale image
gray = hsv[..., 1]
# Automatic thresholding with THRESH_OTSU; keep only the binary image (second return value)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
Apply dilate with a large kernel, and use drawContours to fill the contours:
# A relatively large structuring element, so that dilation closes the gaps
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(51, 51))
# Dilate the binary image with the large kernel
dilated = cv2.dilate(src=thresh, kernel=kernel)
# Outer contours of the dilated mask
contours, hierarchy = cv2.findContours(
    dilated, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE
)
# Paint every contour solid white, in place (cv2.FILLED is the -1 "fill" thickness)
cv2.drawContours(dilated, contours, contourIdx=-1, color=255, thickness=cv2.FILLED)
Apply erode after filling the contours. Erode after dilate is equivalent to closing, but here the contours are filled between the dilate and the erode.
# Erode with the same kernel that was used for the dilation
closed = cv2.erode(src=dilated, kernel=kernel)
Code sample:
import cv2
import numpy as np
# Load the image (cv2.imread returns None instead of raising when the file cannot be read)
image = cv2.imread('diagram.png')
if image is None:
    raise FileNotFoundError("Could not read 'diagram.png'")
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)  # Convert from BGR to HSV color space
gray = hsv[:, :, 1]  # Use the saturation channel of HSV as "gray".
# Apply thresholding to create a binary image
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)  # Apply automatic thresholding (use THRESH_OTSU).
# Add zero padding (required due to the large dilate kernel); the same margin is cropped off again below.
pad = 100
thresh = np.pad(thresh, ((pad, pad), (pad, pad)))
# Define a rectangular kernel for morphological operations.
kernel_size = 51  # Use a relatively large kernel for closing the gaps
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
dilated = cv2.dilate(thresh, kernel)  # Dilate with the large kernel
# Fill the contours, before applying erode.
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(dilated, contours, -1, 255, -1)  # Thickness -1 fills the contours in place.
closed = cv2.erode(dilated, kernel)  # Apply erode after filling the contours.
closed = closed[pad:-pad, pad:-pad]  # Remove the padding.
# Find contours
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Draw the contours in blue (OpenCV stores colors in BGR order)
cv2.drawContours(image, contours, -1, (255, 0, 0), 2)
# Show images for testing (note that 'thresh' and 'dilated' still include the padding)
cv2.imshow('gray', gray)
cv2.imshow('thresh', thresh)
cv2.imshow('dilated', dilated)
cv2.imshow('closed', closed)
cv2.imshow('image', image)
cv2.waitKey()
cv2.destroyAllWindows()