I am cropping the grid cells out of a table that has a black border.
I tried to write a program as follows:
import cv2
import numpy as np

# Load the image; fail fast with a clear error if the path is wrong,
# instead of crashing inside cvtColor with a cryptic message.
image = cv2.imread('img.jpg')
if image is None:
    raise FileNotFoundError("Could not read 'img.jpg'")

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Reduce noise while keeping the grid lines sharp (edge-preserving filter).
blurred = cv2.bilateralFilter(gray, 11, 21, 7)

# Detect edges
edges = cv2.Canny(blurred, 50, 150)

# Find contours in the edge map
contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Keep only 4-vertex polygons whose area nearly fills their bounding box,
# i.e. shapes that are close to axis-aligned rectangles.
rects = []
for contour in contours:
    # Approximate the contour to a polygon (tolerance: 1% of the perimeter).
    polygon = cv2.approxPolyDP(contour, 0.01 * cv2.arcLength(contour, True), True)
    if len(polygon) == 4:
        # Compute the bounding rect once instead of twice per test.
        x, y, w, h = cv2.boundingRect(polygon)
        if abs(1 - cv2.contourArea(polygon) / (w * h)) < 0.1:
            rects.append(polygon)

# Crop and save each detected rectangle, then draw it on the original image.
image_number = 0
for rect in rects:
    x, y, w, h = cv2.boundingRect(rect)
    ROI = image[y:y + h, x:x + w]
    cv2.imwrite('img/ROI_{}.png'.format(image_number), ROI)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)
    image_number += 1

# BUG FIX: the original called .format(image_number) on a string with no
# placeholder, so the argument was silently ignored; drop the useless call.
cv2.imwrite('img/choped_result.png', image)

# Show the result for 5 seconds.
cv2.imshow("Rectangles", image)
cv2.waitKey(5000)
I've tried two different blurring mechanisms, as follows:
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
blurred = cv2.bilateralFilter(gray, 11, 21, 7)
But the results look similar, as shown below:
The green parts are the regions where words were detected.
I want to ask: is there any way I can improve the rate of successfully detecting the grid cells?
Thanks,
Toby.
I suggest refraining from blurring and instead using the following approach to improve the results:
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
Find the contours with four edges and calculate the area of each contour to filter out the small ones.
Check within each contour for black pixels inside, using a threshold value to further filter the results.
Below is an example of the result of following this approach:
Notice that you will need appropriate threshold values for the area and number of allowed black "island" pixels within the detected white areas to achieve exactly the same result.
Below is the code which was used to create the image shown above:
# FIX: 'import time' was buried mid-script; all imports belong at the top.
import time

import cv2
import numpy as np

# Read the image; fail fast with a clear error on a bad path.
image = cv2.imread('cv2bw.jpg')
if image is None:
    raise FileNotFoundError("Could not read 'cv2bw.jpg'")

# Get the dimensions/size of the image (used for the scaled preview below).
height, width = image.shape[:2]

# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Threshold the grayscale image to extract white regions
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)

# Find the outer contours in the thresholded image
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Minimal contour area to keep — hoisted out of the loop (it is invariant).
minimal_size_threshold = 1000
# Maximum number of black "island" pixels tolerated inside a white cell.
max_black_pixels = 50

for contour in contours:
    # Approximate the contour to a polygon (tolerance: 2% of the perimeter).
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)

    # Parallelograms have exactly 4 vertices; skip everything else.
    if len(approx) != 4:
        continue

    # Filter contours based on area to drop small artifacts.
    if cv2.contourArea(approx) <= minimal_size_threshold:
        continue

    # Build a mask of the candidate cell, then erase a 3 px border so the
    # grid lines themselves are not counted as black pixels.
    mask = np.zeros_like(gray)
    cv2.fillPoly(mask, [approx], (255, 255, 255))
    cv2.drawContours(mask, [approx], -1, (0, 0, 0), 3)

    # Accept the cell only if it contains (almost) no black pixels inside.
    if np.sum(cv2.bitwise_and(cv2.bitwise_not(thresh), mask) == 255) < max_black_pixels:
        # Draw the accepted contour on the original image.
        cv2.drawContours(image, [approx], -1, (0, 255, 0), 2)

# Display the result at half size.
cv2.imshow('Detected Parallelograms', cv2.resize(image, (int(width * 0.5), int(height * 0.5))))

key = 0
while key != 113:  # -> [q] for quit
    key = cv2.waitKeyEx(0)
    print(key)
    time.sleep(1)

cv2.destroyAllWindows()
Playing around a bit with the parameters, the best combination I found to get the areas containing letters was:
_, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
if np.sum(cv2.bitwise_and(cv2.bitwise_not(thresh), mask) == 255) > 50:
There is probably some room for further improvement, but this method won't be able to get the cells where dark parts of the letters overlap the cell grid lines. To get them all, I suggest trying to infer the positions of the undetected cells from the contours of the detected ones.