
Python OpenCV issue in chopping the grid out of a black-bordered table image


I am trying to chop the grid cells out of a table with a black border.

[image: input table with a black-bordered grid]

Here is the program I tried:

import cv2 
import numpy as np 
 
# Load the image 
image = cv2.imread('img.jpg')
 
# Convert to grayscale 
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 
# Blur the image 
# blurred = cv2.GaussianBlur(gray, (5, 5), 0) 
blurred = cv2.bilateralFilter(gray, 11, 21, 7)
# Detect edges 
edges = cv2.Canny(blurred, 50, 150) 
# Find contours 
contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 
 
# Filter contours 
rects = [] 
for contour in contours: 
    # Approximate the contour to a polygon 
    polygon = cv2.approxPolyDP(contour, 0.01 * cv2.arcLength(contour, True), True) 
     
    # Check if the polygon has 4 sides and the aspect ratio is close to 1 
    if len(polygon) == 4 and abs(1 - cv2.contourArea(polygon) / (cv2.boundingRect(polygon)[2] * cv2.boundingRect(polygon)[3])) < 0.1: 
        rects.append(polygon) 
 
# Draw rectangles 
image_number = 0
for rect in rects: 
    x,y,w,h = cv2.boundingRect(rect)
    ROI = image[y:y+h, x:x+w]
    cv2.imwrite('img/ROI_{}.png'.format(image_number), ROI)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    image_number += 1

cv2.imwrite('img/chopped_result.png', image)
# Show the result 
cv2.imshow("Rectangles", image) 
cv2.waitKey(5000) 

I've tried two different blurring mechanisms:

blurred = cv2.GaussianBlur(gray, (5, 5), 0) 
blurred = cv2.bilateralFilter(gray, 11, 21, 7)

But the results look similar in both cases:

[image: detection result with green rectangles around the detected regions]

The green rectangles mark where words were detected instead of the grid cells.

Is there any way I can improve the rate of successfully detecting the grid cells?

Thanks,

Toby.


Solution

  • I suggest refraining from blurring and using the following approach instead to improve the results:

    • _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)

    • contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    • find contours with four edges and calculate the area of the contour to filter out the small ones

    • check for black pixels inside the contours, using a threshold value to filter the results further

    Below is an example of the result of following this approach:

    [image: result with the detected grid cells outlined in green]

    Notice that you will need appropriate threshold values for the area and number of allowed black "island" pixels within the detected white areas to achieve exactly the same result.

    Below is the code which was used to create the image shown above:

    import cv2
    import numpy as np
    # Read the image
    image = cv2.imread('cv2bw.jpg')
    # Get the dimensions/size of the image
    height, width = image.shape[:2]
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Threshold the grayscale image to extract white regions
    _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
    # Find contours in the thresholded image
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        # Approximate the contour to a polygon
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Filter contours based on the number of vertices (parallelograms have 4 vertices)
        if len(approx) == 4:
            # Compute the area of the contour
            area = cv2.contourArea(approx)
            # Specify a minimal size threshold
            minimal_size_threshold = 1000
            # Filter contours based on area
            if area > minimal_size_threshold:
                mask = np.zeros_like(gray)
                cv2.fillPoly(mask, [approx], (255, 255, 255))
                cv2.drawContours(mask, [approx], -1, (0,0,0), 3)
                # Check if there are any black pixels within the masked region
                if np.sum(cv2.bitwise_and(cv2.bitwise_not(thresh), mask) == 255) < 50:
                    # Draw the contours on the original image
                    cv2.drawContours(image, [approx], -1, (0, 255, 0), 2)
    # Display the result
    cv2.imshow('Detected Parallelograms', cv2.resize(image, (int(width * 0.5), int(height * 0.5))))
    import time
    key=0
    while key != 113: # -> [q] for quit
        key = cv2.waitKeyEx(0)
        print(key)
        time.sleep(1)
    cv2.destroyAllWindows()
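
    The 1000 (minimal contour area) and 50 (allowed black pixels) used above are exactly the two threshold values mentioned before. Below is a minimal sketch of how one could sweep them to find workable values, assuming the pipeline is wrapped in a hypothetical helper count_cells; the helper name and the swept value ranges are illustrative, not part of the original code:

    import cv2
    import numpy as np

    def count_cells(gray, thresh_val=240, min_area=1000, max_black=50):
        # Re-run the threshold / contour / black-pixel pipeline from above and
        # return how many candidate cells survive both filters.
        _, thresh = cv2.threshold(gray, thresh_val, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        count = 0
        for contour in contours:
            approx = cv2.approxPolyDP(contour, 0.02 * cv2.arcLength(contour, True), True)
            if len(approx) != 4 or cv2.contourArea(approx) <= min_area:
                continue
            mask = np.zeros_like(gray)
            cv2.fillPoly(mask, [approx], 255)
            cv2.drawContours(mask, [approx], -1, 0, 3)
            black_pixels = np.sum(cv2.bitwise_and(cv2.bitwise_not(thresh), mask) == 255)
            if black_pixels < max_black:
                count += 1
        return count

    gray = cv2.cvtColor(cv2.imread('cv2bw.jpg'), cv2.COLOR_BGR2GRAY)
    # Print how many cells each combination detects and pick the one that
    # matches the number of cells you expect in the grid.
    for min_area in (500, 1000, 2000):
        for max_black in (20, 50, 100):
            print(min_area, max_black, count_cells(gray, 240, min_area, max_black))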
    

    Playing around a bit with the parameters, the best combination I found for getting the areas with letters was changing these two lines:

    _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
    if np.sum(cv2.bitwise_and(cv2.bitwise_not(thresh), mask) == 255) > 50:

    Here is the result:

    [image: inverted selection, with the cells that contain letters outlined]
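
    Since the black-pixel count is what separates the empty cells from the cells that contain letters, both selections could in principle be drawn in a single pass with different colours. Below is a minimal sketch of that idea; it assumes that a single binary threshold of 100 works for both groups on this image, and the colour choice and output file name are illustrative:

    import cv2
    import numpy as np

    image = cv2.imread('cv2bw.jpg')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        approx = cv2.approxPolyDP(contour, 0.02 * cv2.arcLength(contour, True), True)
        if len(approx) == 4 and cv2.contourArea(approx) > 1000:
            mask = np.zeros_like(gray)
            cv2.fillPoly(mask, [approx], 255)
            cv2.drawContours(mask, [approx], -1, 0, 3)
            black_pixels = np.sum(cv2.bitwise_and(cv2.bitwise_not(thresh), mask) == 255)
            # Empty cells in green, cells that contain letters in red.
            colour = (0, 255, 0) if black_pixels < 50 else (0, 0, 255)
            cv2.drawContours(image, [approx], -1, colour, 2)
    # Save the classified result (output file name chosen for this sketch).
    cv2.imwrite('cells_classified.png', image)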

    There is probably some room for further improvement, but the applied method won't be able to get the letters whose dark parts overlap the cell grid lines. To get them all, I suggest trying to infer the positions of the cells that were not detected from the contours of the cells that were, as sketched below.
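
    A minimal sketch of that last suggestion, assuming the detected cells cover most of the grid and the grid is roughly axis-aligned: cluster the x and y coordinates of the detected bounding boxes into columns and rows, take the median cell size, and report the grid positions for which no box was found. The helper names and the tolerance value are illustrative, not from the original answer:

    import numpy as np

    def cluster_1d(values, tol):
        # Group sorted coordinates that lie within `tol` pixels of each other
        # and return one representative (the mean) per group.
        groups = []
        for v in sorted(values):
            if groups and abs(v - groups[-1][-1]) <= tol:
                groups[-1].append(v)
            else:
                groups.append([v])
        return [int(np.mean(g)) for g in groups]

    def guess_missing_cells(rects, tol=10):
        # rects: (x, y, w, h) bounding boxes of the detected cells,
        # e.g. collected with cv2.boundingRect(approx) in the loop above.
        xs = cluster_1d([x for x, y, w, h in rects], tol)
        ys = cluster_1d([y for x, y, w, h in rects], tol)
        w_med = int(np.median([w for x, y, w, h in rects]))
        h_med = int(np.median([h for x, y, w, h in rects]))
        missing = []
        for gy in ys:
            for gx in xs:
                # A grid position is "missing" if no detected cell starts near it.
                if not any(abs(x - gx) <= tol and abs(y - gy) <= tol
                           for x, y, w, h in rects):
                    missing.append((gx, gy, w_med, h_med))
        return missing

    detected = [(10, 10, 100, 40), (120, 10, 100, 40), (10, 60, 100, 40)]
    print(guess_missing_cells(detected))  # -> [(120, 60, 100, 40)]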