Search code examples
python opencv image-processing

Add lines if a table is borderless using OpenCV in Python


I am currently working on a project that involves processing images of tables without borders. My goal is to add gridlines to these tables using OpenCV in Python. I came across a solution on Stack Overflow (this), which seemed promising, but unfortunately it does not work as expected for my image. Here is the code I followed:

import cv2
import numpy as np

# load the image and note its height and width
img = cv2.imread('/content/sample_data/Untitled.png')
hh, ww = img.shape[:2]

# grayscale copy, binarized: pixels brighter than 220 become 255, the rest 0
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)


def external_contours(mask):
    """Return the external contours of mask for either findContours return layout."""
    found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # a 2-item result carries the contours first; otherwise they sit at index 1
    return found[0] if len(found) == 2 else found[1]


# drawing canvases: column lines only, row lines only, and both together
result1 = img.copy()
result2 = img.copy()
result = img.copy()

# --- vertical lines ---
# average the binary image down to a single row
row_profile = cv2.resize(thresh, (ww, 1), interpolation=cv2.INTER_AREA)

# keep only the positions whose averaged value is above 254
_, column_gaps = cv2.threshold(row_profile, 254, 255, cv2.THRESH_BINARY)

# apply a small 5x1 opening to merge a period with its column of text
opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
column_gaps = cv2.morphologyEx(column_gaps, cv2.MORPH_OPEN, opening_kernel)

# one full-height black line through the horizontal centre of every contour
for contour in external_contours(column_gaps):
    left, _, width, _ = cv2.boundingRect(contour)
    xcenter = left + width // 2
    for canvas in (result1, result):
        cv2.line(canvas, (xcenter, 0), (xcenter, hh - 1), (0, 0, 0), 1)

# --- horizontal lines ---
# average the binary image down to a single column
column_profile = cv2.resize(thresh, (1, hh), interpolation=cv2.INTER_AREA)

# keep only the positions whose averaged value is above 254
_, row_gaps = cv2.threshold(column_profile, 254, 255, cv2.THRESH_BINARY)

# one full-width black line through the vertical centre of every contour
for contour in external_contours(row_gaps):
    _, top, _, height = cv2.boundingRect(contour)
    ycenter = top + height // 2
    for canvas in (result2, result):
        cv2.line(canvas, (0, ycenter), (ww - 1, ycenter), (0, 0, 0), 1)

# save results
outputs = (
    ('table4_threshold.png', thresh),
    ('table4_column_lines.png', result1),
    ('table4_row_lines.png', result2),
    ('table4_column_and_row_lines.png', result),
)
for filename, image in outputs:
    cv2.imwrite(filename, image)

Input Image: enter image description here

Expected Output: enter image description here

Actual Output: enter image description here

Has anyone encountered a similar issue when working with borderless tables and OpenCV?


Solution

  • Here is a variation of my second method from the link you referenced. The main change is to first crop your image to the bounds of the table, then remove the horizontal lines, and finally use that second method to add the new lines.

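    The issue you were having was that you did not lower the threshold slightly to account for the vertical black lines already in your image. Those lines darken the average of every row, so even rows without text fall below a cutoff of 254.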

    Input:

    enter image description here

    import cv2
    import numpy as np
    
    # read image
    image_path = "table.png"
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising,
    # so stop here with a clear message instead of failing inside cvtColor
    if img is None:
        raise OSError(f"could not read image: {image_path}")
    
    # convert to grayscale 
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # invert
    gray_inv = 255 - gray
    
    # threshold: keep only pixels that are 255 after inversion (pure black in gray)
    gray_inv[gray_inv != 255] = 0
    
    # get bounding box of the remaining non-zero pixels
    x, y, w, h = cv2.boundingRect(gray_inv)
    # without any pure-black pixel the box is empty and the crop below would be too
    if w == 0 or h == 0:
        raise ValueError("no pure-black pixels found; cannot determine the crop bounds")
    
    # crop gray image (NumPy slice: crop is a view, so edits to it also change gray)
    crop = gray[y:y+h, x:x+w]
    hh, ww = crop.shape[:2]
    
    # remove horizontal lines: close with a full-width, 1-pixel-high kernel,
    # then paint white every crop pixel where the closed image is not white
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ww, 1))
    morph = cv2.morphologyEx(crop, cv2.MORPH_CLOSE, kernel)
    crop[morph < 255] = 255
    
    # average crop image to one column
    column = cv2.resize(crop, (1, hh), interpolation=cv2.INTER_AREA)
    
    # threshold on white; 245 (not 254) leaves room for the existing vertical lines
    thresh = cv2.threshold(column, 245, 255, cv2.THRESH_BINARY)[1]
    
    # get contours (findContours returns 2 or 3 values depending on the OpenCV version)
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    
    # loop over contours and get bounding boxes and ycenter and draw horizontal line at ycenter
    result = crop.copy()
    for cntr in contours:
        x, y, w, h = cv2.boundingRect(cntr)
        ycenter = y + h // 2
        cv2.line(result, (0, ycenter), (ww - 1, ycenter), (0, 0, 0), 2)
    
    # write results
    cv2.imwrite("table_lines2.png", result)
    
    # display results (blocks until a key is pressed)
    cv2.imshow("CROP", crop)
    cv2.imshow("MORPH", morph)
    cv2.imshow("RESULT", result)
    cv2.waitKey(0)
    

    Result:

    enter image description here