Add lines if a table is borderless using OpenCV in Python

I am currently working on a project that involves processing images of tables without borders. My goal is to add gridlines to these tables using OpenCV in Python. I came across a solution on Stack Overflow that seemed promising, but unfortunately it doesn't work as expected for my image. This is the code I followed:

import cv2
import numpy as np

# Draw grid lines on a borderless table image: vertical lines through the
# centers of the white gaps between columns, and horizontal lines through the
# centers of the white gaps between rows.

# read the input
input_path = '/content/sample_data/Untitled.png'
img = cv2.imread(input_path)
# cv2.imread returns None (it does not raise) when the file cannot be read;
# fail here with a clear message instead of an AttributeError at img.shape
if img is None:
    raise OSError("Could not read image: " + input_path)
hh, ww = img.shape[:2]

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold on white: pixels brighter than 220 become 255, everything else 0
thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)[1]

# average the thresholded image down to a single row (one value per column)
row = cv2.resize(thresh, (ww, 1), interpolation = cv2.INTER_AREA)

# keep only the columns whose average is (nearly) pure white, i.e. the gaps
# between columns of text
thresh1 = cv2.threshold(row, 254, 255, cv2.THRESH_BINARY)[1]

# apply small amount of morphology to merge period with column of text:
# opening removes white gaps narrower than the 5-pixel-wide kernel
kernel = cv2.getStructuringElement(cv2.MORPH_RECT , (5,1))
thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, kernel)

# get contours of the white gaps; findContours returns a 2-tuple or a 3-tuple
# depending on the OpenCV version, so pick the contour list in either case
contours = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# result gets both sets of lines, result1 only the vertical (column) lines
result = img.copy()
result1 = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    # draw a full-height vertical line through the horizontal center of the gap
    xcenter = x + w // 2
    cv2.line(result1, (xcenter,0), (xcenter, hh-1), (0, 0, 0), 1)
    cv2.line(result, (xcenter,0), (xcenter, hh-1), (0, 0, 0), 1)

# average the thresholded image down to a single column (one value per row)
column = cv2.resize(thresh, (1, hh), interpolation = cv2.INTER_AREA)

# keep only the rows whose average is (nearly) pure white, i.e. the gaps
# between rows of text
thresh2 = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]

# get contours of the white gaps (same version-agnostic unpacking as above)
contours = cv2.findContours(thresh2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# result2 gets only the horizontal (row) lines
result2 = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    # draw a full-width horizontal line through the vertical center of the gap
    ycenter = y + h // 2
    cv2.line(result2, (0, ycenter), (ww-1, ycenter), (0, 0, 0), 1)
    cv2.line(result, (0, ycenter), (ww-1, ycenter), (0, 0, 0), 1)

# save results
cv2.imwrite('table4_threshold.png', thresh)
cv2.imwrite('table4_column_lines.png', result1)
cv2.imwrite('table4_row_lines.png', result2)
cv2.imwrite('table4_column_and_row_lines.png', result)

Input Image:

Expected Output:

Actual Output:

Has anyone encountered a similar issue when working with borderless tables and OpenCV?

Solution

Here is a variation of my second method from the link you referenced. The main change is to first crop your image to the bounds of the table and then remove the existing horizontal lines. After that, the second method is used to add the new lines.

The issue you were having was that the threshold was not lowered slightly to account for the existing vertical black lines in your image (the code below thresholds the averaged column at 245 rather than 254).

Input:

import cv2
import numpy as np

# Crop the image to the table bounds, erase the existing full-width horizontal
# lines, then draw a new horizontal line through the center of every white gap
# between rows of text.

# read image
input_path = "table.png"
img = cv2.imread(input_path)
# cv2.imread returns None (it does not raise) when the file cannot be read;
# fail here with a clear message instead of an opaque error in cvtColor
if img is None:
    raise OSError("Could not read image: " + input_path)

# convert to grayscale 
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# invert
gray_inv = 255 - gray

# threshold: keep only the pixels that were pure black in the original
gray_inv[gray_inv!=255] = 0

# get bounding box of the pure-black pixels
x,y,w,h = cv2.boundingRect(gray_inv)
# guard against a degenerate box: it would make the crop empty and the
# resize further down would fail with an unrelated error
if w == 0 or h == 0:
    raise ValueError("No pure-black pixels found; cannot locate the table bounds")

# crop gray image
crop = gray[y:y+h, x:x+w]
hh, ww = crop.shape[:2]

# remove horizontal lines: closing with a kernel as wide as the crop leaves
# dark only the rows that are dark across the full width; paint those white
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ww,1))
morph = cv2.morphologyEx(crop, cv2.MORPH_CLOSE, kernel)
crop[morph<255] = 255

# average crop image to one column (one value per row)
column = cv2.resize(crop, (1,hh), interpolation = cv2.INTER_AREA)

# threshold on white; 245 rather than 254 so rows that only cross the
# existing vertical black lines still count as white gaps
thresh = cv2.threshold(column, 245, 255, cv2.THRESH_BINARY)[1]

# get contours; findContours returns a 2-tuple or a 3-tuple depending on the
# OpenCV version, so pick the contour list in either case
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# loop over contours and get bounding boxes and ycenter and draw horizontal line at ycenter
result = crop.copy()
for cntr in contours:
    x,y,w,h = cv2.boundingRect(cntr)
    ycenter = y+h//2
    cv2.line(result, (0,ycenter), (ww-1,ycenter), (0, 0, 0), 2)

# write results
cv2.imwrite("table_lines2.png", result)

# display results (blocks until a key is pressed)
cv2.imshow("CROP", crop)
cv2.imshow("MORPH", morph)
cv2.imshow("RESULT", result)
cv2.waitKey(0)

Result: