Python & OpenCV: How to add lines to gridless table

I have the following table:

I want to write a script that draws lines in the natural gaps between the blocks of text in the table. The result would look like this:

Is there an OpenCV implementation that makes drawing these lines possible? I looked at the answers to the questions here and here, but neither worked. What is the best approach to solving this problem?

Solution

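Here is one way to get the horizontal lines in Python/OpenCV: count the number of white pixels in each row of the image, find the runs of rows that are entirely white (the gaps between the lines of table text), and draw a line at the center y value of each run. The vertical lines can be added by a similar process, working on columns instead of rows.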

Input:

import cv2
import numpy as np

# Draw a horizontal line through the middle of every band of fully white rows
# in table.png, then save and display the result.

# read image; cv2.imread reports failure by returning None rather than raising,
# so check explicitly instead of failing later on img.shape
img = cv2.imread("table.png")
if img is None:
    raise SystemExit("Could not read image 'table.png'")
hh, ww = img.shape[:2]

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold gray image: only pixels brighter than 254 (i.e. pure white) become
# 255, everything else becomes 0
thresh = cv2.threshold(gray, 254, 255, cv2.THRESH_BINARY)[1]

# count number of non-zero (white) pixels in each row
count = np.count_nonzero(thresh, axis=1)

# threshold count at ww (width of image): a row is kept (255) only when every
# pixel in it is white, otherwise it is cleared (0)
count_thresh = np.where(count == ww, 255, 0).astype(np.uint8)

# get contours of the white bands; count_thresh is a 1-D array and the loop
# below reads y/h from the bounding boxes, i.e. it relies on OpenCV treating
# that array as a single column
contours = cv2.findContours(count_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either 2 or 3 values depending on the OpenCV version;
# pick the contour list in both cases
contours = contours[0] if len(contours) == 2 else contours[1]

# loop over contours, get each band's bounding box and draw a horizontal line
# at its vertical center
result = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    ycenter = y + h // 2
    cv2.line(result, (0, ycenter), (ww - 1, ycenter), (0, 0, 0), 2)

# write results
cv2.imwrite("table_thresh.png", thresh)
cv2.imwrite("table_lines.png", result)

# display results; wait for a key press, then close the windows
cv2.imshow("THRESHOLD", thresh)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

Threshold Image:

Result with lines:

ADDITION

Here is an alternate method that is slightly simpler. It averages the image down to one column rather than counting white pixels.

import cv2
import numpy as np

# Draw a horizontal line through the middle of every band of white rows in
# table.png, detecting the bands from a one-column average of the image.

# read image; cv2.imread reports failure by returning None rather than raising,
# so check explicitly instead of failing later on img.shape
img = cv2.imread("table.png")
if img is None:
    raise SystemExit("Could not read image 'table.png'")
hh, ww = img.shape[:2]

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# average gray image to one column (width 1, height hh)
# NOTE(review): the averaged values are presumably rounded back to 8-bit, so a
# row containing only a tiny amount of ink may still average to 255 and be
# treated as blank -- confirm this tolerance is acceptable for the input
column = cv2.resize(gray, (1, hh), interpolation=cv2.INTER_AREA)

# threshold on white: only rows whose average is above 254 become 255
thresh = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]

# get contours of the white bands in the column
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either 2 or 3 values depending on the OpenCV version;
# pick the contour list in both cases
contours = contours[0] if len(contours) == 2 else contours[1]

# loop over contours, get each band's bounding box and draw a horizontal line
# at its vertical center
result = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    ycenter = y + h // 2
    cv2.line(result, (0, ycenter), (ww - 1, ycenter), (0, 0, 0), 2)

# write results
cv2.imwrite("table_lines2.png", result)

# display results; wait for a key press, then close the window
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result: