python image opencv image-processing bounding-box

Table without a grid, draw horizontal and vertical grid lines

I have the following image:

Note that the above image is just an example. I need a solution that works on every image: my images can have different sizes, more or fewer empty cells, and so on. (I'll always try to remove any existing gridlines before using the solution I find here, but the solution should still be general.)

I'm trying to draw horizontal and vertical lines on it.

Horizontal function:

def getHorizontalCnt(old_image):
    """Find contours of the image rows that contain non-white content.

    The image is converted to grayscale and squeezed to a single pixel
    column (one averaged value per image row). Rows whose averaged value is
    at or below 240 become white in the inverted binary mask, and the
    external contours of that mask are returned.

    Side effect: the mask is displayed with ``plt.imshow``.

    Args:
        old_image: BGR image array; it is copied, not modified.

    Returns:
        A ``(contours, width)`` tuple, where ``width`` is the image width in
        pixels.
    """
    working = old_image.copy()
    height, width = working.shape[:2]

    grayscale = cv2.cvtColor(working, cv2.COLOR_BGR2GRAY)

    # dsize is (width, height): a width of 1 leaves one averaged value per row
    profile = cv2.resize(grayscale, (1, height), interpolation=cv2.INTER_AREA)

    # inverted threshold: anything not brighter than 240 turns white
    _, mask = cv2.threshold(profile, 240, 255, cv2.THRESH_BINARY_INV)
    plt.imshow(mask)

    # findContours returns 2 or 3 values; pick the contour list from either form
    found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(found) == 2:
        outlines = found[0]
    else:
        outlines = found[1]
    return outlines, width

Vertical function:

def getVerticalCnt(old_image):
    """Find contours of the image columns that average to white.

    The image is converted to grayscale and squeezed to a single pixel row
    (one averaged value per image column). Columns whose averaged value is
    above 254 become white in the binary mask, and the external contours of
    that mask are returned.

    Args:
        old_image: BGR image array; it is copied, not modified.

    Returns:
        A ``(contours, height)`` tuple, where ``height`` is the image height
        in pixels.
    """
    working = old_image.copy()
    height, width = working.shape[:2]

    grayscale = cv2.cvtColor(working, cv2.COLOR_BGR2GRAY)

    # dsize is (width, height): a height of 1 leaves one averaged value per column
    profile = cv2.resize(grayscale, (width, 1), interpolation=cv2.INTER_AREA)

    # only values above 254 stay white in the mask
    _, mask = cv2.threshold(profile, 254, 255, cv2.THRESH_BINARY)

    # findContours returns 2 or 3 values; pick the contour list from either form
    found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(found) == 2:
        outlines = found[0]
    else:
        outlines = found[1]
    return outlines, height

Applying them:

original_image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded, so fail here with a clear message
if original_image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))
# Horizontal lines: contours along the row profile, plus the image width
contours_h, ww_ = getHorizontalCnt(original_image.copy())
# Vertical lines: contours along the column profile, plus the image height
contours_v, hh_ = getVerticalCnt(original_image.copy())
# Draw horizontal
for cntr in contours_h:
    x, y, w, h = cv2.boundingRect(cntr)
    # one full-width black line through the vertical middle of each contour
    ycenter = y + h // 2
    cv2.line(original_image, (0, ycenter), (ww_ - 1, ycenter), (0, 0, 0), 1)
# Draw vertical
for cntr in contours_v:
    x, y, w, h = cv2.boundingRect(cntr)
    # one full-height black line through the horizontal middle of each contour
    xcenter = x + w // 2
    cv2.line(original_image, (xcenter, 0), (xcenter, hh_ - 1), (0, 0, 0), 1)

plt.imshow(original_image)

Result:

If I change in the # Draw horizontal area from ycenter = y + h //2 to ycenter = y + h + 2 I get the following result:

In both functions, I tried every threshold from 240 to 255, changing the value by 1 each time, but none of them worked for me (sometimes I got a different result, but still not a good one; the results above are the best I have got so far).

Update, extra background images:

Input:

Output:

Update input image without `matplotlib.pyplot.imshow()`

Image link: https://i.sstatic.net/YR1SV.jpg

Solution

The following draws your lines on your table in Python/OpenCV.

Read the input
Convert to gray
Threshold (at 220, though this value may be image dependent) to remove the noise in your image. (Note that you won't see the noise unless you threshold at a high value.)
Average the threshold image to one row
Threshold that at 254
Apply a small horizontal morphology open to connect the period in the 5th text column header so no extra line is drawn there.
Get the contours and bounding box and the X centers.
Draw contours as vertical lines at the X coordinates on a copy of the input for the vertical lines result and on a second copy of the input (for the final result)
Average the threshold to one column
Threshold that at 254
Get the contours and bounding box and the Y centers.
Draw contours as horizontal lines at the Y coordinates on a copy of the input for the horizontal lines result and on the previous second copy of the input (for the final result)
Save results

Input:

import cv2
import numpy as np

# read the input; cv2.imread returns None instead of raising when the file is
# missing or cannot be decoded, so fail here with a clear message
input_path = 'table4b.png'
img = cv2.imread(input_path)
if img is None:
    raise FileNotFoundError("Could not read image: " + input_path)
hh, ww = img.shape[:2]

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold on white: pixels brighter than 220 become 255, all others 0
# (the value may need tuning per image; it removes near-white noise)
thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)[1]

# average the thresholded image down to one row (one value per image column)
row = cv2.resize(thresh, (ww, 1), interpolation = cv2.INTER_AREA)

# keep only the columns whose average is above 254, i.e. the white gaps
thresh1 = cv2.threshold(row, 254, 255, cv2.THRESH_BINARY)[1]

# a 5x1 open removes white gaps narrower than the kernel, so a small gap such
# as the one before a period in a header does not get its own line
kernel = cv2.getStructuringElement(cv2.MORPH_RECT , (5,1))
thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, kernel)

# get contours of the white gaps (findContours returns 2 or 3 values)
contours = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# result collects both sets of lines; result1 gets the vertical lines only
result = img.copy()
result1 = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    # draw a full-height black line at the horizontal center of each gap
    xcenter = x + w // 2
    cv2.line(result1, (xcenter,0), (xcenter, hh-1), (0, 0, 0), 1)
    cv2.line(result, (xcenter,0), (xcenter, hh-1), (0, 0, 0), 1)

# average the thresholded image down to one column (one value per image row)
column = cv2.resize(thresh, (1, hh), interpolation = cv2.INTER_AREA)

# keep only the rows whose average is above 254, i.e. the white gaps
thresh2 = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]

# get contours of the white gaps (findContours returns 2 or 3 values)
contours = cv2.findContours(thresh2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# result2 gets the horizontal lines only
result2 = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    # draw a full-width black line at the vertical center of each gap
    ycenter = y + h // 2
    cv2.line(result2, (0, ycenter), (ww-1, ycenter), (0, 0, 0), 1)
    cv2.line(result, (0, ycenter), (ww-1, ycenter), (0, 0, 0), 1)

# save results
cv2.imwrite('table4_threshold.png', thresh)
cv2.imwrite('table4_column_lines.png', result1)
cv2.imwrite('table4_row_lines.png', result2)
cv2.imwrite('table4_column_and_row_lines.png', result)

# show result
cv2.imshow('thresh', thresh)
cv2.imshow('result1', result1)
cv2.imshow('result2', result2)
cv2.imshow('result', result)
cv2.waitKey(0)

Threshold Image:

Row Lines Image:

Column Lines Image:

Row and Column Lines Image: