Search code examples
python · image · opencv · image-processing · bounding-box

Table without a grid, draw horizontal and vertical grid lines


I have the following image:

gridless input image

Note: the above image is just an example. I need a solution that works on every image; my images can have different sizes, more or fewer empty cells, and so on (I'll always try to remove the gridlines before using the solution I find here, but still).

I'm trying to draw the horizontal and vertical grid lines on it.

Horizontal function:

def getHorizontalCnt(old_image):
    """Find contours marking the text rows of a gridless table image.

    The image is converted to grayscale and every pixel row is averaged
    down to a single value, producing a one-pixel-wide column that is as
    tall as the input. That column is inverse-thresholded at 240, so
    rows whose average is at or below 240 become white in the mask, and
    the external contours of the mask are extracted.

    Args:
        old_image: BGR image array; it is copied and never modified.

    Returns:
        A ``(contours, width)`` tuple: the contours found in the
        thresholded column and the width of the input image in pixels.
    """
    working = old_image.copy()
    height, width = working.shape[:2]

    grayscale = cv2.cvtColor(working, cv2.COLOR_BGR2GRAY)

    # Target size is (width=1, height=height): one averaged value per row.
    row_means = cv2.resize(
        grayscale, (1, height), interpolation=cv2.INTER_AREA
    )

    # Inverse threshold: everything that is not near-white turns white.
    _, mask = cv2.threshold(row_means, 240, 255, cv2.THRESH_BINARY_INV)
    plt.imshow(mask)

    found = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # findContours may return a 2-tuple or a 3-tuple; the contour list
    # is the first element in the former case and the second in the latter.
    if len(found) == 2:
        return found[0], width
    return found[1], width

Vertical function:

def getVerticalCnt(old_image):
    """Find contours marking the blank column gaps of a gridless table image.

    The image is converted to grayscale and every pixel column is
    averaged down to a single value, producing a one-pixel-high row that
    is as wide as the input. That row is thresholded at 254, so only
    columns whose average exceeds 254 stay white in the mask, and the
    external contours of the mask are extracted.

    Args:
        old_image: BGR image array; it is copied and never modified.

    Returns:
        A ``(contours, height)`` tuple: the contours found in the
        thresholded row and the height of the input image in pixels.
    """
    working = old_image.copy()
    height, width = working.shape[:2]

    grayscale = cv2.cvtColor(working, cv2.COLOR_BGR2GRAY)

    # Target size is (width=width, height=1): one averaged value per column.
    column_means = cv2.resize(
        grayscale, (width, 1), interpolation=cv2.INTER_AREA
    )

    # Plain binary threshold: only (almost) pure-white columns survive.
    _, mask = cv2.threshold(column_means, 254, 255, cv2.THRESH_BINARY)

    found = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # findContours may return a 2-tuple or a 3-tuple; the contour list
    # is the first element in the former case and the second in the latter.
    if len(found) == 2:
        return found[0], height
    return found[1], height

Applying them:

# Load the table image; image_path is expected to be defined earlier.
original_image = cv2.imread(image_path)
# Horizontal lines
# contours_h come from a one-pixel-wide column of per-row averages;
# ww_ is the image width, used as the right end of each drawn line.
contours_h, ww_ = getHorizontalCnt(original_image.copy())
# Vertical lines
# contours_v come from a one-pixel-high row of per-column averages;
# hh_ is the image height, used as the bottom end of each drawn line.
contours_v, hh_ = getVerticalCnt(original_image.copy()) 
# Draw horizontal
# One black, 1-px line across the full width per contour, placed at the
# vertical midpoint of the contour's bounding box.
for cntr in contours_h:
    x, y, w, h = cv2.boundingRect(cntr)
    ycenter = y + h //2
    cv2.line(original_image, (0, ycenter), (ww_ - 1, ycenter), (0, 0, 0), 1)
# Draw vertical
# One black, 1-px line down the full height per contour, placed at the
# horizontal midpoint of the contour's bounding box.
for cntr in contours_v:
    x, y, w, h = cv2.boundingRect(cntr)
    xcenter = x + w // 2
    cv2.line(original_image, (xcenter, 0), (xcenter, hh_ - 1), (0, 0, 0), 1)
    
# Display the annotated image (the lines were drawn in place above).
plt.imshow(original_image)

Result: wrong gridded table

If, in the # Draw horizontal section, I change ycenter = y + h //2 to ycenter = y + h + 2, I get the following result:

only horizontal seem ok

In both functions, I tried thresholds from 240 to 255 by raising and lowering 1 by 1 each time, but none worked for me (sometimes I got a different result but still not a good one, the above are the best results I got so far).

Update, extra background images:

Input: input extra

Output: output extra

Update input image without matplotlib.pyplot.imshow()

raw image outside of python

Image link: https://i.sstatic.net/YR1SV.jpg


Solution

  • The following draws your lines on your table in Python/OpenCV.

    • Read the input
    • Convert to gray
    • Threshold (at 220, but this may be image dependent) to remove the noise in your image. (Note that you won't see the noise unless you threshold at a high value.)
    • Average the threshold image to one row
    • Threshold that at 254
    • Apply a small horizontal morphology open to connect the period in the 5th text column header so no extra line is drawn there.
    • Get the contours and bounding box and the X centers.
    • Draw contours as vertical lines at the X coordinates on a copy of the input for the vertical lines result and on a second copy of the input (for the final result)
    • Average the threshold image to one column
    • Threshold that at 254
    • Get the contours and bounding box and the Y centers.
    • Draw contours as horizontal lines at the Y coordinates on a copy of the input for the horizontal lines result and on the previous second copy of the input (for the final result)
    • Save results

    Input:

    enter image description here

    import cv2
    import numpy as np
    
    # Draw vertical and horizontal separator lines on a gridless table image.
    #
    # Outputs: the cleaned threshold image, the image with vertical lines
    # only (result1), with horizontal lines only (result2), and with both
    # (result).

    # read the input
    img = cv2.imread('table4b.png')
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded; fail here with a clear message.
        raise FileNotFoundError("could not read image 'table4b.png'")
    hh, ww = img.shape[:2]
    
    # convert to grayscale 
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # threshold on white to suppress near-white noise
    # (220 may need tuning for other images)
    thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)[1]
    
    # average the thresholded image down to a single row:
    # one value per pixel column of the input
    row = cv2.resize(thresh, (ww, 1), interpolation = cv2.INTER_AREA)
    
    # keep only the columns whose average is above 254,
    # i.e. the blank gaps between the text columns
    thresh1 = cv2.threshold(row, 254, 255, cv2.THRESH_BINARY)[1]
    
    # apply a small horizontal morphological open: white gaps narrower than
    # the 5-px kernel are removed, so a narrow gap (such as the one before
    # the period in a column header) does not produce an extra line
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT , (5,1))
    thresh1 = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, kernel)
    
    # get contours of the gaps (the contour list is the first element of a
    # 2-tuple return and the second element of a 3-tuple return)
    contours = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    result = img.copy()     # receives both vertical and horizontal lines
    result1 = img.copy()    # receives vertical lines only
    for cntr in contours:
        # draw a full-height black line at the horizontal centre of each gap
        x, y, w, h = cv2.boundingRect(cntr)
        xcenter = x + w // 2
        cv2.line(result1, (xcenter,0), (xcenter, hh-1), (0, 0, 0), 1)
        cv2.line(result, (xcenter,0), (xcenter, hh-1), (0, 0, 0), 1)
    
    # average the thresholded image down to a single column:
    # one value per pixel row of the input
    column = cv2.resize(thresh, (1, hh), interpolation = cv2.INTER_AREA)
    
    # keep only the rows whose average is above 254,
    # i.e. the blank gaps between the text rows
    thresh2 = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]
    
    # get contours of the gaps
    contours = cv2.findContours(thresh2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    result2 = img.copy()    # receives horizontal lines only
    for cntr in contours:
        # draw a full-width black line at the vertical centre of each gap
        x, y, w, h = cv2.boundingRect(cntr)
        ycenter = y + h // 2
        cv2.line(result2, (0, ycenter), (ww-1, ycenter), (0, 0, 0), 1)
        cv2.line(result, (0, ycenter), (ww-1, ycenter), (0, 0, 0), 1)
        
    # save results
    cv2.imwrite('table4_threshold.png', thresh)
    cv2.imwrite('table4_column_lines.png', result1)
    cv2.imwrite('table4_row_lines.png', result2)
    cv2.imwrite('table4_column_and_row_lines.png', result)
       
    # show result; block until a key is pressed, then close the windows
    cv2.imshow('thresh', thresh)
    cv2.imshow('result1', result1)
    cv2.imshow('result2', result2)
    cv2.imshow('result', result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    

    Threshold Image:

    enter image description here

    Row Lines Image:

    enter image description here

    Column Lines Image:

    enter image description here

    Row and Column Lines Image:

    enter image description here