Search code examples
Tags: python, opencv, tesseract, image-segmentation, python-tesseract

Segmenting image files with text (and pictures) into blocks


I'm trying to create bounding boxes for the text in an image I have. An example is the one below.

I would like to add a bounding box around each "This is a test" line. Unfortunately, I'm not sure why this method is not automatically identifying the bounding boxes.

import re
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from matplotlib import pyplot as plt


# Draw word-level bounding boxes on the image using the
# pytesseract.image_to_data() function, then display the result.
image = cv2.imread('Image.jpg')
if image is None:
    # cv2.imread returns None (it does not raise) when the file cannot be read
    raise FileNotFoundError("Could not read image file 'Image.jpg'")

# OpenCV loads pixels in BGR order. Convert to RGB exactly once: the same
# array is then handed to pytesseract and, later, to matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
d = pytesseract.image_to_data(image, output_type=Output.DICT)
print('DATA KEYS: \n', d.keys())

# Walk the parallel result lists together instead of indexing each by position.
for conf, x, y, w, h in zip(d['conf'], d['left'], d['top'], d['width'], d['height']):
    # condition to only pick boxes with a confidence > 60%
    # float() rather than int(): the latter raises ValueError on a fractional
    # numeric string (NOTE(review): conf type varies by pytesseract version).
    if float(conf) > 60:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# The image is already RGB here, so it is displayed as-is. Swapping the
# channels a second time would turn it back into BGR and make matplotlib
# show red and blue exchanged.
plt.figure(figsize=(16, 12))
plt.imshow(image)
plt.title('SAMPLE IMAGE WITH WORD LEVEL BOXES')
plt.show()

enter image description here


Solution

  • Here is a different way to do that with Python/OpenCV.

    • Read the input
    • Convert to gray
    • (OTSU) Threshold (white text on black background)
    • Apply a morphological dilate with a horizontal kernel longer than the letter spacing, and then a morphological open with a smaller vertical kernel to remove the thin horizontal lines remaining from the line in the page.
    • Find contours
    • Draw bounding boxes of contours on input
    • Save result

    Input:

    enter image description here

    import cv2
    import numpy as np
    
    # load image
    img = cv2.imread("test_text.jpg")
    if img is None:
        # cv2.imread returns None (it does not raise) when the file cannot be read
        raise FileNotFoundError('Could not read image file "test_text.jpg"')

    # convert to gray
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Otsu-threshold the grayscale image, inverted so that the text ends up
    # white on a black background
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # dilate with a wide, short kernel (250x3) so the letters of each text line
    # smear together horizontally into a single blob
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (250, 3))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)
    # open with a narrow, taller kernel (3x17) to remove the thin horizontal
    # lines that remain after the dilation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 17))
    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

    # find the external contours of the blobs; findContours returns either a
    # 2-tuple or a 3-tuple depending on the OpenCV version, so pick the
    # contour list from the right position
    cntrs = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]

    # draw the bounding box of every contour on a copy of the input
    result = img.copy()
    for c in cntrs:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # write the intermediate images and the result to disk
    cv2.imwrite("test_text_threshold.png", thresh)
    cv2.imwrite("test_text_morph.png", morph)
    cv2.imwrite("test_text_lines.jpg", result)

    # show every stage and wait for a key press before closing the windows
    cv2.imshow("GRAY", gray)
    cv2.imshow("THRESH", thresh)
    cv2.imshow("MORPH", morph)
    cv2.imshow("RESULT", result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    


    Thresholded image:

    enter image description here

    Dilated image:

    enter image description here

    Result:

    enter image description here