Search code examples
python opencv image-processing ocr

OCR - How to recognize numbers inside square boxes using python?


One problem with optical character recognition (OCR) is that it can't recognize numbers properly when the numbers are inside square boxes. One failure example with Tesseract is discussed here: "Tesseract - How can I recognize numbers in box?". I was testing with PaddleOCR here: https://www.paddlepaddle.org.cn/hub/scene/ocr (you can quickly try that API too). For this input image:

enter image description here — it returns nothing.

Again, when I try an image like this: enter image description here

It returns all the numbers successfully. Most of the time, number recognition (both printed and handwritten) fails when the numbers are inside square boxes. To recognize numbers inside square boxes, we need to convert these "numbers in boxes" images into plain "numbers" images by removing all the square boxes. I have some images like the ones below:

enter image description here enter image description here

enter image description here enter image description here

As you can see, the square boxes around the numbers are not fully visible; only some parts of the boxes are visible. I want to convert these images into images that contain only the numbers, by removing the square boxes (or the parts of square boxes) present in them; after that, hopefully number/digit recognition will work. I tried this code:

# Attempt 1: estimate the vertical box lines with a grayscale morphological
# opening and subtract that estimate from the image.
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('/content/21.png')
gray=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# 11x11 structuring element with only column 5 set to 1, i.e. a
# one-pixel-wide vertical line through the middle of the kernel.
linek = np.zeros((11,11),dtype=np.uint8)
linek[...,5]=1
# Morphological opening of the grayscale image with the vertical-line kernel,
# requested with iterations=800.
x=cv2.morphologyEx(gray, cv2.MORPH_OPEN, linek ,iterations=800)
# In-place subtraction of the opened image from the grayscale image.
gray-=x
# Show the result (matplotlib's default colormap is used) and save it to disk.
plt.imshow(gray)
cv2.imwrite('21_output.jpg', gray)

output :

enter image description here

also tried this code :

# Attempt 2: detect vertical lines on an Otsu-thresholded mask with a
# morphological opening, then subtract the detected lines from the mask.
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Based on: https://stackoverflow.com/questions/57961119/how-to-remove-all-the-detected-lines-from-the-original-image-using-python

image = cv2.imread('/content/17.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# Inverted binary mask with an Otsu-selected threshold: dark pixels become 255.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Detect vertical lines: open the mask with a 1-wide, 10-tall rectangle.
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,10))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# pick the element that holds the contour list.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# Paint every detected line white on the colour image.
for c in cnts:
    cv2.drawContours(image, [c], -1, (255,255,255), 2)

# NOTE: `image` is rebound here, so the contours drawn above are discarded;
# what is displayed is the binary mask minus the detected-lines mask.
image = thresh - detected_lines
plt.imshow( image)

output :

enter image description here

Unfortunately, it's not able to remove the unwanted lines completely. When it removes the unwanted lines, it removes parts of the original digits/numbers as well. How can I remove those complete or incomplete square boxes around each number in the image? Thanks in advance.


Solution

  • The code below does a decent job for me, but it is hyperparameter-sensitive:

    import cv2
    import imutils
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import pyplot as plt
      
    
    def square_number_box_denoiser(image_path="/content/9.png",is_resize = False, resize_width = 768,
                                   *, line_kernel_length=25, repair_kernel_length=6,
                                   output_path="result.jpg"):
        '''
        Remove the vertical box edges around digits and save/plot the result.

        The image is rotated 90 degrees clockwise so that the vertical box
        edges become horizontal, long horizontal runs are detected with a
        morphological opening and painted white, the digit strokes are
        repaired with a morphological closing, and the image is rotated back.
        The intermediate stages are shown in a matplotlib figure.

        ref : https://pretagteam.com/question/removing-horizontal-lines-in-image-opencv-python-matplotlib

        Args :
          image_path (str or os.PathLike) : path of the image containing numbers/digits inside square box
          is_resize (bool) : whether to resize the input image or not? default : False
          resize_width (int) : target width used when resizing (aspect ratio is kept). default : 768
          line_kernel_length (int) : width of the 1-pixel-high kernel used to detect lines;
              runs shorter than this are not treated as box edges. default : 25
          repair_kernel_length (int) : height of the 1-pixel-wide kernel used to close the
              gaps left in the digits after line removal. default : 6
          output_path (str) : file the cleaned image is written to. default : "result.jpg"

        Returns :
          numpy.ndarray : the cleaned BGR image, in the orientation of the input.

        Raises :
          ValueError : if a kernel length is smaller than 1, or the file cannot be decoded.
          FileNotFoundError : if image_path does not point to an existing file.
        '''
        import os

        if line_kernel_length < 1 or repair_kernel_length < 1:
            raise ValueError("kernel lengths must be >= 1")

        # cv2.imread returns None instead of raising, so fail early and clearly.
        image_path = os.fspath(image_path)
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"image not found: {image_path}")
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"could not decode image: {image_path}")

        if is_resize:
            print("resizing...")
            img = imutils.resize(img, width=resize_width)

        # Rotate so the vertical box edges can be detected as horizontal lines.
        image = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

        # Detect horizontal lines and paint them white on the colour image.
        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (line_kernel_length, 1))
        detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
        cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # findContours returns 2 or 3 values depending on the OpenCV version.
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            cv2.drawContours(image, [c], -1, (255, 255, 255), 2)

        # Repair the digit strokes that were cut where a line crossed them.
        repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, repair_kernel_length))
        result = 255 - cv2.morphologyEx(255 - image, cv2.MORPH_CLOSE, repair_kernel, iterations=2)

        # Undo the initial rotation before saving/returning.
        result_upright = cv2.rotate(result, cv2.ROTATE_90_COUNTERCLOCKWISE)

        # OpenCV images are BGR while matplotlib expects RGB; masks are shown in gray.
        panels = [
            (cv2.cvtColor(img, cv2.COLOR_BGR2RGB), None),
            (thresh, "gray"),
            (detected_lines, "gray"),
            (cv2.cvtColor(image, cv2.COLOR_BGR2RGB), None),
            (cv2.cvtColor(result, cv2.COLOR_BGR2RGB), None),
            (cv2.cvtColor(result_upright, cv2.COLOR_BGR2RGB), None),
        ]
        rows = 3
        columns = 3
        fig = plt.figure(figsize=(20, 20))
        for index, (panel, cmap) in enumerate(panels, start=1):
            fig.add_subplot(rows, columns, index)
            plt.imshow(panel, cmap=cmap)

        cv2.imwrite(output_path, result_upright)

        plt.show()
        return result_upright
    

    Outputs : enter image description here

    without resizing :

    enter image description here

    enter image description here

    enter image description here

    enter image description here

    with 768 resizing :

    enter image description here

    enter image description here

    enter image description here

    enter image description here