Search code examples
Tags: opencv, image-processing, computer-vision, python-tesseract, pytesser

Pytesseract Not Recognising Text


I am trying to use Pytesseract to read the digits from the following image:

Low Resolution Image

Unfortunately, the program does not return any result, even after applying greyscale conversion, thresholding, noise removal, or Canny edge detection. When a config is used to whitelist only digits and $/, the program stops detecting text even in the high resolution image. (here)

The code is as follows:


class NumberAnalyser:
    """OpenCV pre-processing helpers plus a Tesseract-based digit reader.

    The methods rely on ``cv2``, ``np`` (numpy) and ``pt`` being imported by
    the enclosing module (``pt`` is presumably ``pytesseract`` -- confirm
    against the module's imports).
    """

    @staticmethod
    def _square_kernel(size=5):
        """Return a ``size`` x ``size`` structuring element of ones (uint8)."""
        return np.ones((size, size), np.uint8)

    # get grayscale image
    def get_grayscale(self, image):
        """Convert a BGR image to a single-channel grayscale image."""
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # noise removal
    def remove_noise(self, image):
        """Return ``image`` smoothed with a median blur (aperture size 5)."""
        return cv2.medianBlur(image, 5)

    # thresholding
    def thresholding(self, image):
        """Return a to-zero thresholded (at 127) version of ``image``.

        The image is converted to grayscale, the pixels selected by an
        inverse-binary Otsu mask are inverted, and finally every pixel that
        is not above 127 is set to zero.
        """
        gray = self.get_grayscale(image)
        # Otsu chooses the threshold automatically; with THRESH_BINARY_INV
        # the mask is 255 for pixels at or below that threshold.
        _, thresh_inv = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        # NOTE: ``gray`` is also passed as the destination, so the masked
        # pixels are inverted in place and the threshold below operates on
        # the modified image. Kept so the returned image is unchanged from
        # earlier versions of this method.
        cv2.bitwise_not(gray, gray, mask=thresh_inv)
        _, to_zero = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        return to_zero

    # dilation
    def dilate(self, image):
        """Dilate ``image`` once with a 5x5 kernel of ones."""
        return cv2.dilate(image, self._square_kernel(), iterations=1)

    # erosion
    def erode(self, image):
        """Erode ``image`` once with a 5x5 kernel of ones."""
        return cv2.erode(image, self._square_kernel(), iterations=1)

    # opening - erosion followed by dilation
    def opening(self, image):
        """Apply a morphological opening with a 5x5 kernel of ones."""
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, self._square_kernel())

    # canny edge detection
    def canny(self, image):
        """Return the Canny edge map of ``image`` (thresholds 100 and 200)."""
        return cv2.Canny(image, 100, 200)

    # skew correction
    def deskew(self, image):
        """Rotate ``image`` to undo the skew of its non-zero pixels.

        The skew angle is taken from the minimum-area rectangle around all
        pixels greater than zero. An image without any such pixel is
        returned unchanged, because there is nothing to fit a rectangle to.

        NOTE(review): the ``angle < -45`` normalisation assumes
        ``cv2.minAreaRect`` reports angles in ``[-90, 0)``; confirm this
        against the installed OpenCV version.
        NOTE(review): presumably expects a single-channel image -- confirm.
        """
        coords = np.column_stack(np.where(image > 0))
        if coords.size == 0:
            return image

        angle = cv2.minAreaRect(coords)[-1]
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle

        # The rotation must run for both branches above; previously it was
        # nested under the ``else`` and the method returned None otherwise.
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(
            image, M, (w, h),
            flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # template matching
    def match_template(self, image, template):
        """Return the normalised correlation-coefficient match map."""
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    def numbers(self, img_path):
        """Read the image at ``img_path``, threshold it and OCR its digits.

        The thresholded image is shown in a window (blocking until a key is
        pressed), then the recognised text is printed and returned.

        Raises:
            FileNotFoundError: if ``cv2.imread`` cannot load ``img_path``.
        """
        reader = cv2.imread(img_path)
        if reader is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        thresh = self.thresholding(reader)

        cv2.imshow('threshold', thresh)
        cv2.waitKey(0)

        print('yes')
        # Options are whitespace-separated; the former "--psm 11," carried a
        # stray comma into the page-segmentation-mode value.
        text = pt.image_to_string(
            thresh, config='--psm 11 -c tessedit_char_whitelist=$,0123456789')
        print(text)
        return text

Adding or removing the --psm 11 option from the configuration does not change anything.

Any help would be super appreciated!


Solution

  • You apply several simple thresholding operations consecutively, but you should also try other types of thresholding, such as adaptive thresholding and inRange.

    For example, if you use inRange thresholding for the given example:

    The result for the high resolution image will be:

    enter image description here

    The output for the 0.38 version:

    20000
    4.000
    100
    

    The result for the low resolution image will be:

    enter image description here

    The output for the 0.38 version:

    44.900
    16.000
    34
    

    Unfortunately, only the middle number is recognized correctly. If you tune the range values, the resulting image may give a better result.

    For more, read "Improving the quality of the output" in the Tesseract documentation.

    Code:

    import cv2
    import pytesseract
    from numpy import array
    
    # Load the image (high-res: l9Zbt.png, low-res: eO1XG.png).
    img = cv2.imread("eO1XG.png")
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: eO1XG.png")
    # Convert to HSV so the pixels of interest can be selected by range.
    # Call cv2.cvtColor directly: the ``cv2.cv2`` alias is not available in
    # every opencv-python release.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    msk = cv2.inRange(img, array([94, 0, 196]), array([179, 84, 255]))  # for low resolution
    # msk = cv2.inRange(img, array([0, 0, 0]), array([179, 26, 255]))  # for high resolution
    krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
    dlt = cv2.dilate(msk, krn, iterations=1)
    # Invert so the selected pixels become 0 on a 255 background.
    thr = 255 - cv2.bitwise_and(dlt, msk)
    txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
    print(txt)
    cv2.imshow("", thr)
    cv2.waitKey(0)