Search code examples
python · image-processing · ocr · tesseract · python-tesseract

Prepping Image w/ Reflective Background for PyTesseract


I am trying to build an OCR system to extract a serial number from hundreds of labels. I am running each image through OpenCV and pytesseract to get the full text, but I am having trouble cleaning up the background so that pytesseract works properly.

The region of interest I am trying to extract information from looks like the following (I blocked out two characters for privacy).

ROI In an attempt to improve performance, I have split the 3-line serial number into 3 separate ROIs.

The following code is what I have to produce the first line.

Current Status

Which pytesseract spits out '7IP29 AGH2TR:\n'.

import cv2 as cv
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd

#Tesseract Library
import pytesseract
import re

from PIL import Image

# Tell pytesseract which Tesseract executable to run. The path is pinned to a
# versioned install directory (5.3.0_1), so it must be updated whenever that
# directory changes.
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/5.3.0_1/bin/tesseract'

# In[Img Load]

image_path = '/Users/cfr/Desktop/20230308_111250.jpg'

# Flag 0 loads the image as single-channel grayscale, so img.shape is
# (height, width) with no channel axis.
img = cv.imread(image_path,0)

# cv.imread returns None (it does not raise) when the file cannot be read;
# fail here with a clear message instead of an AttributeError further down.
if img is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

print('Original Dimensions : ',img.shape)

scale_percent = 25 # percent of original size
# shape[1] is the number of columns (width), shape[0] the number of rows
# (height). A 2-D grayscale array has no index 3.
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
# cv.resize expects the target size as (width, height).
dim = (width, height)

# resize image
resized = cv.resize(img, dim, interpolation = cv.INTER_AREA)

print('Resized Dimensions : ',resized.shape)

# In[ROI]
# One region of interest per line of the serial number; each is later cropped
# out of the resized image.
# NOTE(review): the tuples look like (x, y, width, height) in pixels of the
# resized image -- confirm against roi_cropper.
roi1 = (263, 252, 226, 43)
roi2 = (265, 288, 224, 32)
roi3 = (274, 320, 106, 32)
# In[Cropped ROI]
def roi_cropper(_image, _roi):
    """Return the rectangular sub-image of ``_image`` described by ``_roi``.

    Args:
        _image: 2-D (or higher) array indexed as ``[row, column]``.
        _roi: Sequence ``(x, y, width, height)``; values are truncated to
            ``int`` before slicing.

    Returns:
        The slice ``_image[y:y + height, x:x + width]``.
    """
    x, y, width, height = _roi[0], _roi[1], _roi[2], _roi[3]

    # Rows are selected by y/height and columns by x/width.
    roi_cropped = _image[int(y):int(y + height), int(x):int(x + width)]

    return roi_cropped

# Crop every serial-number region out of the downscaled image in one pass.
roi_img1, roi_img2, roi_img3 = (
    roi_cropper(resized, region) for region in (roi1, roi2, roi3)
)

# In[BlackHat]
# Initialize a rectangular structuring kernel, x pixels wide and y pixels tall.
x = 5
y = 2
kernel = cv.getStructuringElement(cv.MORPH_RECT, (x, y))

# Smooth the first-line ROI, then apply the black-hat morphological transform
# with the kernel above.
gray = cv.GaussianBlur(roi_img1, (5, 5), 0)
blackhat = cv.morphologyEx(gray, cv.MORPH_BLACKHAT, kernel)

# kernel=None makes cv.dilate use its default structuring element.
blackhat_dilated = cv.dilate(blackhat, None, iterations=1)
plt.imshow(blackhat_dilated)

# In[Tesseract]

# --psm 7 treats the image as a single text line, which matches the one-line
# ROI. (--psm 2 only performs page segmentation and runs no OCR.)
text = pytesseract.image_to_string(blackhat_dilated, config='--psm 7')

print(text)

Solution

  • You may never get a perfect result. I experimented a little with the image parameters; here is my code, which may be of some help. Can you improve the quality of your source image? Also, you should feed Tesseract black-and-white images only:

    import subprocess
    import cv2
    import pytesseract
    
    # Image manipulation
    # Commands https://imagemagick.org/script/convert.php
    mag_img = r'D:\Programme\ImageMagic\magick.exe'
    con_bw = r"D:\Programme\ImageMagic\convert.exe"

    in_file = r'D:\Daten\..\stackoverflow\ID.jpg'
    out_file = r'D:\Daten\..\stackoverflow\ID_bw.jpg'

    # Play with black and white and contrast for better results.
    # check=True raises CalledProcessError if ImageMagick fails, so a missing
    # or stale out_file is never passed on to the OCR step.
    process = subprocess.run(
        [con_bw, in_file, "-threshold", "18%", "-brightness-contrast", "-10x30", out_file],
        check=True,
    )

    # Text processing
    pytesseract.pytesseract.tesseract_cmd=r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    img = cv2.imread(out_file)
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {out_file}')

    # Parameters see tesseract doc
    # The whitelist restricts the output to digits and upper-case letters. It
    # must list every character that can occur: a letter missing from it
    # (previously 'G') can never appear in the result.
    custom_config = r'--psm 3 --oem 3 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    tex = pytesseract.image_to_string(img, config=custom_config)
    print(tex)

    # Show the pre-processed image for one second, then close the window.
    cv2.imshow('image',img)
    cv2.waitKey(1000)
    cv2.destroyAllWindows()
    

    Output, not perfect:

    75229CN2TR
    TDETC1D72
    HM7 COAR