Search code examples
python opencv tesseract python-tesseract

How to OCR low quality code picture with pytesseract


I have a set of pictures (sample) that all show a code in the same format. I've tried everything, but nothing works well: blurring, HSV conversion, thresholding, etc. Can you help me out?

import pytesseract
import cv2
# Load the sample picture and enlarge it three times in each dimension.
imgr = cv2.imread("a.png")
src_h, src_w = imgr.shape[:2]
img = cv2.resize(imgr, (int(src_w * 3), int(src_h * 3)), interpolation=cv2.INTER_AREA)

# Smooth with a 7x7 box blur, then split the HSV channels; only V is used below.
img = cv2.blur(img, (7, 7))
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)

# First attempt: show the V channel and run OCR on it directly.
cv2.imshow("", v)
cv2.waitKey(0)
p = pytesseract.image_to_string(v)
print(p)

# Second attempt: binarise V with a fixed threshold of 170 and run OCR again.
_, thresh = cv2.threshold(v, 170, 255, cv2.THRESH_BINARY)
cv2.imshow("", thresh)
cv2.waitKey(0)
print(pytesseract.image_to_string(thresh))

ation


Solution

  • Below is a possible solution. I felt that distortion was part of the problem, so I tried to "fix" that. The result looks fine: the detection is successful. Unfortunately, since you gave only one sample, I have no way to figure out whether this will work on the other ones... (probably not...) Nevertheless, you can give it a try. Best regards, Stéphane

    Note: I use tesseract-5.0.0-alpha with the tessdata_best dataset. Here is the output from the console:

    Regression parameters for the second-degree polynomial: 
    [ 2.33735101e-04 -1.92211992e-01  2.43573673e+02]
    =============================
    Rectified image
    RESULT:  EG01-012R210126024
    =============================
    ================================================
    Test on the non rectified image
    with the same blur, erode, threshold and
    tesseract parameters
    RESULT:  EGO1-012R2101269
    ================================================
    Press any key on an opened opencv window to close
    

    And below is the program:

    # Standard imports
    import cv2
    import numpy as np
    from matplotlib import pyplot as plt
    import pytesseract
    pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract'
    
    # Read image
    imgr = cv2.imread("a.png")
    # cv2.imread does not raise when the file is missing or unreadable; it
    # returns None, which would only surface later as an AttributeError on
    # imgr.shape. Fail early with an explicit message instead.
    if imgr is None:
        raise FileNotFoundError("Could not read image file: a.png")
    # Enlarge the image by `factor` in both dimensions before any detection.
    factor=3
    imgr = cv2.resize(imgr, (int(imgr.shape[1]*factor ), int(imgr.shape[0]*factor)), interpolation=cv2.INTER_AREA)
    
    # First detection in order to crop the image.
    # Only the horizontal position of the first detected word is used; the
    # recognised text itself does not matter at this stage.

    strings=pytesseract.image_to_data(imgr, lang = 'eng', config='--psm 11 --oem 3 -c                                         tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')
    strings=strings.split('\n')
    # Fallback when no word is detected: with xmin=0 the crop below keeps the
    # whole image instead of failing with a NameError on an unbound xmin.
    xmin=0
    # image_to_data returns tab-separated rows; index 6 is read as the left
    # edge of the box and index 11 as the recognised text. The first two rows
    # are skipped (presumably the header and the page-level row).
    for line in strings[2:]:
        s=line.split('\t')
        # Rows with fewer than 12 fields (e.g. an empty trailing line) are
        # skipped; indexing s[11] on them would raise IndexError.
        if len(s)>11 and len(s[11])>0:
            xmin=int(s[6])
            break
    
    ##  Drop everything left of the first detection, keeping a margin of one
    ##  tenth of the image width (clamped so the start column is never negative).
    crop_left=max(0, xmin-imgr.shape[1]//10)
    imgr=imgr[:, crop_left:, :]
    cv2.imshow("Cropped image",imgr)
    hsv = cv2.cvtColor(imgr, cv2.COLOR_BGR2HSV)
    h0, s0, Im0 = cv2.split(hsv)
    # Im0 (third HSV channel) is the image every later step works on.
    h, w = Im0.shape[:2]
    
    # Blob image to compute the image distortion.
    # A box blur one pixel high and a third of the image wide smears the
    # content horizontally; after stretching to 0..255 and thresholding at
    # 170, the pixels left at 0 form the blob (presumably the dark text line)
    # whose curvature is fitted afterwards.
    blob=cv2.blur(Im0,(w//3,1))

    blob=cv2.normalize(blob,None,0,255,cv2.NORM_MINMAX)
    blob=cv2.threshold(blob,170,255,cv2.THRESH_BINARY)[1]
    cv2.imshow("Blob image",blob)

    # Coordinates of every zero-valued blob pixel: x is the column, y the row.
    # np.nonzero scans in row-major order, so running it on the transposed
    # mask yields the points column by column -- the same ordering as a nested
    # loop over columns (outer) and rows (inner), without a Python-level loop
    # over every pixel.
    x, y = np.nonzero(blob.T == 0)
    
    
    # Fit y = a*x^2 + b*x + c through the blob pixels; np.polyfit returns the
    # coefficients with the highest degree first.
    model = np.polyfit(x,y, 2)
    print("Regression parameters for the second-degree polynomial: ")
    print(model)

    coef_a, coef_b, coef_c = model
    plt.plot(x,y,'x')
    # Evaluate the fitted curve once on evenly spaced abscissae across the
    # image width; it is used both for the plot and for its mean height.
    X=np.linspace(0,w)
    fitted=X*X*coef_a+X*coef_b+coef_c
    plt.plot(X,fitted)
    # Mean height of the curve: the remapping subtracts it from the curve.
    Ymean=np.mean(fitted)
    
    # Remapping the cropped image with the found model parameters.
    # For each output pixel (row j, column i), cv2.remap samples the source at
    # (map_x[j, i], map_y[j, i]). Columns are left unchanged; every row is
    # shifted by the fitted curve's offset from its mean height at that column.
    # The maps are built with broadcasting rather than a per-pixel Python
    # loop; the terms are summed in the same order as the scalar expression
    # j + i*i*model[0] + i*model[1] + model[2] - Ymean.

    cols = np.arange(w, dtype=np.int64)
    rows = np.arange(h, dtype=np.int64)
    map_x = np.tile(cols.astype(np.float32), (h, 1))
    map_y = (rows[:, None] + cols*cols*model[0] + cols*model[1] + model[2] - Ymean).astype(np.float32)


    Im1=cv2.remap(Im0, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
    
    # Actual detection on the rectified image: Im1

    Im1=cv2.normalize(Im1,None,0,255,cv2.NORM_MINMAX)
    # NOTE: despite its name, blur_radius is passed as the full side length of
    # the box-blur window.
    blur_radius=8
    threshold=120
    Im1= cv2.blur(Im1, (blur_radius,blur_radius))
    kernel = np.ones((4,4), np.uint8)
    # Eroding the inverted image and inverting back replaces each pixel with
    # the maximum of its 4x4 neighbourhood, which shrinks dark regions.
    Im1=255-cv2.erode(255-Im1, kernel)
    Im1=cv2.normalize(Im1,None,0,255,cv2.NORM_MINMAX)
    Im1 = cv2.threshold(Im1, threshold, 255, cv2.THRESH_BINARY)[1]
    cv2.imshow("Rectified image for text detection",Im1)

    strings=pytesseract.image_to_string(Im1, lang = 'eng', config='--psm 11 --oem 3 -c                                         tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')

    # Keep the longest whitespace-separated token as the result. default=''
    # avoids a ValueError from max() when Tesseract returns no text at all.
    strings=strings.split()
    strings=max(strings,key=len,default='')
    print('=============================')
    print("Rectified image")
    print('RESULT: ',strings)
    print('=============================')
    
    
    # For comparison: detection on the non rectified image
    # using the same parameters (blur_radius, kernel and threshold are the
    # values set for the rectified image):


    Im2 = Im0 #  without remapping
    Im2 = cv2.normalize(Im2,None,0,255,cv2.NORM_MINMAX)
    Im2 = cv2.blur(Im2, (blur_radius,blur_radius))
    # Same invert / erode / invert step as applied to the rectified image.
    Im2 = 255-cv2.erode(255-Im2, kernel)
    Im2 = cv2.normalize(Im2,None,0,255,cv2.NORM_MINMAX)
    Im2 = cv2.threshold(Im2, threshold, 255, cv2.THRESH_BINARY)[1]

    strings=pytesseract.image_to_string(Im2, lang = 'eng', config='--psm 11 --oem 3 -c                                         tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-')

    # Keep the longest whitespace-separated token as the result. default=''
    # avoids a ValueError from max() when Tesseract returns no text at all.
    strings=strings.split()
    strings=max(strings,key=len,default='')
    print('================================================')
    print("Test on the non rectified image")
    print("with the same blur, erode, threshold and")
    print("tesseract parameters")
    print('RESULT: ',strings)
    print('================================================')
    cv2.imshow("Unrectified image for text detection",Im2)
    
    
    # Wait for a key press in any OpenCV window, then tear everything down.
    print("Press any key on an opened opencv window to close")
    cv2.waitKey(0)  # a delay of 0 blocks until a key is pressed
    # NOTE(review): no plt.show() is visible in this script, so the
    # matplotlib figure may be closed without ever being displayed.
    plt.close()
    cv2.destroyAllWindows()