Search code examples
pythonnumpypython-tesseract

How to detect numbers inside a yellow cube with pytesseract


I've been trying to detect a number (1-9) inside a yellow cube, but so far I haven't found a reliable solution.

These are two of my pictures:

4

6

This is one solution I've been trying, but without any luck

from PIL import Image
from operator import itemgetter
import numpy as np 
import easyocr
import cv2 
import re
import imutils
import pytesseract
# Tell pytesseract where the Tesseract executable lives (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

# Read the input image and reduce it to a single grayscale channel.
img = cv2.imread("ROI_0.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Adaptive mean threshold with a 59-pixel neighbourhood and a constant of 88,
# producing an inverted binary mask (THRESH_BINARY_INV).
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY_INV, 59, 88)
# Flip the mask back: inverting a THRESH_BINARY_INV result is equivalent to
# thresholding with THRESH_BINARY directly.
bnt = cv2.bitwise_not(thr)
# Run OCR: page segmentation mode 6 plus the "digits" config.
txt = pytesseract.image_to_string(bnt, config="--psm 6 digits")
# Drop surrounding whitespace and split the OCR output into lines.
txt = txt.strip().split("\n")
print(txt)
# Display the image that was passed to Tesseract until a key is pressed.
cv2.imshow("bnt", bnt)
cv2.waitKey(0)

This isn't working. Is there another way to do it?


Solution

  • Steps:

    1. Binarize (Otsu's method)

    2. Correct skew using minAreaRect

    3. Find max area contour

    4. Crop the number

    5. Pass the cropped image to pytesseract

       # Read a yellow-cube image, deskew it, crop the digit and OCR it.
       image = cv2.imread("y6.png")
       # cv2.imread returns None instead of raising when the file is unreadable.
       if image is None:
           raise FileNotFoundError("could not read y6.png")
       gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
       # With THRESH_OTSU the threshold is chosen automatically; the 0 is ignored.
       thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

       # [-2] is the contour list on both OpenCV 3.x (3 return values)
       # and OpenCV 4.x (2 return values).
       contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
       if not contours:
           raise ValueError("no contours found in y6.png")
       big = max(contours, key=cv2.contourArea)
       # Only the rotation angle of the min-area rectangle is needed here.
       angle = cv2.minAreaRect(big)[-1]
       # minAreaRect reports the angle in [-90, 0) or (0, 90] depending on the
       # OpenCV version; fold it into [-45, 45] so an almost upright cube is
       # straightened rather than turned on its side.
       if angle > 45:
           angle -= 90
       elif angle < -45:
           angle += 90
       print(angle)

       # Rotate the binary image about its centre to undo the skew.
       img_h, img_w = image.shape[:2]
       center = (img_w // 2, img_h // 2)
       M = cv2.getRotationMatrix2D(center, angle, 1.0)
       rotated = cv2.warpAffine(thresh, M, (img_w, img_h), flags=cv2.INTER_CUBIC,
                                borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0))

       # Locate the cube again in the straightened image.
       contours = cv2.findContours(rotated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
       big = max(contours, key=cv2.contourArea)
       x, y, w, h = cv2.boundingRect(big)

       # Trim hard-coded margins (w//6 on each side, h//10 at the bottom) so the
       # cube border is not fed to the OCR. The column range is offset by x so
       # the crop stays inside the bounding box wherever the cube sits.
       margin_x = w // 6
       margin_y = h // 10
       cropped = rotated[y:y + h - margin_y, x + margin_x:x + w - margin_x]

       # Page segmentation mode 6 with the "digits" config.
       data = pytesseract.image_to_string(cropped, config='--psm 6 digits')
       print(data)
      

    The cropping relies on a few hard-coded margins (such as h//10 and w//6), so these values will need tuning for other images.