Search code examples
pythonopencvtesseract

OCR: check if letter is in (string) of image (Opencv, Python, Tesseract)


This is a really tricky question.

I am using the following code to detect text in handwritten images. I don't need it to recognize the characters; in this case it only creates a bounding box around each character/word it finds.

o1

This is the code:

import cv2
import tesserocr as tr
from PIL import Image
import numpy as np

# Script purpose: run Tesseract (via tesserocr) on '1.png', print the text it
# recognizes, and draw filled green rectangles on the image with OpenCV.
img = cv2.imread('1.png')

# Counter that is incremented once inside the try block below; nothing in
# this script reads it afterwards.
idx = 0

# since tesserocr accepts PIL images, converting opencv image to pil
pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

# initialize api
api = tr.PyTessBaseAPI()

# Lowercase letters: a recognized character found in this list triggers a
# filled rectangle further down.
alphabet_min = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
                'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Uppercase letters: defined but not used anywhere in this script.
alphabet_max = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
                'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

try:
    api.SetImage(pil_img)
    # Symbol-level components; each entry is unpacked below as a 4-tuple whose
    # second element is a dict with 'x', 'y', 'w' and 'h' keys.
    boxes = api.GetComponentImages(tr.RIL.SYMBOL, True)
    # Recognized text for the whole image, as a single string.
    text = api.GetUTF8Text()
    print(text)

    # This loop only assigns x, y, w, h (the drawing call is commented out),
    # so once it finishes they hold the coordinates of the LAST box only.
    # If `boxes` is empty they are never assigned at all.
    for (im, box, _, _) in boxes:
        x, y, w, h = box['x'], box['y'], box['w'], box['h']
        #print(box)

        #if w < 200:
            #cv2.rectangle(img, (x, y), (x + w, y + h), color=(0, 255, 0), thickness=1)

    # For every recognized lowercase character, draw a filled rectangle
    # (thickness=-1). x, y, w, h do not change inside this loop, so every
    # rectangle is drawn at the same place: the last box from the loop above.
    for letter in text:
        if letter in alphabet_min:
            cv2.rectangle(img, (x, y), (x + w, y + h), color=(0, 255, 0), thickness=-1)

    idx += 1

finally:
    # Always called, whether or not the block above raised.
    api.End()

# Show the annotated image in a window titled '2' and wait for a key press.
cv2.imshow('2', img)
cv2.waitKey(0)

If you take a closer look, you can see a print(text) call. It prints the text found in the image. But since this is handwritten text, it recovers almost nothing:

Ca) a1 1. s 5305 Fm“. 4 54 0235 166 firm 4 §24630455

But even this output can help me in some way.

A little further up in the code, I wrote this loop:

# Excerpt from the script above: for each recognized character that is a
# lowercase letter, draw a filled rectangle. Note that x, y, w, h are not
# updated inside this loop, so every rectangle is drawn at the same place.
for letter in text:
        if letter in alphabet_min:
            cv2.rectangle(img, (x, y), (x + w, y + h), color=(0, 255, 0), thickness=-1) 

This should take every character in the print(text) output and compare it to the alphabet_min list in the code. But it doesn't work, and I don't know why.

The purpose of this is: if a letter in the print(text) output is equal to one of the entries in the alphabet_min list, then cover the corresponding letter in the image (using cv2.rectangle).

Any suggestions?

Source image is this:

src

EDIT

Adding a print(True) under the condition shows True 6 times. This means it finds the letters. The only problem is that it doesn't create the bounding boxes for them.


Solution

  • Solved it...

    So, this is the new code:

    import cv2
    import tesserocr as tr
    from PIL import Image
    import numpy as np
    
    # Script purpose: run Tesseract on '1.png', outline every recognized
    # symbol in green, and cover (fill) the symbols recognized as lowercase
    # ASCII letters.

    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable, so fail early with a clear message.
    img = cv2.imread('1.png')
    if img is None:
        raise FileNotFoundError("could not read image '1.png'")

    # since tesserocr accepts PIL images, converting opencv image to pil
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Symbols recognized as one of these lowercase letters get covered.
    alphabet_min = list('abcdefghijklmnopqrstuvwxyz')

    # Uppercase counterpart, kept for parity with the original script (unused).
    alphabet_max = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    # The API object is used as a context manager so it is released even if
    # recognition or drawing raises.
    with tr.PyTessBaseAPI() as api:
        api.SetImage(pil_img)
        api.Recognize()

        level = tr.RIL.SYMBOL
        result_iter = api.GetIterator()

        # Walk the recognition results symbol by symbol. Each step yields the
        # recognized character together with ITS OWN bounding box. Indexing
        # the full text string by box number does not give that pairing: the
        # string also contains spaces and newlines, and may be shorter than
        # the list of boxes.
        if result_iter is not None:
            for symbol_iter in tr.iterate_level(result_iter, level):
                try:
                    symbol = symbol_iter.GetUTF8Text(level)
                except RuntimeError:
                    # No text available at this position; nothing to draw.
                    continue

                bbox = symbol_iter.BoundingBox(level)
                if not symbol or bbox is None:
                    continue

                left, top, right, bottom = bbox
                print(symbol)

                # Outline every symbol; fill it when it is a lowercase letter
                # (a negative thickness makes OpenCV draw a filled rectangle).
                thickness = -1 if symbol in alphabet_min else 1
                cv2.rectangle(img, (left, top), (right, bottom),
                              color=(0, 255, 0), thickness=thickness)

    # Show the fully annotated image once, instead of pausing for a key press
    # after every single symbol.
    cv2.imshow('2', img)
    cv2.waitKey(0)