Search code examples
pythonimage-processingnlpcomputer-visiondata-science

Find the coordinates of the required text in an image and highlight it in Python


I have an image in which I want to get the coordinates of a piece of text and highlight that text.

Image Link

Text to highlight and get the coordinates of: 'was the age of wisdom'
I tried to get the coordinates by providing the first and last words, but I didn't get the required result.

import regex as re
import pytesseract
from PIL import Image
pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

import cv2
import matplotlib.pyplot as plt

filename = 'C:\\Users\\vicky\\Downloads\\image.png'

img = cv2.imread(filename)

from pytesseract import Output

# Run OCR once; every key of `d` maps to a list with one entry per detected box.
d = pytesseract.image_to_data(img, output_type=Output.DICT)

# First and last word of the phrase to locate.
word1 = 'was'
word2 = 'wisdom,'

n_boxes = len(d['level'])
for i in range(n_boxes):
    # Remember the top-left corner of the most recent box whose text is word1.
    if d['text'][i] == word1:
        x, y = d['left'][i], d['top'][i]

    # Keep scanning until the closing word shows up.
    if d['text'][i] != word2:
        continue

    x1, y1, w1, h1 = (d[key][i] for key in ('left', 'top', 'width', 'height'))
    bottom_right = (x1 + w1, y1 + h1)

    print(f'The coordinates of text are ({x}, {y}) and ({x1+w1}, {y1+h1})')
    # One rectangle from word1's top-left corner to word2's bottom-right corner.
    cv2.rectangle(img, (x, y), bottom_right, (0, 0, 139), 5)
    break

cv2.imwrite('result2.png', img)

The resulting image I got:

Output Image

If anybody knows another approach, please share it. I also want to highlight the text by filling it with a pale color.


Solution

  • You have to figure out the position of the words:

    import pytesseract
    from pytesseract import Output
    import cv2
    
    pytesseract.pytesseract.tesseract_cmd=r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    img = cv2.imread('Text.png')
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError('Text.png')

    # Run OCR once; every key of `d` maps to a list with one entry per detected box.
    d = pytesseract.image_to_data(img, output_type=Output.DICT)

    # `overlay` receives the filled boxes; `output` is the image it is blended into.
    overlay = img.copy()
    output = img.copy()
    n_boxes = len(d['text'])

    # Box indices of the words to highlight. They are specific to this image:
    # print d['text'] to find the positions of the words you need.
    first_box, last_box = 18, 25

    # Clamp to the number of boxes actually returned so that a shorter OCR
    # result cannot raise IndexError.
    for i in range(first_box, min(last_box, n_boxes)):
        # float() rather than int(): the confidence may be reported as a
        # fractional value (e.g. '96.06'), which int() rejects.
        if float(d['conf'][i]) > 60:
            (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
            # Thickness -1 fills the rectangle; it is drawn on `overlay` in place.
            cv2.rectangle(overlay, (x, y), (x + w, y + h), (0, 255, 0), -1)

    # Blend the filled overlay into the untouched copy; a small alpha keeps the
    # fill pale so the text stays readable.
    alpha = 0.1
    image_new = cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

    cv2.imshow('img', image_new)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    

    Output: enter image description here