Search code examples
pythonimageopencvimage-processingpython-tesseract

How to process and extract text from image


I'm trying to extract text from image using python cv2. The result is pathetic and I can't figure out a way to improve my code. I believe the image needs to be processed before the extraction of text but not sure how.

Sample image

I've tried to convert it into black and white but no luck.

import cv2
import os
import pytesseract
from PIL import Image
import time

pytesseract.pytesseract.tesseract_cmd='C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

cam = cv2.VideoCapture(1,cv2.CAP_DSHOW)

cam.set(cv2.CAP_PROP_FRAME_WIDTH, 8000)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 6000)

while True:
    return_value,image = cam.read()
    image=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    image = image[127:219, 508:722]
    #(thresh, image) = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    cv2.imwrite('test.jpg',image)
    print('Text detected: {}'.format(pytesseract.image_to_string(Image.open('test.jpg'))))
    time.sleep(2)

cam.release()
#os.system('del test.jpg')

Solution

  • Preprocessing to clean the image before performing text extraction can help. Here's a simple approach

    • Convert image to grayscale and sharpen image
    • Adaptive threshold
    • Perform morpholgical operations to clean image
    • Invert image

    First we convert to grayscale then sharpen the image using a sharpening kernel

    enter image description here

    Next we adaptive threshold to obtain a binary image

    enter image description here

    Now we perform morphological transformations to smooth the image

    enter image description here

    Finally we invert the image

    enter image description here

    import cv2
    import numpy as np
    
    image = cv2.imread('1.jpg')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
    sharpen = cv2.filter2D(gray, -1, sharpen_kernel)
    thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
    result = 255 - close
    
    cv2.imshow('sharpen', sharpen)
    cv2.imshow('thresh', thresh)
    cv2.imshow('close', close)
    cv2.imshow('result', result)
    cv2.waitKey()