Search code examples
Tags: python, opencv, ocr, tesseract, image-preprocessing

Image identification using Tesseract


I am trying to identify the numbers and their positions in this image: [enter image description here]

This is my code:

import cv2
import pytesseract


def round_to_nearest_10(number):
    """Return *number* rounded to a multiple of 10.

    The value is scaled down by ten, passed to the built-in ``round`` and
    scaled back up. Ties are therefore resolved the way ``round`` resolves
    them (half to even on the scaled value): 5 gives 0, while 15 gives 20.
    """
    tens = round(number / 10)
    return 10 * tens


def parse_image_grid(filename):
    """Find four-cornered regions in an image and OCR the text in each one.

    The image is converted to grayscale, blurred and passed through the
    Canny edge detector; every external contour whose polygon approximation
    has exactly four vertices is cropped from the original image and handed
    to Tesseract. Each result is printed and also collected.

    Args:
        filename: Path of the image file to read with ``cv2.imread``.

    Returns:
        A dict mapping each bounding box ``(x, y, x + w, y + h)`` to the
        stripped text Tesseract returned for that region.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``filename``.
    """
    # Set the path to the Tesseract executable (update with your path).
    # A raw string already keeps backslashes literal, so they are not doubled.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque error inside cvtColor.
    image = cv2.imread(filename)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {filename}")

    # Grayscale + blur before edge detection to suppress pixel noise
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)

    # Only the outermost contours are requested (RETR_EXTERNAL)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Mapping of square coordinates to identified numbers
    square_dict = {}

    for contour in contours:
        # Approximate the contour to a polygon; the tolerance is 4% of the
        # contour's perimeter.
        epsilon = 0.04 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Keep only polygons with four corners (likely a square)
        if len(approx) != 4:
            continue

        # Extract the region of interest (ROI) containing the square
        x, y, w, h = cv2.boundingRect(contour)
        square_roi = image[y:y + h, x:x + w]

        # Use OCR to extract numbers from the square
        square_text = pytesseract.image_to_string(square_roi, config="--psm 6").strip()

        # Print the square coordinates and extracted numbers
        print(f"Square at ({x}, {y}), Numbers: {square_text}")

        square_dict[(x, y, x + w, y + h)] = square_text

    return square_dict

Output:

Square at (221, 71), Numbers: 4a
Square at (181, 61), Numbers: fi
Square at (31, 61), Numbers: 3 |
Square at (211, 31), Numbers: @
Square at (181, 31), Numbers: 2
Square at (121, 31), Numbers: ff
Square at (91, 31), Numbers: &
Square at (61, 31), Numbers: @
Square at (1, 31), Numbers: 
Square at (121, 1), Numbers: 5 |
Square at (91, 1), Numbers: Es
Square at (61, 1), Numbers: @
Square at (31, 0), Numbers: 9

It is identifying some blocks correctly. For others it is identifying numbers as @ or | characters.

I tried changing the psm settings, but that did not help.

Am I missing something here?


Solution

  • Some aspects of your code make it harder to detect the numbers. Here are some improvements that can be made:

    • detect exactly as many contours as there are squares in the grid
    • generate an image where the numbers are the positive class (1) and the background is the negative class (0)

    So here is my approach:

    # Preprocessing script: threshold the image, close small gaps in the
    # grid lines, and keep one contour per grid cell.
    import numpy as np  # needed for the dilation kernel below

    pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # path to my tesseract (on Linux)
    image = cv2.imread("sudoku.jpg")  # read image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    # Inverse binary threshold: pixels brighter than 160 become 0, all others
    # become 255 (presumably this turns the dark grid lines white).
    _, threshold = cv2.threshold(gray, 160, 255, cv2.THRESH_BINARY_INV)  # apply threshold to detect grid
    # The original line was missing the closing parenthesis of cv2.dilate(...).
    thresholdDilated = cv2.dilate(threshold, np.ones((2, 2), np.uint8))  # dilate to make sure there are no breaks in the contour
    thresholdDilated = cv2.bitwise_not(thresholdDilated)  # flip the pixels
    contours, _ = cv2.findContours(thresholdDilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)  # contouring
    filteredContours = [cnt for cnt in contours if cv2.contourArea(cnt) >= 100]  # remove contours with low area
    

    After filtering, this gave me exactly 24 contours. Without the area filter I had 26 contours, two of which had an area of 0... No idea what happened there.

    Apart from that, I made one small change to your code:

    # Maps each cell's bounding box (x, y, x + w, y + h) to the text read from it
    square_dict = {}
    for cnt in filteredContours:
        # Simplify the contour; the tolerance is 4% of its perimeter
        tolerance = 0.04 * cv2.arcLength(cnt, True)
        polygon = cv2.approxPolyDP(cnt, tolerance, True)

        # Anything that does not reduce to four corners is skipped
        if len(polygon) != 4:
            continue

        # Crop the cell out of the grayscale image
        x, y, w, h = cv2.boundingRect(cnt)
        cell = gray[y:y + h, x:x + w]

        # Binarize the crop at 127 before handing it to Tesseract
        _, cell_binary = cv2.threshold(cell, 127, 255, cv2.THRESH_BINARY)

        # Run OCR on the binarized crop and trim surrounding whitespace
        square_text = pytesseract.image_to_string(cell_binary, config="--psm 6").strip()

        # Report the result and record it under the cell's bounding box
        print(f"Square at ({x}, {y}), Numbers: {square_text}")
        square_dict[(x, y, x + w, y + h)] = square_text
    

    I did the detection on the binaryThreshold where the numbers are the positive class. Here are the results:

    Square at (212, 62), Numbers: 4
    Square at (182, 62), Numbers: 
    Square at (152, 62), Numbers: 
    Square at (122, 62), Numbers: 
    Square at (92, 62), Numbers: 1
    Square at (62, 62), Numbers: 
    Square at (32, 62), Numbers: 3
    Square at (2, 62), Numbers: 0
    Square at (212, 32), Numbers: 
    Square at (182, 32), Numbers: 2
    Square at (152, 32), Numbers: 7
    Square at (122, 32), Numbers: 
    Square at (92, 32), Numbers: 
    Square at (62, 32), Numbers: 
    Square at (32, 32), Numbers: 
    Square at (2, 32), Numbers: 8
    Square at (212, 2), Numbers: 6
    Square at (182, 2), Numbers: 
    Square at (152, 2), Numbers: 
    Square at (122, 2), Numbers: 5
    Square at (92, 2), Numbers: 
    Square at (62, 2), Numbers: 
    Square at (32, 2), Numbers: 9
    Square at (2, 2), Numbers: 
    

    If I were you, I would sort the contours such that they start from top left to bottom right. Otherwise, hope this answer helped :D