I am trying to identify numbers and their position in the image
This is my code:
import cv2
import pytesseract
def round_to_nearest_10(number):
    """Return ``number`` rounded to the nearest multiple of 10.

    Relies on the built-in ``round``, which rounds exact halves to the
    nearest even integer, so 25 becomes 20 while 35 becomes 40.
    """
    return round(number / 10) * 10
def parse_image_grid(filename):
    """Find four-cornered contours in an image and OCR the text inside each.

    Each detected square is printed as it is processed and also recorded in
    the returned mapping.

    Args:
        filename: Path of the image file to read with ``cv2.imread``.

    Returns:
        A dict mapping each detected square's bounding box
        ``(x, y, x + w, y + h)`` to the stripped text Tesseract returned
        for that region.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``filename``.
    """
    # Set the path to the Tesseract executable (update with your path).
    # A raw string keeps backslashes literal, so they must not be doubled.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    # Read the image; cv2.imread reports failure by returning None.
    image = cv2.imread(filename)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {filename}")

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply GaussianBlur to reduce noise before edge detection
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # Use the Canny edge detector to find edges in the image
    edges = cv2.Canny(blurred, 50, 150)
    # Find the outermost contours in the edge map
    contours, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Dictionary to store the mapping of square coordinates to identified numbers
    square_dict = {}
    for contour in contours:
        # Approximate the contour to a polygon
        epsilon = 0.04 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Only polygons with four corners are treated as squares
        if len(approx) != 4:
            continue

        # Extract the region of interest (ROI) containing the square
        x, y, w, h = cv2.boundingRect(contour)
        square_roi = image[y:y + h, x:x + w]
        # Use OCR to extract numbers from the square
        square_text = pytesseract.image_to_string(square_roi, config="--psm 6").strip()
        # Print the square coordinates and extracted numbers
        print(f"Square at ({x}, {y}), Numbers: {square_text}")
        # Map square coordinates to identified numbers in the dictionary
        square_dict[(x, y, x + w, y + h)] = square_text

    return square_dict
Output:
Square at (221, 71), Numbers: 4a
Square at (181, 61), Numbers: fi
Square at (31, 61), Numbers: 3 |
Square at (211, 31), Numbers: @
Square at (181, 31), Numbers: 2
Square at (121, 31), Numbers: ff
Square at (91, 31), Numbers: &
Square at (61, 31), Numbers: @
Square at (1, 31), Numbers:
Square at (121, 1), Numbers: 5 |
Square at (91, 1), Numbers: Es
Square at (61, 1), Numbers: @
Square at (31, 0), Numbers: 9
It identifies some blocks correctly. For others, it reads the numbers as `@` or `|` characters.
I tried changing the `--psm` setting, but that did not help.
Am I missing something here?
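There were some problems in your code that are generally detrimental to the detection of the numbers. Some improvements that can be made:
- Detect the grid with a fixed threshold instead of Canny edges, and dilate the result so the cell borders have no gaps.
- Discard contours with a very small area before looking for squares.
- Run the OCR on a binarized grayscale crop of each cell instead of the raw colour crop.
So here is my approach: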
import numpy as np  # needed for the dilation kernel below

pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # path to my tesseract (on linux)
image = cv2.imread("sudoku.jpg")  # read image
if image is None:
    # cv2.imread reports failure by returning None instead of raising
    raise FileNotFoundError("Could not read image: sudoku.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # turn to gray
# apply an inverted threshold to detect the grid: pixels at or below 160 become 255
_, threshold = cv2.threshold(gray, 160, 255, cv2.THRESH_BINARY_INV)
# dilate to make sure there are no breaks in the contour
thresholdDilated = cv2.dilate(threshold, np.ones((2, 2), np.uint8))
thresholdDilated = cv2.bitwise_not(thresholdDilated)  # flip the pixels
contours, _ = cv2.findContours(thresholdDilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)  # contouring
filteredContours = [cnt for cnt in contours if cv2.contourArea(cnt) >= 100]  # remove contours with low area
After filtering, I was left with exactly 24 contours. Without the area filter there were 26, two of which had an area of 0; I have no idea where those came from.
Apart from that, I made one small change to your code:
# Dictionary to store the mapping of square coordinates to identified numbers
square_dict = {}
for contour in filteredContours:
    # Approximate the contour to a polygon
    epsilon = 0.04 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    # Check if the polygon has four corners (likely a square)
    if len(approx) == 4:
        # Extract the region of interest (ROI) containing the square,
        # taken from the grayscale image so it can be thresholded directly
        x, y, w, h = cv2.boundingRect(contour)
        square_roi = gray[y:y + h, x:x + w]
        # Binarize the cell before OCR: MAKE NUMBERS POSITIVE CLASS
        _, binaryThreshold = cv2.threshold(square_roi, 127, 255, cv2.THRESH_BINARY)
        # Use OCR to extract numbers from the square
        square_text = pytesseract.image_to_string(binaryThreshold, config="--psm 6").strip()
        # Print the square coordinates and extracted numbers
        print(f"Square at ({x}, {y}), Numbers: {square_text}")
        # Map square coordinates to identified numbers in the dictionary
        square_dict[(x, y, x + w, y + h)] = square_text
I ran the detection on `binaryThreshold`, where the numbers are the positive class. Here are the results:
Square at (212, 62), Numbers: 4
Square at (182, 62), Numbers:
Square at (152, 62), Numbers:
Square at (122, 62), Numbers:
Square at (92, 62), Numbers: 1
Square at (62, 62), Numbers:
Square at (32, 62), Numbers: 3
Square at (2, 62), Numbers: 0
Square at (212, 32), Numbers:
Square at (182, 32), Numbers: 2
Square at (152, 32), Numbers: 7
Square at (122, 32), Numbers:
Square at (92, 32), Numbers:
Square at (62, 32), Numbers:
Square at (32, 32), Numbers:
Square at (2, 32), Numbers: 8
Square at (212, 2), Numbers: 6
Square at (182, 2), Numbers:
Square at (152, 2), Numbers:
Square at (122, 2), Numbers: 5
Square at (92, 2), Numbers:
Square at (62, 2), Numbers:
Square at (32, 2), Numbers: 9
Square at (2, 2), Numbers:
If I were you, I would sort the contours such that they start from top left to bottom right. Otherwise, hope this answer helped :D