Search code examples
pythonocrtesseract

Python to Automate mini game. Tesseract unable to find single character


I'm still new to Python, and I've been using ChatGPT as my tutor. I'm trying to automate a mini game: the script needs to read the single character (letter or number) on my screen and then, when the timing is right, press that key. I've been through 3 different versions trying to find a solution, but I simply can't get it to recognize the character. I really hope someone can help me move forward!

Images:

  1. game
  2. cut out
  3. processed
import pyautogui
import pytesseract
from PIL import Image, ImageEnhance
import time
import re

def preprocess_image(image, *, contrast_factor=5.0, threshold=200):
    """
    Preprocess the image to enhance OCR detection.

    The image is converted to grayscale, its contrast is boosted, and the
    result is binarized so that only the brightest pixels remain white.

    Args:
        image (PIL.Image.Image): The input image.
        contrast_factor (float): Factor handed to the contrast enhancer.
            Defaults to 5.0.
        threshold (int): Grayscale cutoff. Pixels strictly brighter than
            this become 255; every other pixel becomes 0. Defaults to 200.

    Returns:
        PIL.Image.Image: Grayscale image containing only the values 0 and 255.
    """
    # Convert to grayscale so a single brightness value drives the threshold
    gray_image = image.convert("L")

    # Enhance contrast significantly to separate the text from the background
    enhanced_image = ImageEnhance.Contrast(gray_image).enhance(contrast_factor)

    # Binarize with an explicit conditional so the lookup table holds
    # integers only (the `cond and value` form yields a bool for dark pixels).
    return enhanced_image.point(lambda p: 255 if p > threshold else 0)

def analyze_region(region, region_name):
    """
    Capture a screen region, preprocess it, and OCR it for characters.

    Two debug images are written to the current working directory on every
    call: the raw capture and the preprocessed version.

    Args:
        region (tuple): The region to analyze (x, y, width, height).
        region_name (str): A name for the region; used in the debug file
            names and in the printed debugging output.

    Returns:
        str: Every capital letter or digit found in the OCR output,
        concatenated in order. Empty string when nothing valid was detected.
    """
    region_screenshot = pyautogui.screenshot(region=region)

    # Save the original scanned image for debugging
    region_screenshot.save(f"scanned_region_{region_name}_original.png")

    # Preprocess the image
    processed_image = preprocess_image(region_screenshot)

    # Save the processed image for debugging
    processed_image.save(f"scanned_region_{region_name}_processed.png")

    # Page segmentation mode 10 (single character, per Tesseract's docs),
    # with recognition restricted to capital letters and digits.
    config = '--psm 10 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    text = pytesseract.image_to_string(processed_image, config=config)

    # Filter once and reuse the result for both the debug print and the return
    valid_characters = re.findall(r'[A-Z0-9]', text)
    print(f"Raw OCR output from {region_name}: {text.strip()}\nFiltered characters: {valid_characters}")  # Debugging output

    return ''.join(valid_characters)

def _find_color_pixel(image, rgb):
    """Return (x, y) of the first pixel equal to rgb, scanning row by row, or None."""
    width, height = image.size
    pixels = image.load()
    for y in range(height):
        for x in range(width):
            if pixels[x, y] == rgb:
                return x, y
    return None


def find_color_on_screen(target_color):
    """
    Continuously scans the screen for a specific color.

    Full-screen screenshots are taken in a loop until one contains a pixel
    whose RGB value equals ``target_color``. The two progress-bar regions are
    then OCR'd up to 10 times; the function returns after the first detection
    or after all 10 attempts fail. In every case it waits for the user to
    press Enter before returning.

    Args:
        target_color (tuple): RGB values of the target color (e.g., (255, 0, 0) for red).
            Only the first three components are compared.

    Outputs:
        Prints a message when the color is found and analyzes specific regions to identify a character.
    """
    # Regions for progress bar positions, as (x, y, width, height)
    top_bar_region = (1010, 103, 530, 77)
    right_bar_region = (1840, 120, 75, 695)

    # Normalize to an RGB 3-tuple so a list or RGBA value still compares
    # equal to the pixel tuples read from the screenshot.
    wanted_rgb = tuple(target_color)[:3]

    print(f"Scanning for color: {target_color} (RGB)...")
    while True:
        # Force RGB: if the capture is RGBA, its pixels are 4-tuples and
        # would never equal a 3-tuple target.
        screenshot = pyautogui.screenshot().convert("RGB")

        location = _find_color_pixel(screenshot, wanted_rgb)
        if location is None:
            continue  # Color not on screen yet; grab a fresh screenshot

        x, y = location
        print(f"Color {target_color} found at pixel ({x}, {y})!")

        # Up to 10 checks with a 0.2 s pause after each; the total duration
        # also includes the capture and OCR time of every check.
        for _ in range(10):
            top_bar_result = analyze_region(top_bar_region, "top_bar")
            right_bar_result = analyze_region(right_bar_region, "right_bar")

            if top_bar_result:
                print(f"Detected character(s) in top bar: {top_bar_result}")
                input("Press Enter to acknowledge and exit...")
                return

            if right_bar_result:
                print(f"Detected character(s) in right bar: {right_bar_result}")
                input("Press Enter to acknowledge and exit...")
                return

            time.sleep(0.2)

        print("No valid capital letters or numbers detected within the time frame.")
        input("Press Enter to exit...")
        return

# Script entry point: scan for the example color when run directly
if __name__ == "__main__":
    # Color to detect, written as the hex pairs of 008D92 (R, G, B);
    # swap in the RGB value you want to look for.
    target_color = (0x00, 0x8D, 0x92)
    find_color_on_screen(target_color=target_color)

Command Prompt:

Raw OCR output from top_bar: Filtered characters: [] Raw OCR output from right_bar: Filtered characters: [] No valid capital letters or numbers detected within the time frame. Press Enter to exit...


Solution

  • I did some testing with pytesseract and found out that it didn't like the white borders around the letter.

    I rewrote preprocess_image() to keep only the letter. It's probably not the best way of doing it, but it's just meant to give an understanding of what pytesseract needs:

    def preprocess_image(image: Image.Image, top_right: bool):
        """
        Redraw the pure-white pixels of ``image`` as black on a white canvas,
        dropping the outermost white lines on one axis.

        Args:
            image: Source image; it is converted to RGB before inspection.
            top_right: When True, white pixels in the leftmost and rightmost
                white columns are skipped; when False, white pixels in the
                topmost and bottommost white rows are skipped.
                (Presumably True is meant for the top bar and False for the
                right bar; confirm against the caller.)

        Returns:
            A new RGB image of the same size: white background with the
            retained pixels drawn in black.
        """
        npimage = np.array(image.convert('RGB'))

        # Coordinates of every pure-white pixel (rows in Y, columns in X)
        Y, X = np.where(np.all(npimage == [255, 255, 255], axis=2))

        output = Image.new('RGB', image.size, (255, 255, 255))

        # No white pixels at all: nothing to draw, and min()/max() would fail
        if X.size == 0:
            return output

        # Compute the extremes once instead of once per pixel
        axis = X if top_right else Y
        keep = (axis != axis.min()) & (axis != axis.max())

        for x, y in zip(X[keep], Y[keep]):
            output.putpixel((int(x), int(y)), (0, 0, 0))

        return output
    

    It takes the positions of all the white pixels in the image ([255, 255, 255]) and keeps only those that are not at the first or last x position, since those positions correspond to the white bars around the number (if x != max(X) and x != min(X):). For the other bar, the same check is done on the y positions instead.

    Here's an image of what it returns: code output

    Sources :
    Find the coordinates in an image where a specified colour is detected