Search code examples
pythonopencvcomputer-visionocr

Detect only left-most boxes in image


I have a JPG image that contain mobile brand names:
enter image description here

Now I want to detect each word first character by python script
I wrote following python script for this:

import cv2
import numpy as np
from tkinter import Tk, Canvas, Frame, Scrollbar, BOTH, VERTICAL, HORIZONTAL
from PIL import Image, ImageTk

# Function to draw rectangles around shapes and display using Tkinter
def draw_rectangles(image_path):
    # Create a Tkinter window to display the image
    root = Tk()
    root.title("Image with Left-Most Rectangles Only")

    # Load the image
    image = cv2.imread(image_path)
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply adaptive thresholding to get better separation of text
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )

    # Find contours in the binary image
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Dictionary to store contours grouped by Y-coordinate ranges
    contours_by_y = {}

    # Sort contours by X-coordinate to ensure we pick the left-most character first
    sorted_contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[0])

    # Group contours by their Y coordinate to keep only the left-most rectangle per Y range
    for contour in sorted_contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 15 and h > 15:  # Adjust the size filter to remove small artifacts
            aspect_ratio = w / float(h)
            # Ensure the aspect ratio is within the typical range of letters
            if 0.2 < aspect_ratio < 5:
                y_range = y // 20  # Group by a smaller Y coordinate range for better separation

                # Check if the current rectangle is more left-most in X within its Y range
                if y_range not in contours_by_y:
                    contours_by_y[y_range] = (x, y, w, h)  # Store the first contour found in this range
                else:
                    # Compare and keep the left-most (smallest X) rectangle
                    current_x, _, _, _ = contours_by_y[y_range]
                    # Check distance between new contour and the existing one to avoid close detection
                    if x < current_x and (x - current_x) > 20:  # Distance threshold to filter out close contours
                        contours_by_y[y_range] = (x, y, w, h)

    # Draw only the left-most rectangles
    for (x, y, w, h) in contours_by_y.values():
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)  # Red color in BGR

    # Convert the image to RGB (OpenCV uses BGR by default)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Convert the image to a format Tkinter can use
    image_pil = Image.fromarray(image_rgb)
    image_tk = ImageTk.PhotoImage(image_pil)

    # Create a frame for the Canvas and scrollbars
    frame = Frame(root)
    frame.pack(fill=BOTH, expand=True)

    # Create a Canvas widget to display the image
    canvas = Canvas(frame, width=image_tk.width(), height=image_tk.height())
    canvas.pack(side="left", fill="both", expand=True)

    # Add scrollbars to the Canvas
    v_scrollbar = Scrollbar(frame, orient=VERTICAL, command=canvas.yview)
    v_scrollbar.pack(side="right", fill="y")

    h_scrollbar = Scrollbar(frame, orient=HORIZONTAL, command=canvas.xview)
    h_scrollbar.pack(side="bottom", fill="x")

    canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
    canvas.create_image(0, 0, anchor="nw", image=image_tk)
    canvas.config(scrollregion=canvas.bbox("all"))

    # Keep a reference to the image to prevent garbage collection
    canvas.image = image_tk

    root.mainloop()

# Path to your image
image_path = r"E:\Desktop\mobile_brands\ORG_027081-Recovered.jpg"

# Call the function
draw_rectangles(image_path)

But I don't know why it not working good. The accuracy of this script is 90%. for example in above image it detect "a" character in "Samsung"
enter image description here

where is my script problem?
How can I fix this problem?
maybe by Y and X coordinate can not detect left-most boxes in image.
Note that I don't want to use OCR


Solution

  • I will first sort those contours by y-coordinate, then group the sorted contours by rows. Finally draw the left-most one in each row:

    # Sort contours by Y-coordinate
    sorted_contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[1])
    
    # group contours by row
    last_y = None
    rows = []
    for contour in sorted_contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 15 or h > 15:
            if last_y is None or abs(last_y-y) > 50:  # 50 is for provided image, it may not work for other image
                # this is a new row
                rows.append([])
                last_y = y  # save the reference y-coordinate
            rows[-1].append((x, y, w, h)) # append item into current row
    
    for row in rows:
        # show left-most one in current row
        x, y, w, h = sorted(row, key=lambda x: x[0])[0]
        cv2.rectangle(image, (x, y), (x+w, y+h), (0, 0, 255), 2)
    

    Note that I have an assumption that difference of y-coordinate between rows is larger than 50 pixels.

    Result:

    enter image description here