Search code examples
pythonmachine-learninglogic

Separation of Braille characters inside of an image


I'm making a project that transforms Braille into text. I have written the code for identifying the Braille dots in the image, but I can't figure out how to segment the dots into cells.

This part is identifying the blobs in the image (smaller low quality images don't work right now)

import cv2
import numpy as np
from sklearn.cluster import KMeans

# Load the image as a single-channel grayscale array.
image_path = "braille.jpg"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise surface later as an obscure error in detect().
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Set up SimpleBlobDetector
params = cv2.SimpleBlobDetector_Params()

# Filter by area (size of the blob, in pixels)
params.filterByArea = True
params.minArea = 100  # Adjust based on dot size
params.maxArea = 1000

# Filter by circularity
params.filterByCircularity = True
params.minCircularity = 0.9  # Adjust for shape of the dots

# Convexity filter is switched off; the threshold below is kept for when it
# is re-enabled and has no effect while the filter is disabled.
params.filterByConvexity = False
params.minConvexity = 0.7

# Filter by inertia (roundness)
params.filterByInertia = True
params.minInertiaRatio = 0.95

# Create a detector with the parameters
detector = cv2.SimpleBlobDetector_create(params)

# Detect blobs; each keypoint carries the dot centre in its .pt attribute.
keypoints = detector.detect(image)

# Draw detected blobs as red circles on a BGR copy of the grayscale image.
output_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
output_image = cv2.drawKeypoints(output_image, keypoints, np.array([]),
                                 (0, 0, 255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

print("output image")
cv2.imshow("outputimage",output_image)
cv2.waitKey(0)  # block until a key is pressed in the preview window
cv2.destroyAllWindows()

print(f"Number of blobs detected: {len(keypoints)}")

The code below puts the coordinates of the blob on a graph (thought it might be easier to work with this way)

#convert image into graph

import matplotlib.pyplot as plt
import numpy

# Dot centres as an (n, 2) array, then snapped to whole pixels.
blob_coords = np.array([kp.pt for kp in keypoints])
rounded_coords = np.round(blob_coords).astype(int)

x_coords = rounded_coords[:, 0]
y_coords = rounded_coords[:, 1]

# Proximity-based grouping idea:
#   find the smallest x gap and the smallest y gap between any two dots,
#   ignoring gaps of 15 px or less, and use them as the spacing estimates.

# Every pairwise |xi - xj|, keeping only gaps above the 15 px threshold.
x_gaps = np.abs(x_coords[:, None] - x_coords[None, :])
x_gaps = x_gaps[x_gaps > 15]
# 10000 is the "nothing found" sentinel and also caps the reported value.
minx = min(10000, x_gaps.min()) if x_gaps.size else 10000

# Same computation along the y axis.
y_gaps = np.abs(y_coords[:, None] - y_coords[None, :])
y_gaps = y_gaps[y_gaps > 15]
miny = min(10000, y_gaps.min()) if y_gaps.size else 10000

print(f"Smallest x difference: {minx}, Smallest y difference: {miny}")

# Scatter plot of the dots; the y axis is flipped so the plot reads like the image.
fig, ax = plt.subplots()
ax.scatter(x_coords, y_coords, color="blue")
ax.invert_yaxis()
plt.title("Braille Cell Detection")
plt.show()

I tried to separate them by proximity (dots that are close to each other get grouped into one cell), but I couldn't figure out the logic for it. I also tried clustering (K-means), but it isn't very accurate, and it wouldn't work for images with a different number of characters because it needs to be told in advance how many clusters to form.

# trying out kmeans clustering method
# kmeans dont work (can't figure out number of clusters from image)
# could work if nclusters can be figured out

import math
from sklearn.cluster import KMeans

blob_coords = np.array([kp.pt for kp in keypoints])  # Extract (x, y) positions of blobs
rounded_coords = np.round(blob_coords).astype(int)  # Round coordinates for simplicity


x_coords = rounded_coords[:, 0]
y_coords = rounded_coords[:, 1]

fig, ax = plt.subplots()
ax.scatter(x_coords, y_coords, color="blue")  # Plot the blobs

ax.invert_yaxis()  # Invert Y-axis for image-like coordinates
plt.title("Braille Cell Detection")
plt.show()

# KMeans must be told the number of clusters up front. 26 is a guess (one
# cluster per letter); it is clamped to the number of detected dots because
# KMeans raises ValueError when n_clusters exceeds the number of samples.
n_clusters = min(26, len(rounded_coords))
# A fixed seed makes the (otherwise randomly initialised) clustering repeatable.
kmeans = KMeans(n_clusters=n_clusters, random_state=0)
kmeans.fit(rounded_coords)

# Colour each dot by the cluster it was assigned to.
plt.scatter(x_coords,y_coords, c=kmeans.labels_)
plt.show()

Solution

  • Here's a PoC for translating Braille to text from a well-defined image. Real images can be more complicated, especially for handwritten Braille, since dot/cell spacing is not constant. Also, this image is uncontracted (Grade I) Braille, so translating contracted Braille (Grade II) could require a significantly larger mapping dictionary and a more elaborate algorithm to identify cell indexes.

    Key points of the algorithm:

    • Extract and sort coordinates of detected keypoints.
    • Find x,y differences between contiguous dots. X negative values mean second/third rows in a cell and the start of a line. e.g.: previous point p0=(520,69), current point p1=(69, 140).
      xydiff = (-451, 71). xdiff is negative, ydiff is greater than vertical cell size --> current dot is starting a line.
    • Find cell parameters: Min/max x/y coord., x min cell spacing, y min cell spacing.
    • Group coordinates by line into lists (group_by_lines()).
    • Find dot indexes on each cell in the line. . . --> (1,4) --> 'c'
    • Map the tuple to a text character.
    import sys
    import cv2
    import numpy as np
    
    # Maps a sorted tuple of raised-dot indexes to its letter.
    # Dot indexes inside a cell:
    #   1 4
    #   2 5
    #   3 6
    cell_map = {
        # a-j: upper four dots only (1, 2, 4, 5)
        (1,): 'a',
        (1, 2): 'b',
        (1, 4): 'c',
        (1, 4, 5): 'd',
        (1, 5): 'e',
        (1, 2, 4): 'f',
        (1, 2, 4, 5): 'g',
        (1, 2, 5): 'h',
        (2, 4): 'i',
        (2, 4, 5): 'j',
        # k-t: the a-j patterns plus dot 3
        (1, 3): 'k',
        (1, 2, 3): 'l',
        (1, 3, 4): 'm',
        (1, 3, 4, 5): 'n',
        (1, 3, 5): 'o',
        (1, 2, 3, 4): 'p',
        (1, 2, 3, 4, 5): 'q',
        (1, 2, 3, 5): 'r',
        (2, 3, 4): 's',
        (2, 3, 4, 5): 't',
        # u-z: patterns that include dot 6
        (1, 3, 6): 'u',
        (1, 2, 3, 6): 'v',
        (2, 4, 5, 6): 'w',
        (1, 3, 4, 6): 'x',
        (1, 3, 4, 5, 6): 'y',
        (1, 3, 5, 6): 'z',
    }
    
    def get_build_detector_params():
        """Build the SimpleBlobDetector parameters used to find Braille dots.

        Returns:
            A cv2.SimpleBlobDetector_Params object with the area, circularity
            and inertia filters enabled and the convexity filter disabled.
        """
        settings = (
            # Area filter (size of the blob); adjust to the dot size.
            ("filterByArea", True),
            ("minArea", 10),
            ("maxArea", 1000),
            # Circularity filter; adjust for the shape of the dots.
            ("filterByCircularity", True),
            ("minCircularity", 0.9),
            # Convexity filter is off; its threshold is still recorded.
            ("filterByConvexity", False),
            ("minConvexity", 0.7),
            # Inertia filter (roundness).
            ("filterByInertia", True),
            ("minInertiaRatio", 0.95),
        )
        params = cv2.SimpleBlobDetector_Params()
        for attribute, value in settings:
            setattr(params, attribute, value)
        return params
    
    def show_detection(image, detected_lines, xcell, xsep, xmin, ymax):
        """Display the image with every detected dot drawn in its line's colour.

        A visual debugging aid: all keypoints of one detected line share a
        colour, and the four-colour palette repeats every four lines.

        Args:
            image: grayscale image the keypoints were detected in.
            detected_lines: list of lines, each a list of keypoints.
            xcell, xsep, xmin, ymax: accepted for interface compatibility;
                not used by the drawing code.
        """
        palette = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (178, 102, 255))

        # Keypoints are drawn on a BGR copy so the colours are visible.
        canvas = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        for line_no, line_keypoints in enumerate(detected_lines):
            line_color = palette[line_no % len(palette)]
            canvas = cv2.drawKeypoints(
                canvas,
                line_keypoints,
                np.array([]),
                line_color,
                cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
            )

        print("output image")
        cv2.imshow("outputimage", canvas)
        cv2.waitKey(0)  # wait for a key press before closing the window
        cv2.destroyAllWindows()
    
    def _smallest_rounded(values, what):
        """Return the smallest entry of *values* rounded to the nearest int.

        Raises:
            ValueError: naming *what* when *values* is empty, instead of
                NumPy's generic "zero-size array" reduction error.
        """
        if values.size == 0:
            raise ValueError(f"Cannot determine {what}: no matching measurements found.")
        return int(np.rint(values.min()))

    def get_cell_parameters(blob_coords, xydiff):
        """Derive the spacing parameters used to locate cells among the dots.

        Args:
            blob_coords: dot coordinates as (x, y) pairs.
            xydiff: (dx, dy) differences between consecutive dots.

        Returns:
            Tuple ``(ycell, ymin, xmin, xcell, xsep, xmax)`` of ints:
            ycell/xcell are the smallest positive y/x steps (taken as the dot
            spacing inside a cell), xmin/xmax/ymin are coordinate extremes,
            and xsep is the smallest x step larger than 1.5 * xcell (taken as
            the spacing between cells).

        Raises:
            ValueError: if the inputs are not 2-D (x, y) arrays or a parameter
                cannot be derived (e.g. no dots, or only one column of dots).
        """
        coords = np.asarray(blob_coords, dtype=float)
        steps = np.asarray(xydiff, dtype=float)
        for label, arr in (("blob_coords", coords), ("xydiff", steps)):
            if arr.ndim != 2 or arr.shape[1] < 2:
                raise ValueError(f"Cannot determine cell parameters: {label} must be a non-empty sequence of (x, y) pairs.")

        dx, dy = steps[:, 0], steps[:, 1]
        # Steps of 1 px or less (and negative steps) are ignored, as are
        # coordinates of 1 px or less.
        xcell = _smallest_rounded(dx[dx > 1], "x spacing between dots in a cell")
        ycell = _smallest_rounded(dy[dy > 1], "y spacing between dots in a cell")

        xs = coords[:, 0]
        xs = xs[xs > 1]
        ys = coords[:, 1]
        ys = ys[ys > 1]
        # minimum x in the whole image
        xmin = _smallest_rounded(xs, "minimum x coordinate")
        # max x in the whole image. Represents last dot in a line.
        xmax = int(np.rint(xs.max()))
        # x separation between cells: smallest step clearly wider than xcell
        xsep = _smallest_rounded(dx[dx > xcell + xcell / 2], "x spacing between cells")
        # minimum y in the whole image
        ymin = _smallest_rounded(ys, "minimum y coordinate")
        return ycell, ymin, xmin, xcell, xsep, xmax
    
    def group_by_lines(kp_map, blob_coords, xydiff, ycell):
        """Group the dots into text lines.

        Args:
            kp_map: mapping from an (x, y) coordinate pair to its keypoint.
            blob_coords: dot coordinates as (x, y) pairs; assumed to be
                ordered by y and then x, as the caller's sort produces.
            xydiff: (dx, dy) between each dot and the one before it, so
                xydiff[i] belongs to blob_coords[i + 1].
            ycell: vertical spacing between dot rows inside a cell.

        Returns:
            A list of lines, each a list of kp_map values in scan order.
            Empty when there are no coordinates.
        """
        if len(blob_coords) == 0:
            return []

        first_pt = blob_coords[0]
        detected_lines = [[kp_map[first_pt[0], first_pt[1]]]]
        print(f"new line at: {int(blob_coords[0][0])},{int(blob_coords[0][1])}")

        # Rows inside one cell are at most 2 * ycell apart, so a vertical jump
        # of 3 * ycell or more can only be the start of the next text line.
        # The jump alone decides: also requiring dx < 0 would miss a line
        # break whenever the new line's first dot is not left of the previous
        # dot (e.g. a line ending in 'l' followed by a line starting with 'a').
        line_gap = ycell * 3
        for prev_pt, curr_pt, d in zip(blob_coords, blob_coords[1:], xydiff):
            keypoint = kp_map[curr_pt[0], curr_pt[1]]
            if d[1] >= line_gap:
                print(f"new line at: {curr_pt}, curr xdiff: {d}, {ycell * 3}, previous: {prev_pt}")
                detected_lines.append([keypoint])
            else:
                detected_lines[-1].append(keypoint)

        return detected_lines
    
    def char_to_tuple(ycell, offset, cur_char):
        """Convert the dots of one cell into a sorted tuple of dot indexes.

        The result is meant to be looked up in the cell_map dict.
        Dot indexes inside a cell:
        1 4
        2 5
        3 6

        Example:
        .
        .
        . . --> (1,2,3,6) --> 'v'

        Args:
            ycell: vertical spacing between dot rows inside a cell.
            offset: tolerance added to the column/row boundaries.
            cur_char: (x, y) coordinates of the dots belonging to the cell.

        Returns:
            Sorted tuple with one index per dot in cur_char.
        """
        # Index lookup by [column][row].
        index_grid = ((1, 2, 3), (4, 5, 6))

        # Boundaries are measured from the left-most / top-most dot of the cell.
        left_limit = np.min([cc[0] for cc in cur_char]) + offset
        top_limit = np.min([cc[1] for cc in cur_char]) + offset
        middle_limit = top_limit + ycell + offset

        cell = []
        for cc in cur_char:
            column = 0 if cc[0] <= left_limit else 1
            if cc[1] <= top_limit:
                row = 0
            elif cc[1] <= middle_limit:
                row = 1
            else:
                row = 2
            cell_idx = index_grid[column][row]

            # Diagnostics only: the index is appended either way.
            if cell_idx in cell:
                print(f"WARNING. cell_idx duplicate or not found: {cell_idx}, {cc}", left_limit, top_limit, middle_limit)
            if not cell and cell_idx == 3:
                print("ERROR. First cell_idx can't be 3")
            cell.append(cell_idx)

        cell.sort()
        return tuple(cell)
    
    image_path = "braille.jpg"
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None (it does not raise) for a missing/unreadable file.
        sys.exit(f"ERROR. Could not read image: {image_path}")

    params = get_build_detector_params()
    # Create a detector with the parameters
    detector = cv2.SimpleBlobDetector_create(params)
    # Detect blobs
    keypoints = detector.detect(image)
    if not keypoints:
        # Without dots there is nothing to sort, measure or decode.
        sys.exit("ERROR. No Braille dots detected in the image.")
    # map of keypoints coordinates (rounded to 2 decimals) to keypoints
    kp_map = { (round(kp.pt[0], 2), round(kp.pt[1], 2)): kp for kp in keypoints}

    # all dots coordinates, sorted by y and then x to help find lines.
    blob_coords = np.array(list(kp_map.keys()))
    blob_coords = blob_coords[np.lexsort((blob_coords[:,0], blob_coords[:,1]))]

    # x,y differences between contiguous dots. Negative values mean second/third rows in a cell and the start of a line.
    # e.g.: previous point p0=(520,69), current point =(69, 140). xydiff = (-451, 71).
    # xdiff is negative, ydiff is greater than vertical cell size --> current dot is starting a line.
    xydiff = np.array([ (kp[0] - blob_coords[i-1][0], kp[1] - blob_coords[i-1][1]) for i,kp in enumerate(blob_coords) if i > 0 ])

    print(f"blob_coords: {len(blob_coords)}, xydiff: {len(xydiff)}")
    ycell, ymin, xmin, xcell, xsep, xmax = get_cell_parameters(blob_coords, xydiff)

    print(f"x params: xcell {xcell}, xmin {xmin}, xsep {xsep}, xmax {xmax}")
    print(f"y params: ycell {ycell}, ymin {ymin}")
    print(f"max cells per line: {round((xmax)/(xcell + xsep))}")
    # tolerance (in pixels) used when assigning a dot to a row/column of its cell
    offset = 2

    # List of list of keypoints by line
    detected_lines = group_by_lines(kp_map, blob_coords, xydiff, ycell)

    # Decoded text of each line. Lines are kept separate so that padding
    # spaces at the end of a short line can be stripped and the lines can be
    # joined with newlines instead of running into each other.
    line_texts = []
    # blob_coords[p0:p1] are the coordinates of the current line.
    p0 = 0
    for j, line in enumerate(detected_lines):
        p1 = p0 + len(line)
        print(f"\nSTARTING line: {j}, p0: {p0}, p1: {p1} {len(xydiff[p0:p1])}/{len(line)}")

        # coordinates of a line
        cur_coor = blob_coords[p0:p1]
        line_text = ''
        # Scan the line with an x window one cell pitch (xcell + xsep) wide.
        # NOTE(review): both window bounds are inclusive, so a dot lying
        # exactly on xchar2 would be attributed to this cell - confirm
        # against images with exact integer spacing.
        xchar1 = xmin
        xchar2 = xmin + xcell + xsep
        while xchar1 <= xmax:
            cur_char =  cur_coor[ (xchar1 <= cur_coor[:,0]) & (cur_coor[:,0] <= xchar2) ]
            if len(cur_char) == 0:
                # No coordinates found at the x-range. It's a space so shift x range and move to next cell.
                print(f"WARNING: No char found. adding space to text. {xchar1}, {xchar2}")
                line_text += ' '
                xchar1 += xcell + xsep
                xchar2 = xchar1 + xcell + xsep
                continue
            # order the dots of the cell by x (stable, so y order is kept within a column)
            cur_char = cur_char[np.lexsort((cur_char[:,0], ))]
            # build a cell indexes tuple to finally decode the cell to text
            cell = char_to_tuple(ycell, offset, cur_char)

            if cell in cell_map:
                print(cell_map[cell], ': ', cell )
                line_text += cell_map[cell]
            else:
                print('ERROR. Cell to text mapping not found: ', cell, cur_char)
                line_text += '?'

            xchar1 += xcell + xsep
            xchar2 = xchar1 + xcell + xsep
            if xchar2 > xmax:
                xchar2 = xmax

        # Empty windows between the last cell of a line and xmax are padding, not text.
        line_texts.append(line_text.rstrip(' '))
        p0 = p1

    text = '\n'.join(line_texts)

    print(f"\nFound text:\n'{text}'")
    show_detection(image, detected_lines, xcell, xsep, xmin, 400)
    sys.exit()
    

    Text from below image (bug: extra space at the end).

    'abcdefghijklmnopqrstu vwxyz '

    Braille alphabet