Python detect clusters of colors in an image

I have an image with a transparent background and defined clusters of colors

I would like to automate a workflow in Python that breaks down the image and saves each cluster to a new image file with a unique ID. In the example image above, there would be two unique image exports.

Unique Image 1

Unique Image 2

Solution

This code assumes that the most frequent color among the four corner pixels of the image is the background color. It will fail if an object covers the corners.

There are a lot of small specks in your image that differ from the background color. You can discard these small pieces by raising the minimumSize variable, which is set here to 10 pixels.

The code works first by isolating anything different from the background color:

Then it detects the contours (discarding nested contours), and uses each detected contour to mask and crop the pieces from the original image.

from matplotlib import image # No clue why, but cv2 doesn't works witouth this import
import numpy as np
import cv2


def downloadImage(URL, timeout=30):
    """Download the image at ``URL`` and return it as an OpenCV BGR array.

    Parameters
    ----------
    URL : str
        Address of the image to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    numpy.ndarray
        3-channel ``uint8`` array in BGR channel order (OpenCV's
        convention). Any alpha channel is dropped.

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an HTTP error status.
    """
    from io import BytesIO
    from PIL import Image as PIL_Image
    import requests

    response = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
    response.raise_for_status()
    image = PIL_Image.open(BytesIO(response.content))
    # Normalise palette, grayscale and alpha images to 3-channel RGB so the
    # colour conversion below always receives the layout it expects.
    rgbPixels = np.array(image.convert("RGB"))
    # PIL delivers RGB; OpenCV works in BGR.
    return cv2.cvtColor(rgbPixels, cv2.COLOR_RGB2BGR)


URL = "https://i.sstatic.net/ycWzD.jpg"

# Read image
img = downloadImage(URL)

# Minimum number of boundary pixels a contour needs in order to be kept
minimumSize = 10

# Sample the colors of the 4 corner pixels (indices are row, column)
cornerCoord = [[0, 0], [0, -1], [-1, 0], [-1, -1]]
cornerColors = [img[row, col] for row, col in cornerCoord]
# Select the most frequent corner color; it is presumed to be the background.
color, count = np.unique(cornerColors, return_counts=True, axis=0)
# argmax yields exactly one color even when several corner colors tie;
# a boolean selection would return multiple rows and break the comparison
# against img below.
mostCommonColor = color[np.argmax(count)]

# mask is 1 where the pixel differs from the background in ANY channel.
# (Requiring all channels to differ would drop e.g. pure red on white.)
mask = np.any(img != mostCommonColor, axis=2).astype(np.uint8)

# Find contours of the masked image; RETR_EXTERNAL discards nested contours
contours, hierarchy = cv2.findContours(
    mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Discard specks. With CHAIN_APPROX_NONE every boundary pixel is one contour
# point, so len(c) is the boundary length in pixels (c.size would count the
# x and y coordinates separately and double the figure).
contours = tuple(c for c in contours if len(c) > minimumSize)

for number, contour in enumerate(contours):
    # Blank single-channel canvas on which only this contour is filled
    thisMask = np.zeros_like(mask)
    cv2.drawContours(thisMask, contours, number,
                     (255, 255, 255), thickness=cv2.FILLED)
    selected = thisMask == 255
    # Copy the pixels inside the contour onto an empty (black) image
    maskedImg = np.zeros(img.shape, dtype=np.uint8)
    maskedImg[selected] = img[selected]
    # Crop to the contour's own bounding box. Deriving the box from non-zero
    # pixels would shrink it (or fail outright) for black objects.
    x, y, w, h = cv2.boundingRect(contour)
    maskedImg = maskedImg[y:y+h, x:x+w]
    cv2.imshow(f"contour {number}", maskedImg)

# imshow only renders once the GUI event loop runs; wait for a key press so
# the windows actually appear, then clean them up.
if contours:
    cv2.waitKey(0)
    cv2.destroyAllWindows()

The "hairiness" on the borders of the images comes from pixels that are almost white, but not exactly white. Those are the antialiasing pixels from the original image.