Search code examples
python, python-imaging-library, cluster-computing

How to ignore a color or alpha when using clusters


I am trying to find the dominant color of an image using PIL and k-means clustering (scikit-learn's MiniBatchKMeans). My problem is that my images have a transparent background, because they are .png files, so I always get black as the dominant color. I'd like to ignore the first dominant color and pick the second most dominant color.

Is there a way to ignore the alpha (transparent) color, or just remove it from the result? I am afraid that by simply removing the most dominant color, I would sometimes remove the actual dominant color, in cases where the background is only a really small part of the image.

Here is my code :

from PIL import Image
import numpy
import math
import matplotlib.pyplot as plot
from sklearn.cluster import MiniBatchKMeans

# Load the image and turn its pixel sequence into an array of uint8 rows.
# NOTE: every pixel returned by getdata() is kept here, including the
# transparent background ones -- this is what the question is about.
imgfile = Image.open("images/abra.png")
numarray = numpy.array(imgfile.getdata(), numpy.uint8)

# X and Y are not used anywhere else in this snippet.
X = []
Y = []

# A 5x2 grid of axes is created; only the cell at [xaxis, yaxis] is drawn on below.
fig, axes = plot.subplots(nrows=5, ncols=2, figsize=(20,25))

# Grid position of the subplot to draw into.
xaxis = 0
yaxis = 0

# Number of colour clusters to look for.
cluster_count = 3

# Cluster the pixel rows; labels_ then holds one cluster index per pixel.
clusters = MiniBatchKMeans(n_clusters = cluster_count)
clusters.fit(numarray)

# Bin edges 0..cluster_count give one histogram bin per cluster index,
# so histogram[0] is the number of pixels assigned to each cluster.
npbins = numpy.arange(0, cluster_count + 1)
histogram = numpy.histogram(clusters.labels_, bins=npbins)
# Cluster indices that actually occur; used as the bar positions.
labels = numpy.unique(clusters.labels_)

# One bar per cluster, bar height = pixel count of that cluster.
barlist = axes[xaxis, yaxis].bar(labels, histogram[0])
# Step to the next grid cell (left column -> right column -> next row).
# Nothing in this snippet reads xaxis/yaxis after this point.
if(yaxis == 0):
    yaxis = 1
else:
    xaxis = xaxis + 1
    yaxis = 0
# Paint each bar with its cluster centre: the first three components of the
# centre are rounded up and formatted as a '#rrggbb' hex colour.
for i in range(cluster_count):
    barlist[i].set_color('#%02x%02x%02x' % (
    math.ceil(clusters.cluster_centers_[i][0]),
        math.ceil(clusters.cluster_centers_[i][1]), 
    math.ceil(clusters.cluster_centers_[i][2])))


# Display the figure.
plot.show()

Here is an example of what my current code produces:

Image given :

abra.png

Returned values :

Returned values


Solution

  • You could avoid passing transparent pixels into the classifier like this, if that's what you mean:

    #!/usr/bin/env python3
    """Plot the dominant colours of a PNG, ignoring fully transparent pixels."""

    from PIL import Image
    import numpy as np
    import math
    import matplotlib.pyplot as plot
    from sklearn.cluster import MiniBatchKMeans

    cluster_count = 3

    # Open image and force RGBA so that every pixel is an (R, G, B, A) quad,
    # even when the PNG is stored as palette, greyscale or plain RGB.
    imgfile = Image.open("abra.png").convert("RGBA")

    # Flatten to one row per pixel: shape (width * height, 4).
    rgba = np.asarray(imgfile, dtype=np.uint8).reshape(-1, 4)

    # Only pass through non-transparent pixels, i.e. those where A != 0 in the
    # RGBA quad, and drop the alpha column so clustering is done on colour only.
    na = rgba[rgba[:, 3] != 0, :3]

    # k-means cannot form more clusters than it has samples.
    if len(na) < cluster_count:
        raise SystemExit(
            "abra.png has only %d non-transparent pixel(s); need at least %d"
            % (len(na), cluster_count)
        )

    fig, axes = plot.subplots(nrows=5, ncols=2, figsize=(20, 25))

    # Grid cell to draw into.
    xaxis = 0
    yaxis = 0

    clusters = MiniBatchKMeans(n_clusters=cluster_count)
    clusters.fit(na)

    # Always produce exactly cluster_count positions and counts, so the bars
    # stay aligned with cluster_centers_ even if a cluster ends up empty.
    labels = np.arange(cluster_count)
    counts = np.bincount(clusters.labels_, minlength=cluster_count)

    barlist = axes[xaxis, yaxis].bar(labels, counts)

    # Step to the next grid cell (left column -> right column -> next row);
    # only relevant if this is repeated for further images.
    if yaxis == 0:
        yaxis = 1
    else:
        xaxis = xaxis + 1
        yaxis = 0

    # Paint each bar with the colour of its cluster centre.
    for bar, centre in zip(barlist, clusters.cluster_centers_):
        bar.set_color('#%02x%02x%02x' % tuple(math.ceil(c) for c in centre[:3]))

    plot.show()
    

    enter image description here