Search code examples
python-3.x, mean, binning

Python: Compute Bin-Mean Value of Scatter Plot Bullets


I have three 1D arrays (A, B, C) of equal length/size. I plot a scatter plot of B vs. A where I color each scatter plot bullet by the corresponding value in the C array (see the code below).

# Imports
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Build three flat sample arrays of equal length: A and B are the point
# coordinates (scaled to 0-10) and C is the value used to color each point
# (scaled to 0-100).
n_points = 20 * 20
A = 10 * np.random.random_sample(n_points)
B = 10 * np.random.random_sample(n_points)
C = 100 * np.random.random_sample(n_points)

# Discrete color scale for C: ten bands of width 10 between 0 and 100,
# with invalid values drawn in white.
cmap_C = cm.jet
cmap_C.set_bad('white')
bounds_C = 10 * np.arange(11)
norm_C = mpl.colors.BoundaryNorm(bounds_C, ncolors=cmap_C.N)

# Scatter B against A, coloring every marker by its C value.
fig, ax = plt.subplots()
points = ax.scatter(A, B, c=C, s=100, marker='o', cmap=cmap_C, norm=norm_C)

# Leave a one-unit margin around the data and put a tick (and therefore a
# grid line) on every integer so the 1 x 1 cells are visible.
integer_ticks = np.arange(11)
ax.set_xlim(-1, 11)
ax.set_ylim(-1, 11)
ax.set_xticks(integer_ticks)
ax.set_yticks(integer_ticks)
ax.set_xlabel('A')
ax.set_ylabel('B')
ax.grid()

fig.colorbar(points, ax=ax, label='Value of C')
plt.show()

Some bullets overlap in the figure, so we cannot see them clearly. Therefore, I now want to compute and plot the mean C value of all scatter plot bullets within each 1 x 1 bin (integer bin edges) in the figure, so that each square grid cell is colored by one single color (these bins are illustrated by the figure's grid lines). How can I do this?


Solution

  • Below is a solution that works for my purposes.

    # Imports
    import matplotlib.cm as cm
    import matplotlib.colors as colors
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import numpy as np
    from zipfile import ZipFile 
    
    # Create the Arrays
    xx = 5
    yy = 5

    # A and B are the point coordinates (scaled to 0-10); C is the value
    # attached to each point (scaled to 0-100). All three are flattened to
    # 1-D arrays of length xx*yy.
    A = 10 * np.random.random_sample((xx, yy))
    B = 10 * np.random.random_sample((xx, yy))
    C = 100 * np.random.random_sample((xx, yy))

    A = A.reshape(xx*yy)
    B = B.reshape(xx*yy)
    C = C.reshape(xx*yy)

    # Bin edges on every integer from 0 to 10 -> a 10 x 10 grid of unit bins.
    xedges = np.arange(11)
    yedges = np.arange(11)

    # Number of points in each bin. histogram2d indexes its result as
    # [A-bin, B-bin]; transpose so that rows follow B (the y axis) and
    # columns follow A (the x axis), which is the layout pcolor draws.
    H, xedges, yedges = np.histogram2d(A, B, bins=(xedges, yedges))
    HT = H.T

    # Sum of C in each bin: the same histogram, weighted by C. This keeps the
    # full floating-point values (no integer truncation of C) and uses
    # exactly the same binning rule as the counts above.
    C_sum, _, _ = np.histogram2d(A, B, bins=(xedges, yedges), weights=C)
    C_sum_T = C_sum.T

    # Bin-mean C = sum / count. Empty bins are left as NaN (and masked for
    # plotting below) instead of triggering a divide-by-zero warning.
    BMC = np.full(HT.shape, np.nan)
    np.divide(C_sum_T, HT, out=BMC, where=HT > 0)

    print(HT)
    print(C_sum_T)
    print(BMC)

    # Create the Colormaps and Define Boundaries
    # C values: ten color bands of width 10 centered on 0, 10, ..., 100.
    # NOTE: set_bad() is called on the shared cm.jet / cm.CMRmap_r objects.
    cmap_C = cm.jet
    cmap_C.set_bad(color='white')
    bounds_C = np.arange(-5, 115, 10)
    norm_C = mpl.colors.BoundaryNorm(bounds_C, cmap_C.N)
    BMC_plot = np.ma.array(BMC, mask=np.isnan(BMC))     # Mask NaN (empty bins)

    # Counts: one color band per integer count, from 0 up to the largest
    # count actually present so that no bin falls outside the norm.
    max_count = int(HT.max())
    cmap_hist2d = cm.CMRmap_r
    cmap_hist2d.set_bad(color='white')
    bounds_hist2d = np.arange(-0.5, max_count + 1.5, 1)
    norm_hist2d = mpl.colors.BoundaryNorm(bounds_hist2d, cmap_hist2d.N)

    # Panel 1: the raw scatter, each point colored by its C value.
    plt.subplot(311)
    plt.scatter(A, B, c=C, marker='o', s=100, cmap=cmap_C, norm=norm_C)
    plt.xlim([-1, 11])
    plt.ylim([-1, 11])
    plt.xticks(np.arange(0, 11, 1))
    plt.yticks(np.arange(0, 11, 1))
    plt.ylabel('B')
    plt.grid()
    plt.colorbar(label='Value of C', ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

    # Panel 2: number of points per 1 x 1 bin.
    plt.subplot(312)
    x, y = np.meshgrid(xedges, yedges)
    plt.pcolor(x, y, HT, cmap=cmap_hist2d, norm=norm_hist2d)
    plt.xlim([-1, 11])
    plt.ylim([-1, 11])
    plt.xticks(np.arange(0, 11, 1))
    plt.yticks(np.arange(0, 11, 1))
    plt.ylabel('B')
    plt.grid()
    plt.colorbar(label='Number of Data in Bin', ticks=np.arange(max_count + 1))

    # Panel 3: mean C per 1 x 1 bin; empty bins are masked.
    plt.subplot(313)
    plt.pcolor(x, y, BMC_plot, cmap=cmap_C, norm=norm_C)
    plt.xlim([-1, 11])
    plt.ylim([-1, 11])
    plt.xticks(np.arange(0, 11, 1))
    plt.yticks(np.arange(0, 11, 1))
    plt.xlabel('A')
    plt.ylabel('B')
    plt.grid()
    plt.colorbar(label='Bin-Mean C Value', ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
    plt.show()