Tags: python, image, matplotlib, plot, graphics

How can I draw a line along my graph to show me where the data density is the highest?


import heapq

from astropy.io import fits
import matplotlib.pyplot as plt

datos = fits.open('/home/citlali/Documentos/Servicio/Lista.fits')
data = datos[1].data


#[SIII] 9532 line
Mask_1 = data['flux_[SIII]9531.1_Re_fit'] / data['e_flux_[SIII]9531.1_Re_fit'] > 5
newdata1 = data[Mask_1]
dat_flux = newdata1['flux_[SIII]9069.0_Re_fit']
dat_eflux = newdata1['e_flux_[SIII]9069.0_Re_fit']
Mask_2 = dat_flux / dat_eflux > 5
newdata2 = newdata1[Mask_2]


H1_alpha = newdata1['log_NII_Ha_Re']
H1_beta = newdata1['log_OIII_Hb_Re']
H2_alpha = newdata2['log_NII_Ha_Re']
H2_beta = newdata2['log_OIII_Hb_Re']


M = H1_alpha < -0.9
newx = H1_alpha[M]
newy = H1_beta[M]  
ex = newx 
ey = newy 
#print("Elements of SIII [9532]: ", len(newx))
m = H2_alpha < -0.9
newxm = H2_alpha[m]
newym = H2_beta[m] 
#print("Elements of SIII [9069]: ", len(newxm))

sm = heapq.nsmallest(3000, zip(newx, newy)) # zip them to sort together
newx, newy = zip(*sm) # unzip them

plt.figure()
plt.plot(H1_alpha, H1_beta, '*', color='darkred', markersize=7, label="SIII [9532]")
plt.plot(H2_alpha, H2_beta, '.', color='rosybrown', markersize=3, label="SIII [9069]")
plt.xlim(-1.5, 0.75)
plt.ylim(-1, 1) 
plt.title('Diagnostic diagram')
plt.ylabel('OIII/Hbeta')
plt.xlabel('NII/Halpha')
plt.grid()
plt.legend()
fig = plt.gcf()
fig.set_size_inches(8, 6)
plt.show()

In the figure I show my plot, and the black line is what I want to obtain.

The code reads downloaded data that I have plotted; it shows galaxies with a signal-to-noise ratio greater than 5. The data to consider for fitting the line should be H1_alpha, H1_beta and/or H2_alpha, H2_beta.


Solution

  • The example below, using a test dataset, shows how you can derive sample weights based on density and then supply those weights to a curve-fitting algorithm.

    The example dataset is a quadratic-like curve/density embedded in noise:

    [figure: the test dataset, a quadratic-like dense curve embedded in noise]

    We can model the density using sklearn's GaussianMixture. It provides a score for each sample, where higher scores correspond to the more probable (i.e. denser) areas.

    [figure: density contours of the fitted GaussianMixture overlaid on the data]
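    As a minimal standalone sketch (toy data, not the answer's dataset) of what score_samples() gives you: it returns the log-density at each sample, so exponentiating it yields values that are largest in the densest regions, which is exactly what we will use as weights.

    import numpy as np
    from sklearn.mixture import GaussianMixture

    rng = np.random.default_rng(0)
    dense_blob = rng.normal(0.0, 0.1, size=(200, 2))    # tight, dense cluster
    sparse_noise = rng.uniform(-3, 3, size=(50, 2))     # scattered background
    pts = np.concatenate([dense_blob, sparse_noise])

    gmm = GaussianMixture(n_components=5, random_state=0).fit(pts)
    weights = np.exp(gmm.score_samples(pts))            # high where the data is dense

    print(weights[:200].mean() > weights[200:].mean())  # True: blob points score higher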

    Next, we use scipy's curve_fit() to fit a quadratic curve using the previously-computed sample weights. curve_fit needs the inverse of the weights (error bars), so we supply it with sigma=1/weights:

    [figure: the density-weighted quadratic curve fitted with curve_fit()]
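    To see concretely how sigma acts as a per-point weight: curve_fit minimizes the sum of (residual / sigma)^2, so points with small sigma (i.e. large weight) dominate the fit. A tiny toy sketch, separate from the dataset above:

    import numpy as np
    from scipy.optimize import curve_fit

    def line(x, a, b):
        return a * x + b

    x = np.array([0., 1., 2., 3., 4.])
    y = np.array([0., 1., 2., 10., 11.])          # last two points are off the trend
    weights = np.array([1., 1., 1., 0.01, 0.01])  # down-weight the off-trend points

    (a_w, b_w), _ = curve_fit(line, x, y, sigma=1 / weights)
    (a_u, b_u), _ = curve_fit(line, x, y)

    print(a_w, b_w)  # close to the trend of the first three points (slope ~1, intercept ~0)
    print(a_u, b_u)  # the unweighted fit is pulled toward the off-trend points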

    Overlaying the individual steps:

    [figure: data, density contours, and fitted curve overlaid]

    import numpy as np
    import matplotlib.pyplot as plt
    
    #
    #Make some test data - a quadratic curve embedded in noise
    #
    from sklearn.datasets import make_moons
    X1, y1 = make_moons(500, noise=0.7, random_state=0)
    X2, y2 = make_moons(500, noise=0.4, random_state=1)
    X3, y3 = make_moons(600, noise=0.08, random_state=2)
    X = np.concatenate([X1, X2, X3[y3==0]], axis=0)
    
    data_xy = X
    data_x, data_y = X[:, 0], X[:, 1]
    
    #View the data
    plt.scatter(data_x, data_y, marker='s', s=1)
    plt.gca().set(xlabel='x', ylabel='y')
    plt.gcf().set_size_inches(8, 3)
    
    #
    #Model the density and view the results.
    #
    
    # Increasing "n_components" gives you a more granular/patchy-looking fit; tune as needed.
    from sklearn.mixture import GaussianMixture
    gmm = GaussianMixture(n_components=20, random_state=0).fit(data_xy)
    
    #Define a regular XY grid
    x_axis_fine = np.linspace(data_x.min(), data_x.max())
    y_axis_fine = np.linspace(data_y.min(), data_y.max())
    xx, yy = np.meshgrid(x_axis_fine, y_axis_fine)
    
    log_scores = gmm.score_samples(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    scores = np.exp(log_scores)
    
    plt.contour(xx, yy, scores, levels=6, cmap='plasma_r', alpha=1)
    
    #Get the density score for each datapoint
    # we can optionally scale them to interpret them as weights summing to 1
    data_scores = np.exp(gmm.score_samples(data_xy))
    data_scores /= data_scores.sum()
    
    
    #
    #Define and fit a curve using scipy's curve_fit
    #
    from scipy.optimize import curve_fit
    
    def quadratic_func(x, A, B, C):
        #Given x and coefficients (A, B, C): y = Ax^2 + Bx + C
        return A*x**2 + B*x + C
    
    #Find optimal parameters
    (A_opt, B_opt, C_opt), _ = curve_fit(quadratic_func, data_x, data_y, sigma=1 / data_scores)
    
    plt.plot(
        x_axis_fine, quadratic_func(x_axis_fine, A_opt, B_opt, C_opt),
        color='tab:brown', lw=10, alpha=0.3, label='curve_fit()'
    )
    plt.gca().legend()
    
    #Optional formatting
    ax = plt.gca()
    ax.spines['left'].set_bounds([-3, 2])
    ax.spines['bottom'].set_bounds([-2, 3])
    ax.spines[['top', 'right']].set_visible(False)
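
    The same recipe should carry over to the question's own data. A rough sketch (untested against the FITS file, and assuming a quadratic is a reasonable shape for the dense ridge) using the H2_alpha and H2_beta arrays from the question's code:

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.mixture import GaussianMixture
    from scipy.optimize import curve_fit

    #H2_alpha and H2_beta come from the question's code above
    xy = np.column_stack([H2_alpha, H2_beta])
    xy = xy[np.isfinite(xy).all(axis=1)]            # drop NaN/inf rows before fitting

    gmm = GaussianMixture(n_components=20, random_state=0).fit(xy)
    w = np.exp(gmm.score_samples(xy))
    w /= w.sum()

    def quad(x, A, B, C):
        return A*x**2 + B*x + C

    (A_opt, B_opt, C_opt), _ = curve_fit(quad, xy[:, 0], xy[:, 1], sigma=1 / w)

    x_line = np.linspace(xy[:, 0].min(), xy[:, 0].max(), 200)
    plt.plot(x_line, quad(x_line, A_opt, B_opt, C_opt), 'k-', lw=2, label='density ridge')
    plt.legend()
    plt.show()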