Search code examples
pythoncurve-fittinggaussian

Multiple gaussian fit issue


I have this code that aims to fit the data in here https://drive.google.com/file/d/1uBrHxuftALiQTcTeBl-s6AHGiC8DGBWV/view?usp=sharing.

Where the function read_file is used to extract the information in a proper way. I do not what i am doing wrong that the gaussian fit is not right as you can see in the image (Gaussian fit) , and the standard deviation for all the fitted gaussian is the same or it varies very little (1e-09). Can anyone help me?

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import os

file_path='/content/drive/MyDrive/ProjetoXRF_Manuel/April2024/Amostra4.mca' # You need to change this to your directory

def read_file(file_path):
    # Abrir el archivo
    with open(file_path, 'r', encoding='latin1') as f:
        data = []
        live_time = None  # Inicializar variable live_time
        real_time = None  # Inicializar variable real_time
        for line in f:
            if 'LIVE_TIME' in line:
                live_time = line.strip()  # Almacenar la línea que contiene LIVE_TIME
            elif 'REAL_TIME' in line:
                real_time = line.strip()  # Almacenar la línea que contiene REAL_TIME
            elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
                # Analizar y almacenar datos numéricos como flotante único
                row = [float(x) for x in line.split()]
                data.extend(row)

    # Convertir 'data' a un arreglo de numpy
    return np.array(data), live_time, real_time

data, live_time, real_time = read_numeric_data(file_path)
a = -0.0076702251954372525
b = 0.03952691936704189
Data=data

peaks, _ = find_peaks(Data[:, 0] / maxim, height=0.1)

# Get peak values and indices
peak_values = Data[peaks] / maxim
peak_indices = peaks

# Definición de la función gaussiana
def gaus(x, a, x0, sigma):
    return a * np.exp(-(x - x0) ** 2 / (2 * sigma ** 2))
limite=(10-a)/b
print(limite)
# Datos
x_values = a + b * np.linspace(0, 270, 270)
y_values = np.squeeze(Data[:270]) / np.max(Data[:270])

# Parámetros iniciales
sigma = 2 # initial value
standar=[]
# Ajuste de curvas para diferentes conjuntos de datos
for i in range(len(peak_values)):
    mean = a + b * peak_indices[i] 
    amplitude = peak_values[i][0]
    
    # Ajustar la curva para cada conjunto de datos
    popt, pcov = curve_fit(gaus, x_values, y_values, p0=[amplitude, mean, 1])
    
    # Visualización del ajuste
    m,n,p= popt
    plt.plot(x_values, gaus(x_values,amplitude,mean,p), label='fit')

    print(mean, amplitude,"Desviación standar",p)

    standar.append(p)

plt.scatter(x_values, y_values, label='Data')
plt.xlim(4, 10)
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()

Solution

  • The two main issues were:

    • The code fits just a single gaussian rather than a sum of gaussians. You need to fit (amplitude, mean, sigma) parameters for each gaussian, and add the resulting gaussians together.
    • The fitting algorithm seems quite sensitive to the initialisation of sigma, so I've used a better starting value.

    enter image description here

    The modified code:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    from scipy.signal import find_peaks
    
    file_path='Amostra4.mca'
    
    def read_file(file_path):
        # Abrir el archivo
        with open(file_path, 'r', encoding='latin1') as f:
            data = []
            live_time = None  # Inicializar variable live_time
            real_time = None  # Inicializar variable real_time
            for line in f:
                if 'LIVE_TIME' in line:
                    live_time = line.strip()  # Almacenar la línea que contiene LIVE_TIME
                elif 'REAL_TIME' in line:
                    real_time = line.strip()  # Almacenar la línea que contiene REAL_TIME
                elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
                    # Analizar y almacenar datos numéricos como flotante único
                    row = [float(x) for x in line.split()]
                    data.extend(row)
    
        # Convertir 'data' a un arreglo de numpy
        return np.array(data), live_time, real_time
    
    def multi_gaussian(x, *params):
        eps = 1e-10
        y = np.zeros_like(x)
        for i in range(0, len(params), 3):
            amplitude, mean, sigma = params[i:i+3]
            y += amplitude * np.exp(-(x - mean)**2 / (2 * sigma**2 + eps))
        return y
    
    #Load and prepare data
    data, live_time, real_time = read_file(file_path)
    
    a = -0.0076702251954372525
    b = 0.03952691936704189
    y_start = 150
    y_end = 270
    data_y = data[y_start:y_end] / data[y_start:y_end].max()
    data_x = a + b * np.linspace(y_start, y_end, num=len(data_y))
    
    #Some initial estimates that could help with curve_fit() convergence
    peak_indices, _ = find_peaks(data_y / data_y.max(), height=0.1)
    peak_amplitudes = data_y[peak_indices]
    peak_means = data_x[peak_indices]
    peak_sigmas = [0.1] * len(peak_indices)
    
    params_init = list(zip(peak_amplitudes, peak_means, peak_sigmas))
    params_init = np.concatenate(params_init)
    
    #Fit
    params, params_cov = curve_fit(multi_gaussian, data_x, data_y, p0=params_init)
    
    #Get a high-resolution interpolation of the fit
    x_fine = np.linspace(data_x.min(), data_x.max(), num=500)
    y_fit = multi_gaussian(x_fine, *params)
    
    #
    # Plot
    #
    fig, ax = plt.subplots(figsize=(8, 2))
    
    #Data
    ax.scatter(data_x, data_y, label='data', color='black', marker='o', s=20)
    
    #Fitted curve
    ax.plot(x_fine, y_fit, label='fit', linewidth=1.5, color='tab:purple')
    
    #Annotate the estimated locations
    [ax.axvline(mean, color='darkslategray', linewidth=1, linestyle=':') for mean in params[1::3]]
    
    #Formatting
    ax.set(xlabel='X', ylabel='Y', title='Data and fit')
    ax.spines[['right', 'top']].set_visible(False)
    ax.legend()