Multiple gaussian fit issue

I have this code that aims to fit the data in here https://drive.google.com/file/d/1uBrHxuftALiQTcTeBl-s6AHGiC8DGBWV/view?usp=sharing.

Where the function read_file is used to extract the information in a proper way. I do not what i am doing wrong that the gaussian fit is not right as you can see in the image (Gaussian fit) , and the standard deviation for all the fitted gaussian is the same or it varies very little (1e-09). Can anyone help me?

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import os

file_path='/content/drive/MyDrive/ProjetoXRF_Manuel/April2024/Amostra4.mca' # You need to change this to your directory

def read_file(file_path):
    # Abrir el archivo
    with open(file_path, 'r', encoding='latin1') as f:
        data = []
        live_time = None  # Inicializar variable live_time
        real_time = None  # Inicializar variable real_time
        for line in f:
            if 'LIVE_TIME' in line:
                live_time = line.strip()  # Almacenar la línea que contiene LIVE_TIME
            elif 'REAL_TIME' in line:
                real_time = line.strip()  # Almacenar la línea que contiene REAL_TIME
            elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
                # Analizar y almacenar datos numéricos como flotante único
                row = [float(x) for x in line.split()]
                data.extend(row)

    # Convertir 'data' a un arreglo de numpy
    return np.array(data), live_time, real_time

data, live_time, real_time = read_numeric_data(file_path)
a = -0.0076702251954372525
b = 0.03952691936704189
Data=data

peaks, _ = find_peaks(Data[:, 0] / maxim, height=0.1)

# Get peak values and indices
peak_values = Data[peaks] / maxim
peak_indices = peaks

# Definición de la función gaussiana
def gaus(x, a, x0, sigma):
    return a * np.exp(-(x - x0) ** 2 / (2 * sigma ** 2))
limite=(10-a)/b
print(limite)
# Datos
x_values = a + b * np.linspace(0, 270, 270)
y_values = np.squeeze(Data[:270]) / np.max(Data[:270])

# Parámetros iniciales
sigma = 2 # initial value
standar=[]
# Ajuste de curvas para diferentes conjuntos de datos
for i in range(len(peak_values)):
    mean = a + b * peak_indices[i] 
    amplitude = peak_values[i][0]
    
    # Ajustar la curva para cada conjunto de datos
    popt, pcov = curve_fit(gaus, x_values, y_values, p0=[amplitude, mean, 1])
    
    # Visualización del ajuste
    m,n,p= popt
    plt.plot(x_values, gaus(x_values,amplitude,mean,p), label='fit')

    print(mean, amplitude,"Desviación standar",p)

    standar.append(p)

plt.scatter(x_values, y_values, label='Data')
plt.xlim(4, 10)
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()

Solution

The two main issues were:

The code fits just a single gaussian rather than a sum of gaussians. You need to fit (amplitude, mean, sigma) parameters for each gaussian, and add the resulting gaussians together.
The fitting algorithm seems quite sensitive to the initialisation of sigma, so I've used a better starting value.

The modified code:

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.signal import find_peaks

file_path='Amostra4.mca'

def read_file(file_path):
    # Abrir el archivo
    with open(file_path, 'r', encoding='latin1') as f:
        data = []
        live_time = None  # Inicializar variable live_time
        real_time = None  # Inicializar variable real_time
        for line in f:
            if 'LIVE_TIME' in line:
                live_time = line.strip()  # Almacenar la línea que contiene LIVE_TIME
            elif 'REAL_TIME' in line:
                real_time = line.strip()  # Almacenar la línea que contiene REAL_TIME
            elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
                # Analizar y almacenar datos numéricos como flotante único
                row = [float(x) for x in line.split()]
                data.extend(row)

    # Convertir 'data' a un arreglo de numpy
    return np.array(data), live_time, real_time

def multi_gaussian(x, *params):
    eps = 1e-10
    y = np.zeros_like(x)
    for i in range(0, len(params), 3):
        amplitude, mean, sigma = params[i:i+3]
        y += amplitude * np.exp(-(x - mean)**2 / (2 * sigma**2 + eps))
    return y

#Load and prepare data
data, live_time, real_time = read_file(file_path)

a = -0.0076702251954372525
b = 0.03952691936704189
y_start = 150
y_end = 270
data_y = data[y_start:y_end] / data[y_start:y_end].max()
data_x = a + b * np.linspace(y_start, y_end, num=len(data_y))

#Some initial estimates that could help with curve_fit() convergence
peak_indices, _ = find_peaks(data_y / data_y.max(), height=0.1)
peak_amplitudes = data_y[peak_indices]
peak_means = data_x[peak_indices]
peak_sigmas = [0.1] * len(peak_indices)

params_init = list(zip(peak_amplitudes, peak_means, peak_sigmas))
params_init = np.concatenate(params_init)

#Fit
params, params_cov = curve_fit(multi_gaussian, data_x, data_y, p0=params_init)

#Get a high-resolution interpolation of the fit
x_fine = np.linspace(data_x.min(), data_x.max(), num=500)
y_fit = multi_gaussian(x_fine, *params)

#
# Plot
#
fig, ax = plt.subplots(figsize=(8, 2))

#Data
ax.scatter(data_x, data_y, label='data', color='black', marker='o', s=20)

#Fitted curve
ax.plot(x_fine, y_fit, label='fit', linewidth=1.5, color='tab:purple')

#Annotate the estimated locations
[ax.axvline(mean, color='darkslategray', linewidth=1, linestyle=':') for mean in params[1::3]]

#Formatting
ax.set(xlabel='X', ylabel='Y', title='Data and fit')
ax.spines[['right', 'top']].set_visible(False)
ax.legend()