Tags: python, numpy, matplotlib, scipy, data-fitting

Gamma function fitting OptimizeWarning: Covariance of the parameters could not be estimated in Python


I am trying to fit functions to histograms using curve_fit from scipy; however, a warning occurs when running the program:

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.special import gamma as gamma_func
import tkinter as tk


def gaussian(x, m, s):
    return (1 / (s * np.sqrt(2 * np.pi))) * (np.exp((-1 / 2) * ((x - m) / s) ** 2))


def gamma(x, k, theta):
    return (x ** (k - 1) * np.exp(-x / theta)) / (theta ** k * gamma_func(k))


def display_gaussian(data_set, mean, sigma):
    plt.figure()
    y, bin_edges = np.histogram(data_set, bins=100, density=True)
    x = (bin_edges[:-1] + bin_edges[1:]) / 2
    plt.hist(data_set, density=True, color='skyblue', label='Histogram', edgecolor='black')

    init_gaussian = [mean, sigma]
    params_gaussian, cov_gaussian = curve_fit(gaussian, x, y, init_gaussian)


    fit = np.linspace(min(data_set), max(data_set), 1000)

    plt.plot(fit, gaussian(fit, *params_gaussian), 'r--',
             label=f'Fit: mean={params_gaussian[0]:.2f}, sigma={params_gaussian[1]:.2f}')
    plt.xlabel('Distribution')
    plt.ylabel('Frequency')
    plt.title('Gaussian distribution')
    plt.legend()
    plt.show(block=False)


def display_gamma(data_set, sigma):
    plt.figure()
    y, bin_edges = np.histogram(data_set, bins=100, density=True)
    x = (bin_edges[:-1] + bin_edges[1:]) / 2
    plt.hist(data_set, density=True, color='forestgreen', label='Histogram', edgecolor='black')

    shape_guess = 2.0
    init_gamma = [shape_guess, sigma]
    params_gamma, cov_gamma = curve_fit(gamma, x, y, init_gamma)


    fit = np.linspace(min(data_set), max(data_set), 1000)

    plt.plot(fit, gamma(fit, *params_gamma), 'y--',
             label=f'Fit: mean={params_gamma[0]:.2f}, sigma={params_gamma[1]:.2f}')
    plt.xlabel('Distribution')
    plt.ylabel('Frequency')
    plt.title('Gamma distribution')
    plt.legend()
    plt.show(block=False)


def run():
    mean = float(m_ent.get())
    sigma = float(s_ent.get())

    data_set = np.random.normal(mean, sigma, 1000)

    display_gaussian(data_set, mean, sigma)
    display_gamma(data_set, sigma)


root = tk.Tk()
root.title("Input parameters")
root.minsize(400, 300)

m_l = tk.Label(root, text="mean", font=15)
m_l.pack(pady=10)
m_ent = tk.Entry(root)
m_ent.pack(pady=10)

s_l = tk.Label(root, text="sigma", font=15)
s_l.pack(pady=10)
s_ent = tk.Entry(root)
s_ent.pack(pady=10)

run_btn = tk.Button(root, text="Run", command=run, font=15)
run_btn.pack(pady=20)

root.mainloop()

OptimizeWarning: Covariance of the parameters could not be estimated

The warning is raised at this line:

params_gamma, cov_gamma = curve_fit(gamma, x, y, init_gamma)

I am not sure what the problem is here.

I thought the problem was with the bins of the histogram and tried setting them to 'auto' and changing the density, but the warning still occurs. The same problem happened when fitting the Gaussian; after adding density=True the covariance was estimated successfully, but that does not work for the Gamma fit.


Solution

  • You are facing three challenges:

    • Fitting normally distributed data (which can be negative) with a gamma distribution, which only has positive support (see the quick check below);
    • Fitting a PDF to binned data (a histogram) when you have access to the raw data and could perform an MLE instead;
    • Writing the PDF naively when you can rely on the numerically stable version implemented in scipy.stats.
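
    A quick way to see the first point, using the hand-written gamma PDF from the question: with a non-integer shape parameter, x ** (k - 1) is nan for negative x in real arithmetic, so bin centres left of zero produce nan values and leave curve_fit without finite residuals to work with.

    import numpy as np
    from scipy.special import gamma as gamma_func

    def gamma(x, k, theta):
        return (x ** (k - 1) * np.exp(-x / theta)) / (theta ** k * gamma_func(k))

    # Bin centres from a normal sample can be negative; those entries evaluate to nan
    print(gamma(np.array([-1.0, 0.5, 2.0]), 1.5, 0.8))
    # the entry for x = -1.0 is nan (numpy warns about an invalid value in power)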

    Here is an example of how to draw a gamma sample and fit it with MLE:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import stats, optimize
    
    # Draw a sample from a known gamma law
    law = stats.gamma(a=1.5, scale=0.8)
    data = law.rvs(1_000)
    
    # Negative log-likelihood of the raw (unbinned) sample
    def likelihood_factory(data):
        def wrapped(parameters):
            return - np.sum(stats.gamma(a=parameters[0], scale=parameters[1]).logpdf(data))
        return wrapped
    
    likelihood = likelihood_factory(data)
    
    # MLE: minimize the negative log-likelihood
    solution = optimize.minimize(likelihood, x0=[1., 1.], tol=1e-4)
    #  message: Optimization terminated successfully.
    #  success: True
    #   status: 0
    #      fun: 1126.2309539053672
    #        x: [ 1.508e+00  7.876e-01]
    #      nit: 8
    #      jac: [-1.526e-05 -3.052e-05]
    # hess_inv: [[ 3.781e-03 -1.978e-03]
    #            [-1.978e-03  1.447e-03]]
    #     nfev: 33
    #     njev: 11
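
    The fitted shape and scale are in solution.x. Here is a sketch of how the result could be checked against the data, continuing the snippet above (the bin count of 100 is an arbitrary choice, and scipy.stats can also run the same MLE directly through gamma.fit):

    # Equivalent MLE straight from scipy.stats; floc=0 pins the location parameter at zero
    k_hat, loc_hat, theta_hat = stats.gamma.fit(data, floc=0)

    # Overlay the fitted PDF on a density histogram of the raw sample
    xs = np.linspace(data.min(), data.max(), 1000)
    plt.hist(data, bins=100, density=True, color='skyblue', edgecolor='black', label='sample')
    plt.plot(xs, stats.gamma(a=solution.x[0], scale=solution.x[1]).pdf(xs), 'r--',
             label=f'MLE fit: k={solution.x[0]:.2f}, theta={solution.x[1]:.2f}')
    plt.legend()
    plt.show()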
    

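    If you do want to keep the curve_fit-on-histogram approach from the question, the last point above still applies: below is a sketch under that assumption, again continuing from the gamma sample above, using the numerically stable scipy.stats PDF and positive parameter bounds (the p0 and bounds values are illustrative guesses):

    from scipy.optimize import curve_fit

    def gamma_pdf(x, k, theta):
        # numerically stable gamma PDF instead of the hand-written formula
        return stats.gamma(a=k, scale=theta).pdf(x)

    # Bin the sample and fit the PDF to the bin centres, as in the question
    y, bin_edges = np.histogram(data, bins=100, density=True)
    centres = (bin_edges[:-1] + bin_edges[1:]) / 2

    params, cov = curve_fit(gamma_pdf, centres, y, p0=[1.0, 1.0],
                            bounds=([1e-6, 1e-6], [np.inf, np.inf]))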