I am trying to fit functions to histograms using curve_fit from scipy, however a warning occurs when running the program:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.special import gamma as gamma_func
import tkinter as tk
def gaussian(x, m, s):
return (1 / (s * np.sqrt(2 * np.pi))) * (np.exp((-1 / 2) * ((x - m) / s) ** 2))
def gamma(x, k, theta):
return (x ** (k - 1) * np.exp(-x / theta)) / (theta ** k * gamma_func(k))
def display_gaussian(data_set, mean, sigma):
plt.figure()
y, bin_edges = np.histogram(data_set, bins=100, density=True)
x = (bin_edges[:-1] + bin_edges[1:]) / 2
plt.hist(data_set, density=True, color='skyblue', label='Histogram', edgecolor='black')
init_gaussian = [mean, sigma]
params_gaussian, cov_gaussian = curve_fit(gaussian, x, y, init_gaussian)
fit = np.linspace(min(data_set), max(data_set), 1000)
plt.plot(fit, gaussian(fit, *params_gaussian), 'r--',
label=f'Fit: mean={params_gaussian[0]:.2f}, sigma={params_gaussian[1]:.2f}')
plt.xlabel('Distribution')
plt.ylabel('Frequency')
plt.title('Gaussian distribution')
plt.legend()
plt.show(block=False)
def display_gamma(data_set, sigma):
plt.figure()
y, bin_edges = np.histogram(data_set, bins=100, density=True)
x = (bin_edges[:-1] + bin_edges[1:]) / 2
plt.hist(data_set, density=True, color='forestgreen', label='Histogram', edgecolor='black')
shape_guess = 2.0
init_gamma = [shape_guess, sigma]
params_gamma, cov_gamma = curve_fit(gamma, x, y, init_gamma)
fit = np.linspace(min(data_set), max(data_set), 1000)
plt.plot(fit, gamma(fit, *params_gamma), 'y--',
label=f'Fit: mean={params_gamma[0]:.2f}, sigma={params_gamma[1]:.2f}')
plt.xlabel('Distribution')
plt.ylabel('Frequency')
plt.title('Gamma distribution')
plt.legend()
plt.show(block=False)
def run():
mean = float(m_ent.get())
sigma = float(s_ent.get())
data_set = np.random.normal(mean, sigma, 1000)
display_gaussian(data_set, mean, sigma)
display_gamma(data_set, sigma)
root = tk.Tk()
root.title("Input parameters")
root.minsize(400, 300)
m_l = tk.Label(root, text="mean", font=15)
m_l.pack(pady=10)
m_ent = tk.Entry(root)
m_ent.pack(pady=10)
s_l = tk.Label(root, text="sigma", font=15)
s_l.pack(pady=10)
s_ent = tk.Entry(root)
s_ent.pack(pady=10)
run_btn = tk.Button(root, text="Run", command=run, font=15)
run_btn.pack(pady=20)
root.mainloop()
OptimizeWarning: Covariance of the parameters could not be estimated
params_gamma, cov_gamma = curve_fit(gamma, x, y, init_gamma)
I am not sure what is the problem here.
I thought the problem was with the bins of the histogram and tried setting it to 'auto', also changing the density, but the warning occurs again. The same problem happened when fitting the Gaussian and after adding density=True
the covariance was successfully estimated, but it is not working for fitting the Gamma.
You are facing three challenges:
scipy.stats
.Here is an example how to draw gamma sample and fit it with MLE:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats, optimize
law = stats.gamma(a=1.5, scale=0.8)
data = law.rvs(1_000)
def likelihood_factory(data):
def wrapped(parameters):
return - np.sum(stats.gamma(a=parameters[0], scale=parameters[1]).logpdf(data))
return wrapped
likelihood = likelihood_factory(data)
solution = optimize.minimize(likelihood, x0=[1., 1.], tol=1e-4)
# message: Optimization terminated successfully.
# success: True
# status: 0
# fun: 1126.2309539053672
# x: [ 1.508e+00 7.876e-01]
# nit: 8
# jac: [-1.526e-05 -3.052e-05]
# hess_inv: [[ 3.781e-03 -1.978e-03]
# [-1.978e-03 1.447e-03]]
# nfev: 33
# njev: 11