Search code examples
python scipy scipy-optimize model-fitting

How to increase the accuracy of my model fitting using Scipy Optimization


I want to fit a curve to the following data: munich_temperatures_average.txt

I have tried:

import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize

def func(temp, a, b, c):
    """Evaluate the cosine model ``a * cos(2*pi*temp + b) + c``.

    ``temp`` is the independent variable (scalar or NumPy array), ``a`` the
    amplitude, ``b`` the phase offset in radians and ``c`` the constant
    vertical offset. The model has a period of exactly 1 in ``temp``.
    """
    phase = 2 * np.pi * temp + b
    return a * np.cos(phase) + c

# Read the two columns of the data file into separate arrays.
date, temperature = np.loadtxt('munich_temperatures_average.txt', unpack=True)

# curve_fit returns (optimal parameters, covariance matrix); only the
# optimal parameters are needed for drawing the fitted curve.
result = optimize.curve_fit(func, date, temperature)
amplitude, phase, offset = result[0]

# Raw samples as dots, with the fitted model drawn in red on top of them.
plt.plot(date, temperature, '.')
plt.plot(date, func(date, amplitude, phase, offset), c='red', zorder=10)

plt.ylim([-20, 30])
plt.xlabel("Year", fontsize=18)
plt.ylabel("Temperature", fontsize=18)
plt.show()

However, as you can see in the output image, the oscillation amplitude of the fitted model appears to be smaller than that of the actual data. How can I make the fit more accurate? Thanks in advance.

enter image description here


Solution

  • Thanks to @Reinderien's explanation, I used a 1D median filter to remove the outliers, and the model fit now appears to be accurate:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import optimize
    from scipy.signal import medfilt
    
    def func(temp, a, b, c):
        """Return ``a * cos(2*pi*temp + b) + c`` for scalar or array ``temp``.

        ``a`` scales the oscillation, ``b`` shifts its phase (radians) and
        ``c`` shifts the whole curve vertically; one unit of ``temp`` is
        one full period.
        """
        angle = 2 * np.pi * temp + b
        oscillation = a * np.cos(angle)
        return oscillation + c
    
    # Read the two columns of the data file into separate arrays.
    date, temperature = np.loadtxt('./data/munich_temperatures_average.txt', unpack=True)

    # Baseline: least-squares fit of the cosine model to the raw data.
    popt, pcov = optimize.curve_fit(func, date, temperature)

    # Median filter: 11 passes in total with a 21-sample window, used here to
    # suppress the outliers before fitting again.
    filtered = temperature
    for _ in range(11):
        filtered = medfilt(filtered, 21)

    # Fit the same model to the filtered data.
    popt2, pcov2 = optimize.curve_fit(func, date, filtered)

    fig = plt.figure(figsize=(14, 6), dpi=80)

    # Panel 1: horizontal histogram of the raw temperatures.
    ax1 = fig.add_subplot(131)
    ax1.hist(temperature, color="lightblue", ec="green", bins=150, orientation="horizontal")
    # Dashed red reference lines at temperatures 0 and 20, spanning x = 0..400.
    # Two endpoints are enough to draw a straight line.
    for level in (0, 20):
        ax1.plot([0, 400], [level, level], 'r--', linewidth=1, markersize=1)
    ax1.set_ylim([-20, 30])
    ax1.set_ylabel("Temperature", fontsize=14)
    ax1.set_xlabel("Frequency", fontsize=14)

    # Panel 2: raw data (faint) with the model fitted to the raw data on top.
    ax2 = fig.add_subplot(132)
    ax2.plot(date, temperature, '.', zorder=0, label='data', alpha=0.1)
    ax2.plot(date, func(date, *popt), 'm', zorder=10, label='model')
    ax2.set_ylim([-20, 30])
    ax2.set_xlabel("Year", fontsize=14)
    ax2.legend(loc='best')

    # Panel 3: filtered data with the model fitted to the filtered data on top,
    # so the effect of the filtering on the fit is actually visible.
    ax3 = fig.add_subplot(133)
    ax3.plot(date, filtered, '.', zorder=0, label='filtered data')
    ax3.plot(date, func(date, *popt2), 'm', zorder=10, label='model')
    ax3.set_ylim([-20, 30])
    ax3.set_xlabel("Year", fontsize=14)
    ax3.legend(loc='best')

    # Without this call the figure is never displayed when run as a script.
    plt.show()
    

    enter image description here