Search code examples
pythonnumpyscipylinear-regressioncurve-fitting

Exponential Curve Fitting using Python


I was wondering if anyone knew how to find the values of A and B in Arrhenius equation (𝑘𝑔𝑎𝑠=𝐴𝑒^(−𝐵𝑅𝑇)) using Python. In this equation, 𝑇 is in Kelvin, and A has the unit of mL/min. We want to obtain a description of the rate as a function for temperature over the range from 0°C to 35°C, under the assumption that we can extrapolate.

Basically, we are trying to find out the activity of the yeast with temperature, but first we have to find A and B. I tried a couple curve fitting techniques but all have failed. My fit was a straight line and my numbers were really off.

this is the image

This is the code/info given in the image:

import matplotlib.pyplot as plt
import numpy as np
import scipy as scipy

temperature = np.array([20,25,30,33,35,37,40,45])                  # degrees C
prod_rate = np.array([6.9,9.9,21.2,26.1,32.9,30.4,18.8,18.5])/30   #mL/min

plt.plot(temperature,prod_rate,'ok')
plt.ylabel('Production (mL/min)')
plt.xlabel('Temperature (C)')

this was my best attempt:

import matplotlib.pyplot as plt
import numpy as np
import scipy as scipy

def arrhenius(T, A, B):
    R = 8.314  # Gas constant, J/(mol·K)
    return A * np.exp(-B / (R * T))

temperature = np.array([20,25,30,33,35,37,40,45])                  # degrees C
prod_rate = np.array([6.9,9.9,21.2,26.1,32.9,30.4,18.8,18.5])/30   #mL/min

plt.plot(temperature, prod_rate, 'ok')
plt.ylabel('Production (mL/min)')
plt.xlabel('Temperature (C)')

popt, pcov = curve_fit(arrhenius, temperature, prod_rate)
print(popt)

A_optimal, B_optimal = popt
fitted_curve = arrhenius(temperature, A_optimal, B_optimal)

plt.plot(temperature, prod_rate, 'ok', label='Data')
plt.plot(temperature, fitted_curve, '-r', label='Fitted Curve')
plt.ylabel('Production (mL/min)')
plt.xlabel('Temperature (°C)')
plt.legend()
plt.show()


print("Optimal value for A:", A_optimal, "mL/min")
print("Optimal value for B:", B_optimal)

This was the image it gave me and the numbers I got:

here is the graph

here is the new graph with Kelvin instead of Celsius:

import matplotlib.pyplot as plt
import numpy as np
import scipy as scipy
from scipy.optimize import curve_fit

def arrhenius(T, A, B):
    R = 8.314  # Gas constant, J/(mol·K)
    return A * np.exp(-B / (R * T))

temperature = np.array([20,25,30,33,35,37,40,45])  
temperature_K = temperature + 273.15                # degrees C
prod_rate = np.array([6.9,9.9,21.2,26.1,32.9,30.4,18.8,18.5])/30   #mL/min


plt.plot(temperature_K, prod_rate, 'ok')
plt.ylabel('Production (mL/min)')
plt.xlabel('Temperature (C)')

popt, pcov = curve_fit(arrhenius, temperature_K, prod_rate)
print(popt)

A_optimal, B_optimal = popt
fitted_curve = arrhenius(temperature_K, A_optimal, B_optimal)

plt.plot(temperature_K, prod_rate, 'ok', label='Data')
plt.plot(temperature_K, fitted_curve, '-r', label='Fitted Curve')
plt.ylabel('Production (mL/min)')
plt.xlabel('Temperature (°C)')
plt.legend()
plt.show()

print("Optimal value for A:", A_optimal, "mL/min")
print("Optimal value for B:", B_optimal)

here


Solution

  • Let's assume that your experiment was done wrong. Choose a sub-array to fit, and do it in the logarithmic scale because the linear scales are massive. Do use Kelvin, not Celsius. Here I assume the units of A are arbitrary because the production rate's units have not been corrected.

    import matplotlib.pyplot as plt
    import numpy as np
    
    # Gas constant, J/(mol·K); and 0C in K
    from scipy.constants import R, zero_Celsius
    from scipy.optimize import curve_fit
    
    temperature_C = np.array((20, 25, 30, 33, 35, 37, 40, 45))
    temperature_K = temperature_C + zero_Celsius
    prod_rate_mL_min = np.array((6.9,9.9,21.2,26.1,32.9,30.4,18.8,18.5))/30   # mL/min
    
    
    def arrhenius(T: np.ndarray, A: float, B: float) -> np.ndarray:
        return A * np.exp(-B/R/T)
    
    
    def arrhenius_log(T: np.ndarray, logA: float, B: float) -> np.ndarray:
        return logA - B/R/T
    
    
    # The data are garbage; cut off the last three points during fit
    valid_end = -3
    
    # Estimate calculation, partially based on experimental data
    B0 = 80_000
    logA0 = np.log(
        prod_rate_mL_min[valid_end - 1]
    ) + B0/R/temperature_K[valid_end - 1]
    p0 = logA0, B0
    
    (logA, B), _ = curve_fit(
        f=arrhenius_log, p0=p0,
        xdata=temperature_K[:valid_end],
        ydata=np.log(prod_rate_mL_min[:valid_end]),
    )
    
    # Keep A in log space for accuracy's sake
    a_pow = np.floor(logA / np.log(10))
    a_mantissa = np.exp(logA - a_pow*np.log(10))
    print(f'A = {a_mantissa:.6f}e{a_pow:.0f}')
    print(f'B = {B:.3f}')
    
    fig, ax = plt.subplots()
    ax.scatter(
        temperature_C[:valid_end], prod_rate_mL_min[:valid_end],
        label='experiment (OK)',
    )
    ax.scatter(
        temperature_C[valid_end:], prod_rate_mL_min[valid_end:],
        label='experiment (bad)',
    )
    T_hires = np.linspace(
        start=temperature_K.min(),
        stop=temperature_K.max(),
        num=201,
    )
    ax.plot(
        T_hires - zero_Celsius,
        np.exp(arrhenius_log(T_hires, *p0)),
        label='guess',
    )
    ax.plot(
        T_hires - zero_Celsius,
        arrhenius(
            T=T_hires, A=a_mantissa * 10**a_pow, B=B,
        ),
        label='fit',
    )
    ax.set_xlabel('T (C)')
    ax.set_ylabel('Rate (mL/min)')
    ax.legend()
    plt.show()
    
    A = 6.119210e13
    B = 81109.535
    

    fit

    Linear-logspace fit

    Notice that in log space the expression is linear, so you can reduce to a computationally simpler linear fit that does not require estimates:

    import numpy as np
    
    # Gas constant, J/(mol·K); and 0C in K
    from scipy.constants import R, zero_Celsius
    
    temperature_C = np.array((20, 25, 30, 33, 35, 37, 40, 45))
    temperature_K = temperature_C + zero_Celsius
    prod_rate_mL_min = np.array((6.9,9.9,21.2,26.1,32.9,30.4,18.8,18.5))/30   # mL/min
    
    # The data are garbage; cut off the last three points during fit
    valid_end = -3
    
    (logA, B), *_ = np.linalg.lstsq(
        a=np.stack((
            np.ones(temperature_K.size + valid_end),
            -1/R/temperature_K[:valid_end],
        ), axis=1),
        b=np.log(prod_rate_mL_min[:valid_end]),
        rcond=None,
    )
    
    # Keep A in log space for accuracy's sake
    a_pow = np.floor(logA / np.log(10))
    a_mantissa = np.exp(logA - a_pow*np.log(10))
    print(f'A = {a_mantissa:.6f}e{a_pow:.0f}')
    print(f'B = {B:.3f}')