python python-2.7 python-2.x integral data-fitting

Fit an integral function with parametric limit to data with Python (Debye Model)

I am trying to fit resistivity vs. temperature data to the Bloch–Grüneisen formula for resistivity in metals:

$$\rho(T) = \rho_0 + A_D \frac{T^5}{\Theta_D^4} \int_0^{\Theta_D/(2T)} \frac{x^5}{\sinh^2 x}\,dx$$

As you can see, there is an integral with a parametric upper limit. I don't know how to implement an algorithm to run a least-squares fit. I came up with:

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl
import scipy as sp
from scipy.optimize import leastsq

#retrieve data from file
data = pl.loadtxt('salita.txt')
Temp = data[:, 0]
Res = data[:, 2]

def debye_func(p, T, r):
    rho0, AD, TD = p
    coeff = AD*np.power(T, 5)/np.power(TD, 4)
    f = np.power(x^5)/np.power(np.sinh(x), 2) #function to integrate
    err_debye = r - rho0 - coeff * #integral???
    return err_debye

p0 = sp.array([0.0001 , 0.00001, 50])

plsq = leastsq(debye_func, p0, args=(Temp, Res))

print plsq

Any ideas on how I could write it?

EDIT: my code has become:

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl
import scipy as sp
from scipy.optimize import leastsq
from scipy.integrate import quad

#retrieve data from file
data = pl.loadtxt('salita.txt')
Temp = data[:, 0]
Res = data[:, 2]

def debye_integrand(x):
    return np.power(x, 5)/np.power(np.sinh(x), 2)

def debye_func(p, T, r):
    rho0, AD, TD = p
    coeff = AD*np.power(T, 5)/np.power(TD, 4)
    err_debye = r - rho0 - coeff * quad(debye_integrand, 0, TD/(2*T))
    return err_debye

p0 = sp.array([0.0001 , 0.00001, 50])

plsq = leastsq(debye_func, p0, args=(Temp, Res))

print plsq

Now I get a ValueError:

Traceback (most recent call last):
  File "debye.py", line 24, in <module>
    plsq = leastsq(debye_func, p0, args=(Temp, Res))
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/minpack.py", line 348, in leastsq
    m = _check_func('leastsq', 'func', func, x0, args, n)[0]
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/optimize/minpack.py", line 14, in _check_func
    res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
  File "debye.py", line 19, in debye_func
    err_debye = r - rho0 - coeff * quad(debye_integrand, 0, TD/(2*T))
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/integrate/quadpack.py", line 247, in quad
    retval = _quad(func,a,b,args,full_output,epsabs,epsrel,limit,points)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/scipy/integrate/quadpack.py", line 296, in _quad
    if (b != Inf and a != -Inf):
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

I think that means I'm providing leastsq an argument it can't take, but I don't know how to modify my code.

EDIT2: I solved my equation analytically with Maxima and I got

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl
import scipy as sp
from scipy.optimize import leastsq
from scipy.integrate import quad
from scipy.special import zetac
from mpmath import polylog

#retrieve data from file
data = pl.loadtxt('salita.txt')
Temp = data[:, 0]
Res = data[:, 2]

def debye_integrand(x):
    return np.power(x, 5)/np.power(np.sinh(x), 2)

def debye_func(p, T, r, integral):
    rho0, AD, TD = p
    coeff = AD*np.power(T, 5)/np.power(TD, 4)
    den = np.exp(TD/T) -1
    m1 = 5*((TD/(2*T))**4)*np.log(np.exp(TD/(2*T)+1)*(np.exp(TD/T)-1)+120*polylog(5, np.exp(TD/(T))*(1-np.exp(TD/(2*T)))
    m2 = 120*(TD/(2*T))*polylog(4, np.exp(TD/(2*T)))*(np.exp(np.exp(TD/T))-1)+60*((TD/(2*T))**2)*polylog(3, np.exp(TD/(2*T))*(1-np.exp((TD/(2*T)))
    m3 = 20*((TD/(2*T))**3)*polylog(2, np.exp(TD/(2*T))*(np.exp(TD/T)-1)+120**polylog(5, -np.exp(TD/(2*T)))*(1-np.exp(TD/T))
    m4 = 120*(TD/(2*T))*polylog(4, -np.exp(TD/(2*T)))*(np.exp(TD/T)-1)+60*((TD/(2*T))**2)*polylog(3, -np.exp(TD/(2*T)))*(1-np.exp(TD/T))
    m5 = 20*((TD/(2*T))**3)*polylog(2, -np.exp(TD/(2*T)))*(np.exp(TD/T)-1) -2*((TD/(2*T))**5)*np.exp(TD/T)
    m6 = 5*((TD/(2*T))**4)*np.log(1-np.exp(TD/(2*T))*(np.exp(TD/T)-1)
    zeta = 15.0*zetac(5)/2

    integral = (m1+m2+m3+m4+m5+m6)/den +zeta

    err_debye = r - rho0 - coeff * integral
    return err_debye

#initizalied with Einstein model fit
p0 = sp.array([0.00001 , 0.0000001, 70.0])  

plsq = leastsq(debye_func, p0, args=(Temp, Res))

print plsq

It says SyntaxError: invalid syntax in m2. I tried to do it with loops in the numerical way, but I didn't succeed.

My .txt file is here, if you want to try. First column is temperature, third one is resistivity.

Solution

You could, for instance, separately define the integrand function,

def debye_integrand(x, n):
    """Integrand x**n / ((exp(x) - 1) * (1 - exp(-x))) of the Debye integral.

    Parameters
    ----------
    x : float or array_like
        Integration variable.
    n : int or float
        Power of ``x`` in the numerator.

    Returns
    -------
    float or ndarray
        The integrand evaluated at ``x``.
    """
    # expm1(x) == exp(x) - 1 and -expm1(-x) == 1 - exp(-x), but without the
    # catastrophic cancellation the explicit subtractions suffer for small x.
    return x**n / (np.expm1(x) * (-np.expm1(-x)))

and then use scipy.integrate.quad to do this integration numerically,

from scipy.integrate import quad 

def debye_func(p, T, r):
    """Sum of squared residuals between the data and the fitted model.

    Parameters
    ----------
    p : sequence of 3 floats
        Fit parameters ``(rho0, AD, TD)``.
    T : float or array_like
        Temperatures of the data points.
    r : float or array_like
        Measured resistivities, one per temperature.

    Returns
    -------
    float
        ``sum((r - rho0 - coeff * integral)**2)``, suitable as a scalar
        objective for ``scipy.optimize.minimize``.
    """
    rho0, AD, TD = p
    n = 5  # power of x in the integrand
    T = np.atleast_1d(np.asarray(T, dtype=float))
    coeff = AD*np.power(T, 5)/np.power(TD, 4)
    # quad only accepts scalar limits and returns (value, abs_error), so
    # integrate once per temperature and keep element 0 of each result.
    integral = np.array([quad(debye_integrand, 0, upper, args=(n,))[0]
                         for upper in TD/(2*T)])
    err_debye = r - rho0 - coeff * integral
    return np.sum(err_debye**2)

That's the general idea, and this might need to be adapted further to your code. An ideal solution would be to find an analytical solution to that integral, or rewrite it with classical integral functions from scipy.special, ~~but it might not be straightforward~~ (see below).

Also, you should use the more general scipy.optimize.minimize function instead of the least-squares fit, since it provides algorithms that are more efficient and robust for non-smooth optimizations. The default optimization method, BFGS, is a good start.

Edit: actually, there is an analytical solution to this integral (for n=5), that you can obtain, for instance, with Maxima,

>> integrate(x**5/((exp(x) - 1)*(1 - exp(-x))), x, 0, a)

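$$\int_0^a \frac{x^5}{(e^x - 1)(1 - e^{-x})}\,dx = -120\,\mathrm{Li}_5(e^a) + 120\,a\,\mathrm{Li}_4(e^a) - 60\,a^2\,\mathrm{Li}_3(e^a) + 20\,a^3\,\mathrm{Li}_2(e^a) + 5\,a^4 \ln(1 - e^a) - \frac{a^5 e^a}{e^a - 1} + 120\,\zeta(5)$$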

where a is the integration limit, Li_k is the polylogarithm function of order k (see mpmath.polylog) and ζ is the Riemann zeta function (see scipy.special.zeta; note that scipy.special.zetac returns ζ(x) − 1).

Although, depending on your needs, it might be faster to just go with a numerical integration (or a pre-calculated table lookup) rather than putting all of this together and converting it to Python.

Edit 2: Here is the final solution, with analytical calculation of the integral,

import numpy as np
import mpmath as mp
from scipy.optimize import minimize
from scipy.integrate import quad
import matplotlib.pyplot as plt

def debye_integral_sym_scalar(x):
    """
    Calculate the Debye integral for a scalar using multi precision math,
    as otherwise it overflows with 64bit floats.

    Evaluates the closed form of the integral of
    t**5 / ((exp(t) - 1) * (1 - exp(-t))) for t from 0 to ``x``.

    Parameters
    ----------
    x : scalar
        Upper integration limit.

    Returns
    -------
    mpmath number
        Value of the integral. It may be returned as a complex mpmath value;
        only its real part is meaningful.
    """
    if x == 0:
        # The closed form below is 0/0 (and log(0)) at x == 0, while the
        # integral over an empty interval is exactly zero.
        return mp.mpf(0)

    exp_x = mp.exp(x)

    m1 = -120*mp.polylog(5, exp_x)
    m2 = 120*x*mp.polylog(4, exp_x)
    m3 = -60*x**2*mp.polylog(3, exp_x)
    m4 = 20*x**3*mp.polylog(2, exp_x)
    m5 = 5*x**4*mp.log(1 - exp_x)
    # Only this last term is divided by (exp(x) - 1).
    m6 = -x**5*exp_x/(exp_x - 1)

    # 120*zeta(5) is minus the value of the polylog terms at the lower limit 0.
    return m1 + m2 + m3 + m4 + m5 + m6 + 120*mp.zeta(5)

 # this is the actual function that we can use

def debye_integral_sym(x):
    """Evaluate the analytical Debye integral element-wise.

    Parameters
    ----------
    x : float or array_like
        Upper integration limit(s).

    Returns
    -------
    float or ndarray
        Real part of ``debye_integral_sym_scalar`` applied to each element
        of ``x``.
    """
    # The builtin ``complex`` replaces the ``np.complex`` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    f = np.vectorize(debye_integral_sym_scalar, otypes=[complex])
    return f(x).real

def debye_integrand(x, n):
    """Integrand x**n / ((exp(x) - 1) * (1 - exp(-x))) of the Debye integral.

    Parameters
    ----------
    x : float or array_like
        Integration variable.
    n : int or float
        Power of ``x`` in the numerator.

    Returns
    -------
    float or ndarray
        The integrand evaluated at ``x``.
    """
    # expm1(x) == exp(x) - 1 and -expm1(-x) == 1 - exp(-x), but without the
    # catastrophic cancellation the explicit subtractions suffer for small x.
    return x**n / (np.expm1(x) * (-np.expm1(-x)))

# Sanity check: the closed-form result from debye_integral_sym must agree
# with direct numerical integration of debye_integrand (n = 5) by quad.
a = 10.0 
# Element 0 of quad's return value is the integral estimate.
res0 =  quad(debye_integrand, 0, a, args=(5,))[0]
res1 = debye_integral_sym(a)
# Raises AssertionError if the two values differ beyond the default tolerance.
np.testing.assert_allclose(res0, res1)

def resistivity_fit(p, T):
    """Model resistivity at temperature(s) ``T`` for parameters ``p``.

    ``p`` unpacks to ``(rho0, AD, TD)``. The result is
    ``rho0 + AD * T**5 / TD**4 * I(TD / (2*T))``, where ``I`` is the Debye
    integral computed by ``debye_integral_sym``.
    """
    rho_offset, amplitude, theta_d = p
    upper_limit = theta_d/(2*T)
    prefactor = amplitude*np.power(T, 5)/np.power(theta_d, 4)
    return rho_offset + prefactor * debye_integral_sym(upper_limit)


def debye_err_func(p, T, r):
    """Return the sum of squared differences between ``r`` and the model.

    ``p`` and ``T`` are forwarded unchanged to ``resistivity_fit``; ``r``
    holds the measured values compared against its output.
    """
    residuals = r - resistivity_fit(p, T)
    return np.sum(residuals**2)

# wget "http://pastebin.com/raw.php?i=tvzcdxYA" -O salita.txt
data = np.loadtxt('salita.txt')

# Column 0 holds the temperature, column 2 the resistivity.
temp_exp = data[:, 0]
res_exp = data[:, 2]

# Initial guess for (rho0, AD, TD); minimize uses its default method here.
p0 = np.array([0.0001 , 0.00001, 50])
p_opt = minimize(debye_err_func, p0, args=(temp_exp, res_exp))

# The parenthesised form prints the same on Python 2 and also parses on
# Python 3, where the bare ``print p_opt`` statement is a syntax error.
print(p_opt)

# Evaluate the fitted model on a regular grid spanning the measured range.
temp = np.linspace(temp_exp.min(), temp_exp.max(), 100)

plt.plot(temp_exp, res_exp, '.', label='Experimental data')
plt.plot(temp, resistivity_fit(p_opt.x, temp), 'r', label='Bloch-Gruneisen fit')
plt.legend(loc='best')
plt.xlabel('Temperature [K]')
plt.ylabel('Resistivity')
plt.show()

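With the output of the optimization function (note that the third parameter, TD, is returned at exactly its initial guess of 50, so this run does not actually constrain it; rescaling the data or tightening the optimizer tolerances may be necessary),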

   status: 0
   success: True
   njev: 5
   nfev: 25
   hess_inv: array([[  7.32764243e-01,  -4.89555962e-01,  -1.93879729e-08],
   [ -4.89555962e-01,   3.27690582e-01,  -2.09510086e-08],
   [ -1.93879729e-08,  -2.09510086e-08,   1.00000000e+00]])
   fun: 1.784420370873494e-11
   x: array([  9.96468440e-06,   7.40349389e-06,   5.00000000e+01])
   message: 'Optimization terminated successfully.'
   jac: array([ -1.11880569e-06,   1.28115957e-06,   2.31303410e-12])

and the resulting plot,