Search code examples
pythonstatisticshistogramprobability-distribution

Calculation of the first and third quartile


I'm trying to calculate the mean, standard deviation, median, first quartile and third quartile of the lognormal distribution that I fit to my histogram. So far I've only been able to calculate the mean, standard deviation and median, based on the formulas I found on Wikipedia, but I don't know how to calculate the first quartile and the third quartile. How could I calculate in Python the first quartile and the third quartile, based on the lognormal distribution?

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
#from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True

from scipy.optimize import curve_fit

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

plt.rcParams["figure.figsize"] = [18,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')

def fun(y, mu, sigma):
    return 1.0/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))

step = 0.1

xx = x-step*0.99

nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)

frequencia_relativa /= nrm # normalize frequences histogram

print(np.sum(frequencia_relativa*step)) # check normalizatio

params, extras = curve_fit(fun, xx, frequencia_relativa)

print(params)

axes = f.add_subplot(111, frameon=False)

ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
ax.axvspan(0.243, 1.481, label='Média $\pm$ desvio padrão', ymin=0.0, ymax=1.0, alpha=0.2, color='Plum') #lognormal distribution
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
ax.bar(xx, height=frequencia_relativa, label='Frequência relativa normalizada do tamanho triangular', alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(xx, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")

xxx = np.linspace (0.001, 8, 1000)
ax.plot(xxx, fun(xxx, params[0], params[1]), "r-", label='Distribuição log-normal', linewidth=3)
ax2.plot(xxx, fun(xxx, params[0], params[1]), "r-", linewidth=3)

ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))

plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,1.4)
ax2.set_xlim(7.0,8.0)
def func(x, pos):  # formatter function takes tick label and tick position
    s = str(x)
    ind = s.index('.')
    return s[:ind] + ',' + s[ind+1:]   # change dot to comma
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)

d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa normalizada', va='center', rotation='vertical', fontsize=22)

ax.axvline(0.862, color='k', linestyle='-', linewidth=1.3) #lognormal distribution
ax.axvline(0.243, color='k', linestyle='--', linewidth=1)  #lognormal distribution
ax.axvline(1.481, color='k', linestyle='--', linewidth=1)  #lognormal distribution
f.legend(loc=9, 
          bbox_to_anchor=(.77,.99),
          labelspacing=1.5,
          numpoints=1,
          columnspacing=0.2,
          ncol=1, fontsize=18,
          frameon=False)

ax.text(0.86*0.63, 1.4*0.92, 'tamanho = (0,86 $\pm$ 0,62) $\mu m$', fontsize=20) #Excel
mu = params[0]
sigma = params[1]

# calculate mean value
print(np.exp(mu + sigma*sigma/2.0))

# calculate stddev
print(np.sqrt((np.exp(sigma*sigma)-1)*np.exp(mu+sigma*sigma/2.0)))

# calculate median value
print(np.exp(mu))

f.tight_layout()
#plt.show()
plt.savefig('output.png', dpi=500, bbox_inches='tight')

Graph: enter image description here


Solution

  • Welcome back

    Ok, for log-normal distribution one could compute quantiles using expression in the page in the wiki

    import numpy as np
    from scipy import special
    
    SQRT2 = 1.41421356237
    
    def LNquantile(μ, σ, p):
        """
        Compute quantile function for log-normal distribution
        """
        nq = SQRT2 * special.erfinv(2.0*p - 1.0) # N(0,1) normal quantile
        t = μ +  σ * nq # N(μ, σ) quantile
        return np.exp(t) # LN(μ, σ) quantile
    
    μ = 0.0
    σ = 1.0
    
    q1 = LNquantile(μ, σ, 0.25)
    q3 = LNquantile(μ, σ, 0.75)
    
    print(q1)
    print(q3)
    

    prints reasonable

    0.5094162838640294
    1.9630310841553595
    

    You could put any values and compare with online calculator at https://planetcalc.com/7263/