Search code examples
pythonmatplotlibdata-analysiserrorbar

Function plotting with matplotlib


I am trying to model an equation that depends on T and parameters xi, mu, sig.

I have inferred parameters and spread(standard deviation) of those parameters for different durations (1h, 3h, etc). In the example code the parameters are for 1h duration.

I need to create a forloop to create a cloud of zp with the array of xi, mu and sig. The different values T can take are [2, 5, 25, 50, 75, 100]

I also want to show error bars or uncertainty with the standard deviation in line 2. I used Metropolis Hastings Algorithm for exploring the parametric space with 15000 iterations in 3 chains

xi = accepted[0:]
xi = array([[-2.00000000e-01,  1.00000000e-01,  1.00000000e-01],
   [-2.06044711e-01,  1.51739593e-01,  1.36675069e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   ...,
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [-3.14226453e-02,  1.44844410e+01,  5.00637147e+00]])
 
   mu = accepted[1:]
   mu = array([[-2.06044711e-01,  1.51739593e-01,  1.36675069e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   ...,
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [-3.14226453e-02,  1.44844410e+01,  5.00637147e+00]])

   sig = accepted [2:]

   sig = array([[-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   [-2.91747418e-01,  1.10818827e-01,  1.80040639e-01],
   ...,
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [ 1.45611857e-02,  1.46824099e+01,  5.16110127e+00],
   [-3.14226453e-02,  1.44844410e+01,  5.00637147e+00]])

spread = accepted[:,0].std(), accepted[:,1].std(), accepted[:,2].std()
(xi, mu, sig)
def zp(T, xi = accepted[0:], mu = accepted[1:], sig= accepted[2:]):
    p = 1/T
    yp = - np.log10(1 - p)
    zp = np.ndarray(shape=(xi.size, T.size))
    for i in range(xi.size):
    if xi[i] == 0:
        zp[i,:] = mu[i] - (sig[i]*(np.log10(yp)))
    else:
        zp[i,:] = mu[i] - ((sig[i]/xi[i])*(1-(yp**(-xi[i]))))
    return zp
    # get results
    res = zp(T, xi, mu, sig)

Solution

  • So, you have the (15000,3) matrix accepted, where xi=accepted[:,0], mu=accepted[:,1] and sig=accepted[:,2].

    I will generate some sample data for xi, mu and sig, just to show you the results of plotting.

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    # I will generate some sample data
    # You'll have to use your own data
    n = 15000
    np.random.seed(1)
    xi, mu, sig = (
        np.random.normal(loc=-0.15153068743678966, scale=0.2254333661580348, size=(n)),
        np.random.normal(loc=14.241861263759796, scale=2.6116567608814196, size=(n)),
        np.random.normal(loc=5.5779131542307345, scale=0.9627764065564182, size=(n)),
    )
    

    You defined T as

    # define T steps
    T =  np.array([2, 5, 25, 50, 75, 100])
    

    Now we take mean and standard deviation of parameters

    xi_mean = xi.mean()
    mu_mean = mu.mean()
    sig_mean = sig.mean()
    
    xi_std = xi.std()
    mu_std = mu.std()
    sig_std = sig.std()
    

    and define a function zp

    # function zp
    def zp(T, xi, mu, sig):
        p = 1 / T
        yp = - np.log10(1 - p)
        
        # ravel results
        _xi = xi.ravel()
        _mu = mu.ravel()
        _sig = sig.ravel()
        
        res = np.ndarray(shape=(_xi.size, T.size))
        
        for i in range(_xi.size):
            if _xi[i] == 0:
                res[i,:] = _mu[i] - (_sig[i]*(np.log10(yp)))
            else:
                res[i,:] = _mu[i] - ((_sig[i]/_xi[i])*(1-(yp**(-_xi[i]))))
        return res
    
    # get results
    res = zp(T, xi, mu, sig)
    

    We can define a DataFrame with all results

    # define results DataFrame
    df = pd.DataFrame(res, columns=T)
    print(df)
    
                 2          5          25         50         75         100
    0      24.610952  34.489626  54.614356  65.349657  72.376143  77.735341
    1      16.554362  20.033999  23.514591  24.524273  25.023313  25.342476
    2      23.468215  28.276272  33.212243  34.678420  35.410825  35.882346
    3      23.102447  26.089680  28.680803  29.339580  29.646593  29.835899
    4      21.021596  30.494043  45.594905  52.182941  56.105955  58.925041
    ...          ...        ...        ...        ...        ...        ...
    14995  22.964737  27.856439  33.039263  34.623438  35.425031  35.945247
    14996  21.371429  29.078696  47.122467  57.868230  65.281555  71.127181
    14997  18.534785  21.512996  24.424363  25.251344  25.656252  25.913699
    14998  19.915343  28.939309  43.440076  49.808611  53.612702  56.351668
    14999  20.835338  25.069838  29.829853  31.364291  32.159584  32.683499
    
    [15000 rows x 6 columns]
    

    Now we compute zp with mean +/- std of parameters

    zp_mean = zp(T, xi_mean, mu_mean, sig_mean).ravel()
    zp_lo = zp(T, xi_mean-xi_std, mu_mean-mu_std, sig_mean-sig_std).ravel()
    zp_hi = zp(T, xi_mean+xi_std, mu_mean+mu_std, sig_mean+sig_std).ravel()
    

    and we can finally plot the 15000 lines and mean+/-std

    fig, ax = plt.subplots(figsize=(12, 5))
    
    ax.errorbar(
        T, zp_mean,
        yerr=[zp_mean-zp_lo, zp_hi-zp_mean],
        color='k', zorder=999,
        label='mean and std'
    )
    
    for i, col in enumerate(df.T.columns):
        _df = df.T[col]
        ax.plot(_df, lw=1, alpha=.01, color='r')
        
    ax.set(
        xlabel='duration',
        ylabel='value',
        # adjust this
        ylim=(10, 150)
    )
    ax.legend()
    plt.show()
    

    enter image description here


    You could also choose a faster solution with seaborn.

    First, melt the DataFrame

    melt_df = df.melt(var_name='duration')
    print(melt_df)
    
           duration      value
    0             2  24.610952
    1             2  16.554362
    2             2  23.468215
    3             2  23.102447
    4             2  21.021596
    ...         ...        ...
    89995       100  35.945247
    89996       100  71.127181
    89997       100  25.913699
    89998       100  56.351668
    89999       100  32.683499
    
    [90000 rows x 2 columns]
    

    then plot with relplot and choose the wanted confidence interval (here is 99%, could be also 'sd')

    import seaborn as sns
    g = sns.relplot(
        kind='line',
        data=melt_df,
        x='duration', y='value',
        ci=99
    )
    g.axes.flat[0].set_title('Confidence Interval 99%')
    plt.show()
    

    enter image description here