Plotting gBM issues

Geometric Brownian motion (gBM) is a stochastic process and can be thought of as an extension of standard Brownian motion.

I am trying to write a function that simulates different paths (ntraj paths) of gBM and then plots a histogram at certain points specified in a list tcheck. Once it has plotted these plots the function is meant to superimpose a lognormal distribution on the plot for each time.

The output is meant to look like this

except for gBM rather than a standard Brownian motion process. So far I have a function to generate multiple paths of gBM as,

def oneDGeometricBM(nTraj=100,n=100,T=1.0,sigma=1,mu=0):
    '''
    DOCSTRING:
    1D geomwtric brownian motion
    INPUTS:
    ntraj = "number of trajectories"
    n = "length of a trajectory"
    T = "last time point, i.e final tradjectory t = {0,1,...,T}"
    sigma= volatility
    mu= percentage drift

    '''
    np.random.seed(52323)
    S_0 = 0

    # Discretize, dt =  time step = $t_{j+1}- t_{j}$
    dt = T/(n)  
    sqrtdt = np.sqrt(dt)

    # Container for different colors for each trajectory
    colors = plt.cm.jet(np.linspace(0,1,nTraj))
    # Container for trajectories
    xtraj=np.zeros(n+1,float)
    ztraj=np.zeros(n+1,float)
    trange=np.linspace(start = 0,stop = T ,num = n+1)

    # Simulation
    # Random Variable $X_{n}$ is distributed np.sqrt(dt)* N(mu=0,sigma=1) 
    for j in range(nTraj):
        # Loop over time
        for i in range(n):
            xtraj[i+1]=xtraj[i]+ sqrtdt * np.random.randn() + dt*mu
        # Loop again over time in order to make geometric drift
        ztraj = S_0 * np.exp(xtraj) # ztraj[z+1]=  ztraj[0]+ np.exp(xtraj[z])

        plt.plot(trange , xtraj,'b-',alpha=0.2, color=colors[j], lw=3.0,label="$\sigma$={}, $\mu$={:.5f}".format(sigma,mu))
    plt.title("1D Geometric Brownian Motion:\n nTraj={}, T={},\n $\Delta t$={:.3f}, $\sigma$={}, $\mu$={:.3f}".format(nTraj,T,dt,sigma,mu))    
    plt.xlabel(r'$t$')
    plt.ylabel(r'$Z_t$');

oneDGeometricBM(nTraj=5,n=10**3,T=10.0,sigma=0.8,mu=1.1)

I have seen many answers to questions on how to plot multiple paths of gBM but I am interested in how to get look at the histograms at specific times and then look at the distribution. Below is my function so far. It is not working but I am not able to figure out what I am doing wrong. I also added the output I got.

import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.stats import norm, lognorm
ntraj = 10000
S_0 =0
sigma=1
mu=1
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 1.0
'''
ntraj = 10**4
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 5.0 # limits
'''
n=int(tfinal/dt)
sqrtdt = np.sqrt(dt)

x=np.zeros(shape=[ntraj,n+1], dtype=float)
z=np.zeros(shape=[ntraj,n+1], dtype=float)
zrange=np.arange(start=-xv, stop=xv, step=dt)

# Calculate the number of the bins 
binval = math.ceil(np.sqrt(ntraj))
# Nested for loop to create Drifted BM
for i in range(n):
    for j in range(ntraj):
        x[j,i+1]=x[j,i]+ sqrtdt*np.random.randn()


 #Nested loop to create gBM
for j0 in range(ntraj):
    for i0 in range(n+1):
        z[j0,i0] = 0 + np.exp(x[j0,i0])

# Loop to plot the distribution of gBM tradjectories at different times       
for i1 in range(n):
    # Compute histogram at every tsample , sample at time t
    t=(i1+1)*dt
    if t in tcheck:
       # Plot histogram on sample
       plt.hist(z[:,i1],bins=30,density=False,alpha=0.6,label=['t ={}'.format(t)] )
       # Superimpose each samples mean
       xbar = np.average(z[:,i1])
       plt.axvline(xbar, color='RED', linestyle='dashed', linewidth=2) 
       # Plot theoretic distribution { N(0, sqrt[t] ) }
       #plt.plot(xrange,norm.pdf(xrange,0.0,np.sqrt(t)),'k--')

So to summarize my question. I am trying to simulate multiple trajectories of gBM, store my results in an array then loop over this array and plot a histogram over the specific points using matplotlib then lastly superimpose a lognormal distribution on my histograms.

Edit 1:

I need to superimpose the lognormal distribution on the GBM and the Cauchy if possible. My problem is that when I edited the correction by @Paul Harris I got,

def oneDGeometricBM(nTraj=100,n=100,T=1.0,sigma=1,mu=0):
    '''
    DOCSTRING:
    INPUTS:
    ntraj = "number of trajectories"
    n = "length of a trajectory"
    T = "last time point, i.e final tradjectory t = {0,1,...,T}"
    sigma= volatility
    mu= percentage drift

    '''
    np.random.seed(52323)
    S0 = 10

    # Discretize, dt =  time step = $t_{j+1}- t_{j}$
    dt = T/(n)  
    sqrtdt = np.sqrt(dt)

    # Container for different colors for each trajectory
    colors = plt.cm.jet(np.linspace(0,1,nTraj))
    # Container for trajectories
    xtraj=np.zeros(n+1,float)
    ztraj=np.zeros(n+1,float)
    trange=np.linspace(start = 0,stop = T ,num = n+1)

    out = []
    # Simulation
    # Random Variable $X_{n}$ is distributed np.sqrt(dt)* N(mu=0,sigma=1) 
    for j in range(nTraj):
        # Loop over time
        for i in range(n):
            xtraj[i+1]=xtraj[i]+ sqrtdt * np.random.randn() + dt*mu

        # Loop again over time in order to make geometric drift
    ztraj = S0 * np.exp(xtraj)
    # Return gBM 
    return ztraj

# Plotting
fig, ax = plt.subplots(ncols=2, figsize=plt.figaspect(1./2))

colors = ['k', 'r', 'b']
T =  [1.0, 2.0, 5.0]
sigma=0.8
mu=1.1 

for c, T in zip(colors, T):
    ztraj = oneDGeometricBM(nTraj=5,n=10**4,T=T,sigma=0.8,mu=1.1)
    # Plot Emperical Values
    xrange = range(0,80,1)
    ax[0].hist(ztraj, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(0, 95))

    # Plot the theoretical values
    theoretic_mean = math.exp(mu * T + 0.5 * sigma**2 * T)
    theoretic_var = math.exp(2* mu * T + sigma**2 * T)* (math.exp(sigma**2 * T) - 1)
    ax[0].plot(xrange,lognorm.pdf(xrange, theoretic_mean , theoretic_var ),'k--')

    # Plot the differences between consecutive elements of gBM (an array)
    diff = np.ediff1d(ztraj)
    ax[1].hist(diff, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(-5, 5))


ax[0].set_xlabel('z')
ax[0].set_ylabel('$p(z,T)$')
ax[0].set_title('Histogram of ztraj positions')

ax[1].set_xlabel('dz')
ax[1].set_ylabel('$p(dz,T)$')
ax[1].set_title('Histogram of d(ztraj) positions\nbetween time steps')

ax[0].legend()
fig.tight_layout()

So to summarize I need to superimpose the distribution at each time point, the theoretical distribution for the gBM which is the lognormal distribution.

Solution

So I have had a look at your problem. I have edited your function to stop plotting and return xtraj which I assume is your Brownian motion:

def oneDGeometricBM(nTraj=100,n=100,T=1.0,sigma=1,mu=0):
    '''
    DOCSTRING:
    1D geomwtric brownian motion
    INPUTS:
    ntraj = "number of trajectories"
    n = "length of a trajectory"
    T = "last time point, i.e final tradjectory t = {0,1,...,T}"
    sigma= volatility
    mu= percentage drift

    '''
    np.random.seed(52323)
    S_0 = 10

    # Discretize, dt =  time step = $t_{j+1}- t_{j}$
    dt = T/(n)  
    sqrtdt = np.sqrt(dt)

    # Container for different colors for each trajectory
    colors = plt.cm.jet(np.linspace(0,1,nTraj))
    # Container for trajectories
    xtraj=np.zeros(n+1,float)
    ztraj=np.zeros(n+1,float)
    trange=np.linspace(start = 0,stop = T ,num = n+1)

    out = []
    # Simulation
    # Random Variable $X_{n}$ is distributed np.sqrt(dt)* N(mu=0,sigma=1) 
    for j in range(nTraj):
        # Loop over time
        for i in range(n):
            xtraj[i+1]=xtraj[i]+ sqrtdt * np.random.randn() + dt*mu

        # Loop again over time in order to make geometric drift
    ztraj = S_0 * np.exp(xtraj) # ztraj[z+1]=  ztraj[0]+ np.exp(xtraj[z])

    return ztraj

The displacement per time step is then the differences within the array xtraj: dx = np.ediff1d(oneDGeometricBM(...)), so we compute a histogram of those values:

fig, ax = plt.subplots()

ax.hist(np.ediff1d(oneDGeometricBM(nTraj=5,n=10**3,T=10.0,sigma=0.8,mu=1.1)), bins=50, alpha=0.5, label='T=10', density=True)
ax.hist(np.ediff1d(oneDGeometricBM(nTraj=5,n=10**3,T=1.0,sigma=0.8,mu=1.1)), bins=50, alpha=0.5, color='k', label='T=1', density=True)
ax.hist(np.ediff1d(oneDGeometricBM(nTraj=5,n=10**3,T=5.0,sigma=0.8,mu=1.1)), bins=50, alpha=0.5, color='r', label='T=5', density=True)

ax.set_xlabel('x')
ax.set_ylabel('$p(x,T)$')

ax.legend()

I have used 3 different T values, as in the example. To normalise the histogram such that the y-axis now represents probability p(x,T), ie. sum of all p*x = 1, we use the density=True argument.

EDIT

I have edited the oneDGeometricBM function to return ztraj = S0*np.exp(xtraj). Your initial S0 value was 0, so I have made it non-zero.

You can plot ztraj differences as:

fig, ax = plt.subplots()

colors = ['k', 'r', 'b']
T = [1.0, 2.0, 5.0]

for c, T in zip(colors, T):
    ztraj = oneDGeometricBM(nTraj=5,n=10**3,T=T,sigma=0.8,mu=1.1)
    diff = np.ediff1d(ztraj)
    ax.hist(diff, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(-10, 10))

ax.set_xlabel('x')
ax.set_ylabel('$p(x,T)$')

ax.legend()

EDIT2

From looking more closely at your produced histograms I think your modelling was correct, just the xrange of the plot should be adjusted as ztraj gets large for large T, you can limit the histogram using the range argument. So I have plotted ztraj and d(ztraj) for three separate T. ztraj does appear to approximately follow a log-normal distribution and the difference in ztraj appears to approximately follow a Lorentzian distribution (have to check theory on that one, maybe Gaussian). Code to reproduce:

fig, ax = plt.subplots(ncols=2, figsize=plt.figaspect(1./2))

colors = ['k', 'r', 'b']
T = [1.0, 2.0, 5.0]

for c, T in zip(colors, T):
    ztraj = oneDGeometricBM(nTraj=5,n=10**4,T=T,sigma=0.8,mu=1.1)

    ax[0].hist(ztraj, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(0, 95))

    diff = np.ediff1d(ztraj)
    ax[1].hist(diff, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(-5, 5))

ax[0].set_xlabel('z')
ax[0].set_ylabel('$p(z,T)$')
ax[0].set_title('Histogram of ztraj positions')

ax[1].set_xlabel('dz')
ax[1].set_ylabel('$p(dz,T)$')
ax[1].set_title('Histogram of d(ztraj) positions\nbetween time steps')

ax[0].legend()
fig.tight_layout()

And here is your data and plot but limiting the histogram range=(0, 10):

EDIT3

I have included code to fit the lognormal distributions and shown them on your original plot. We define the lognormal function as:

from scipy.optimize import curve_fit

def lognorm(x, x0, A, sigma):
    return A * np.exp(-(np.log(x)-x0)**2 / (2*sigma**2))

and then fit using the values and bins from the histogram in the final loop as:

# Loop to plot the distribution of gBM tradjectories at different times       
for i1 in range(n):
    # Compute histogram at every tsample , sample at time t
    t=(i1+1)*dt
    if t in tcheck:
        # Plot histogram on sample
        v, b, patches = plt.hist(z[:,i1],bins=200,density=False,alpha=0.6,label=['t ={}'.format(t)], range=(0, 10) )

        # second term is bin centre locations rather than bin edges
        popt, pcov = curve_fit(lognorm, b[:-1] + np.ediff1d(b), v, p0=(0.1, 300, 0.3))

        # make colors match their original data but no transparency
        plt.plot(b, lognorm(b, *popt), color=patches[0].get_facecolor()[:3])

        print(f'tcheck: {t} with parameters: {popt}')

Output:

tcheck: 0.5 with parameters: [ -0.42334757 358.38545736   0.6748076 ]
tcheck: 1.0 with parameters: [ -0.90719967 321.03944864   0.96137893]
tcheck: 4.0 with parameters: [ -3.66426932 721.41708932   1.86376987]

EDIT4

The whole code to generate the above output would be:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.stats import norm, lognorm
from scipy.optimize import curve_fit

def lognorm(x, x0, A, sigma):
    return A * np.exp(-(np.log(x)-x0)**2 / (2*sigma**2))

ntraj = 10000
S_0 =0
sigma=1
mu=1
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 1.0
'''
ntraj = 10**4
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 5.0 # limits
'''
n=int(tfinal/dt)
sqrtdt = np.sqrt(dt)

x=np.zeros(shape=[ntraj,n+1], dtype=float)
z=np.zeros(shape=[ntraj,n+1], dtype=float)
zrange=np.arange(start=-xv, stop=xv, step=dt)

# Calculate the number of the bins 
binval = math.ceil(np.sqrt(ntraj))
# Nested for loop to create Drifted BM
for i in range(n):
    for j in range(ntraj):
        x[j,i+1]=x[j,i]+ sqrtdt*np.random.randn()


 #Nested loop to create gBM
for j0 in range(ntraj):
    for i0 in range(n+1):
        z[j0,i0] = 0 + np.exp(x[j0,i0])

# Loop to plot the distribution of gBM tradjectories at different times       
for i1 in range(n):
    # Compute histogram at every tsample , sample at time t
    t=(i1+1)*dt
    if t in tcheck:
        # Plot histogram on sample
        v, b, patches = plt.hist(z[:,i1],bins=200,density=True,alpha=0.6,label=['t ={}'.format(t)], range=(0, 10))

        popt, pcov = curve_fit(lognorm, b[:-1] + np.ediff1d(b), v, p0=(0.1, 300, 0.3))
        # make colors match their original data but no transparency
        plt.plot(b, lognorm(b, *popt), color=patches[0].get_facecolor()[:3])

        print(f'tcheck: {t} for parameters: {popt}')