Search code examples
python-3.xnumpynumbascientific-computing

How to speed up the following code using numba?


I am doing a molecular dynamics simulation. It consists of numerical integration, many for loops, manipulating large NumPy arrays. I have tried to use NumPy function and arrays wherever possible. But the code is still too slow. I thought of using numba jit as a speedup. But it always throws an error message.

Here is the code.

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020

@author: Sandipan
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys

# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp   = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM     =3
N       =500
density =0.776
Rcutoff =3


#----------------------Function Definitions---------------------

#------------------Initialise Configuration--------

@jit(nopython=True)
def initialise_config(N,DIM,density):
    velocity = (np.random.randn(N,DIM)-0.5)


    # Set initial momentum to zero
    COM_V = np.sum(velocity)/N     #Center of mass velocity
    velocity = velocity - COM_V    # Fix any center-of-mass drift

    # Calculate initial kinetic energy
    k_energy=0
    for i in range (N):
        k_energy+=np.dot(velocity[i],velocity[i])
    vscale=np.sqrt(DIM*temp/k_energy)
    velocity*=vscale

    #Initialize with zeroes
    coords = np.zeros([N,DIM]);

    # Get the cooresponding box size
    L = (N/density)**(1.0/DIM)

    """ Find the lowest perfect cube greater than or equal to the number of
     particles"""
    nCube = 2

    while (nCube**3 < N):
        nCube = nCube + 1



    # Assign particle positions
    ip=-1
    x=0
    y=0
    z=0

    for i in range(0,nCube):
        for j in range(0,nCube):
            for k in range(0,nCube):
                if(ip<N):
                    x=(i+0.5)*(L/nCube)
                    y=(j+0.5)*(L/nCube)
                    z=(k+0.5)*(L/nCube)
                    coords[ip]=np.array([x,y,z])
                    ip=ip+1
                else:
                    break
    return coords,velocity,L


@jit(nopython=True)
def wrap(pos,L):
    '''Apply perodic boundary conditions.'''

    for i in range (len(pos)):
        for k in range(DIM):
                if (pos[i][k]>0.5):
                    pos[i][k]=pos[i][k]-1
                if (pos[i][k]<-0.5):
                    pos[i][k]=pos[i][k]+1


    return (pos)    






@jit(nopython=True)
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
    # Compute forces on positions using the Lennard-Jones potential
    # Uses double nested loop which is slow O(N^2) time unsuitable for large systems
    Sij = np.zeros(DIM) # Box scaled units
    Rij = np.zeros(DIM) # Real space units

    #Set all variables to zero
    ene_pot = np.zeros(N)
    acc = acc*0
    virial=0.0

    # Loop over all pairs of particles
    for i in range(N-1):
        for j in range(i+1,N): #i+1 to N ensures we do not double count
            Sij = pos[i]-pos[j] # Distance in box scaled units
            for l in range(DIM): # Periodic interactions
                if (np.abs(Sij[l])>0.5):
                    Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If distance is greater than 0.5  (scaled units) then subtract 0.5 to find periodic interaction distance.

            Rij   = L*Sij # Scale the box to the real units in this case reduced LJ units
            Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance

            if(Rsqij < Rcutoff**2):
                # Calculate LJ potential inside cutoff
                # We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
                rm2      = 1.0/Rsqij # 1/r^2
                rm6      = rm2**3
                forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
                phi      =4*(rm6**2-rm6)

                ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy

                ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy

                virial     = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
                acc[i]     = acc[i]+forcefact*Sij # Accumulate forces
                acc[j]     = acc[j]-forcefact*Sij # (Fji=-Fij)
    return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient


@jit(nopython=True)
def Calculate_Temperature(vel,L,DIM,N):

    ene_kin = 0.0

    for i in range(N):
        real_vel = L*vel[i]
        ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)

    ene_kin_aver = 1.0*ene_kin/N
    temperature = 2.0*ene_kin_aver/DIM

    return ene_kin_aver,temperature


# Main MD loop
@jit(nopython=True)
def main():

    # Vectors to store parameter values at each step

    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)


    pos,vel,L = initialise_config(N,DIM,density)
    acc = (np.random.randn(N,DIM)-0.5)
    volume=L**3

    # Open file which we will save the outputs to
    if os.path.exists('energy2'):
        os.remove('energy2')
    f = open('traj.xyz', 'w')

    for k in range(NSteps):

        # Refold positions according to periodic boundary conditions
        pos=wrap(pos,L)

        # r(t+dt) modify positions according to velocity and acceleration
        pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Rescale velocities and take half step
        chi = np.sqrt(temp/temperature[k])
        vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2

        # Compute forces a(t+dt),ene_pot,virial
        acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3

        # Complete the velocity step 
        vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Calculate pressure
        pressure[k]= density*temperature[k] + virial[k]/volume

    # Print output to file every DumpFreq number of steps
        if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values

            f.write("%s\n" %(N)) # Write the number of particles to file
            # Write all of the quantities at this step to the file
            f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
            for n in range(N): # Write the positions to file
                f.write("X"+" ")
                for l in range(DIM):
                    f.write(str(pos[n][l]*L)+" ")
                f.write("\n")


        if (k%5==0):
           # print("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            sys.stdout.write("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            sys.stdout.flush()

    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos

#------------------------------------------------------    
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()




# Plot all of the quantities
def plot():
    plt.figure(figsize=[7,12])
    plt.rc('xtick', labelsize=15) 
    plt.rc('ytick', labelsize=15)
    plt.subplot(4, 1, 1)
    plt.plot(ene_kin_aver,'k-')
    plt.ylabel(r"$E_{K}", fontsize=20)
    plt.subplot(4, 1, 2)
    plt.plot(ene_pot_aver,'k-')
    plt.ylabel(r"$E_{P}$", fontsize=20)
    plt.subplot(4, 1, 3)
    plt.plot(temperature,'k-')
    plt.ylabel(r"$T$", fontsize=20)
    plt.subplot(4, 1, 4)
    plt.plot(pressure,'k-')
    plt.ylabel(r"$P$", fontsize=20)
    plt.show()

plot()

The error I am getting is:


runfile('E:/Project/LJMD4.py', wdir='E:/Project')
Traceback (most recent call last):

  File "<ipython-input-8-aeebce887079>", line 1, in <module>
    runfile('E:/Project/LJMD4.py', wdir='E:/Project')

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "E:/Project/LJMD4.py", line 226, in <module>
    ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 351, in _compile_for_args
    error_rewrite(e, 'typing')

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 318, in error_rewrite
    reraise(type(e), e, None)

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\six.py", line 658, in reraise
    raise value.with_traceback(tb)

TypingError: cannot determine Numba type of <class 'builtin_function_or_method'>

When I searched on the internet, I found numba may not support some function I am using. But I am not using any Pandas or other data frame. I am just using pure python loop or NumPy which as far numba documentation suggests are well supported. I have tried removing numba from some functions and making nopython=0 but still, it sends different error messages. I can't figure out what is wrong with it. Without numba the code will not be feasible for actual use. Any further tips on speedup will be of great help. Thank you in advance.


Solution

  • A few common errors

    1. Use of unsupported functions

      file operations, many string operation. These can be in a objmode block. In this example I commented these things out.

    2. Wrong way of initializing arrays

      Only tuples are supported, not lists (Numpy accepts both but the documentation there are only tuples mentioned)

    3. Checking for division by zero and throwing an exception

      This is the standard behavior of Python, but not Numpy. If you want to avoid this overhead (if/else on every division) turn on the Numpy default behaviour (error_model="numpy").

    4. Use of globals

      Globals are hard coded into the compiled code (as you would directly write them into the code). They cannot be changed without recompilation.

    5. Wrong indexing of Numpy arrays

      pos[i][k] instead of pos[i,k]. Numba may optimize this away, but this has a quite noticeable negative impact in Pure Python code.

    Working version

    # -*- coding: utf-8 -*-
    """
    Created on Sat Mar 28 12:10:42 2020
    
    @author: Sandipan
    """
    
    import numpy as np
    import matplotlib.pyplot as plt
    from numba import jit
    import os
    import sys
    
    # All globals are compile time constants
    # recompilation needed if you change this values
    # Better way: hand a tuple of all needed vars to the functions
    # params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)
    
    # Setting up the simulation
    NSteps =100 # Number of steps
    deltat = 0.005 # Time step in reduced time units
    temp   = 0.851# #Reduced temperature
    DumpFreq = 100 # Save the position to file every DumpFreq steps
    epsilon = 1.0 # LJ parameter for the energy between particles
    DIM     =3
    N       =500
    density =0.776
    Rcutoff =3
    
    params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)
    
    #----------------------Function Definitions---------------------
    
    #------------------Initialise Configuration--------
    
    #error_model=True
    #Do you really want to search for division by zeros (additional cost)?
    
    @jit(nopython=True,error_model="numpy")
    def initialise_config(N,DIM,density):
        velocity = (np.random.randn(N,DIM)-0.5)
    
    
        # Set initial momentum to zero
        COM_V = np.sum(velocity)/N     #Center of mass velocity
        velocity = velocity - COM_V    # Fix any center-of-mass drift
    
        # Calculate initial kinetic energy
        k_energy=0
        for i in range (N):
            k_energy+=np.dot(velocity[i],velocity[i])
        vscale=np.sqrt(DIM*temp/k_energy)
        velocity*=vscale
    
        #wrong array initialization (use tuple)
        #Initialize with zeroes
        coords = np.zeros((N,DIM))
    
        # Get the cooresponding box size
        L = (N/density)**(1.0/DIM)
    
        """ Find the lowest perfect cube greater than or equal to the number of
         particles"""
        nCube = 2
    
        while (nCube**3 < N):
            nCube = nCube + 1
    
    
    
        # Assign particle positions
        ip=-1
        x=0
        y=0
        z=0
    
        for i in range(0,nCube):
            for j in range(0,nCube):
                for k in range(0,nCube):
                    if(ip<N):
                        x=(i+0.5)*(L/nCube)
                        y=(j+0.5)*(L/nCube)
                        z=(k+0.5)*(L/nCube)
                        coords[ip]=np.array([x,y,z])
                        ip=ip+1
                    else:
                        break
        return coords,velocity,L
    
    
    @jit(nopython=True)
    def wrap(pos,L):
        '''Apply perodic boundary conditions.'''
    
        #correct array indexing
        for i in range (len(pos)):
            for k in range(DIM):
                    if (pos[i,k]>0.5):
                        pos[i,k]=pos[i,k]-1
                    if (pos[i,k]<-0.5):
                        pos[i,k]=pos[i,k]+1
    
    
        return (pos)    
    
    
    @jit(nopython=True,error_model="numpy")
    def LJ_Forces(pos,acc,epsilon,L,DIM,N):
        # Compute forces on positions using the Lennard-Jones potential
        # Uses double nested loop which is slow O(N^2) time unsuitable for large systems
        Sij = np.zeros(DIM) # Box scaled units
        Rij = np.zeros(DIM) # Real space units
    
        #Set all variables to zero
        ene_pot = np.zeros(N)
        acc = acc*0
        virial=0.0
    
        # Loop over all pairs of particles
        for i in range(N-1):
            for j in range(i+1,N): #i+1 to N ensures we do not double count
                Sij = pos[i]-pos[j] # Distance in box scaled units
                for l in range(DIM): # Periodic interactions
                    if (np.abs(Sij[l])>0.5):
                        Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If distance is greater than 0.5  (scaled units) then subtract 0.5 to find periodic interaction distance.
    
                Rij   = L*Sij # Scale the box to the real units in this case reduced LJ units
                Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance
    
                if(Rsqij < Rcutoff**2):
                    # Calculate LJ potential inside cutoff
                    # We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
                    rm2      = 1.0/Rsqij # 1/r^2
                    rm6      = rm2**3
                    forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
                    phi      =4*(rm6**2-rm6)
    
                    ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy
    
                    ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy
    
                    virial     = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
                    acc[i]     = acc[i]+forcefact*Sij # Accumulate forces
                    acc[j]     = acc[j]-forcefact*Sij # (Fji=-Fij)
        #If you want to get get the best performance, sum directly in the loop intead of 
        #summing at the end np.sum(ene_pot)
        return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient
    
    
    @jit(nopython=True,error_model="numpy")
    def Calculate_Temperature(vel,L,DIM,N):
    
        ene_kin = 0.0
    
        for i in range(N):
            real_vel = L*vel[i]
            ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)
    
        ene_kin_aver = 1.0*ene_kin/N
        temperature = 2.0*ene_kin_aver/DIM
    
        return ene_kin_aver,temperature
    
    
    # Main MD loop
    @jit(nopython=True,error_model="numpy")
    def main(params):
        NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff=params
        # Vectors to store parameter values at each step
    
        ene_kin_aver = np.ones(NSteps)
        ene_pot_aver = np.ones(NSteps)
        temperature = np.ones(NSteps)
        virial = np.ones(NSteps)
        pressure = np.ones(NSteps)
    
    
        pos,vel,L = initialise_config(N,DIM,density)
        acc = (np.random.randn(N,DIM)-0.5)
        volume=L**3
    
        # Open file which we will save the outputs to
        # Unsupported operations have to be in an objectmode block
        # or simply write the outputs at the end in a pure Python function
        """
        if os.path.exists('energy2'):
            os.remove('energy2')
        f = open('traj.xyz', 'w')
        """
        for k in range(NSteps):
    
            # Refold positions according to periodic boundary conditions
            pos=wrap(pos,L)
    
            # r(t+dt) modify positions according to velocity and acceleration
            pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1
    
            # Calculate temperature
            ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
    
            # Rescale velocities and take half step
            chi = np.sqrt(temp/temperature[k])
            vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2
    
            # Compute forces a(t+dt),ene_pot,virial
            acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3
    
            # Complete the velocity step 
            vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4
    
            # Calculate temperature
            ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
    
            # Calculate pressure
            pressure[k]= density*temperature[k] + virial[k]/volume
    
            # Print output to file every DumpFreq number of steps
            """
            if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values
    
                f.write("%s\n" %(N)) # Write the number of particles to file
                # Write all of the quantities at this step to the file
                f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
                for n in range(N): # Write the positions to file
                    f.write("X"+" ")
                    for l in range(DIM):
                        f.write(str(pos[n][l]*L)+" ")
                    f.write("\n")
    
            #Simple prints without formating are supported
            if (k%5==0):
                #print("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
                #sys.stdout.write("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
                #sys.stdout.flush()
            """
        return ene_kin_aver, ene_pot_aver, temperature, pressure, pos
    
    #------------------------------------------------------    
    ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main(params)
    
    
    
    
    # Plot all of the quantities
    def plot():
        plt.figure(figsize=[7,12])
        plt.rc('xtick', labelsize=15) 
        plt.rc('ytick', labelsize=15)
        plt.subplot(4, 1, 1)
        plt.plot(ene_kin_aver,'k-')
        plt.ylabel(r"$E_{K}", fontsize=20)
        plt.subplot(4, 1, 2)
        plt.plot(ene_pot_aver,'k-')
        plt.ylabel(r"$E_{P}$", fontsize=20)
        plt.subplot(4, 1, 3)
        plt.plot(temperature,'k-')
        plt.ylabel(r"$T$", fontsize=20)
        plt.subplot(4, 1, 4)
        plt.plot(pressure,'k-')
        plt.ylabel(r"$P$", fontsize=20)
        plt.show()
    
    plot()