Search code examples
Tags: python, multithreading, performance, gil, hamming-distance

Why doesn't threading make it faster to iterate over a NumPy array?


My question is about multi-threading in Python. The problem I am working on is finding, among 10 million arrays of length 64, those that are 80 percent similar to a given array of the same length. The problem is that although my code executes in 12.148 seconds when I iterate linearly inside a while loop, it takes at least 28-30 seconds when I use multi-threading. Both implementations are below. Any advice is appreciated; please enlighten me: why does multi-threading make it slower in this case? First code:

import timeit
import numpy as np

# Candidate vectors to search through, loaded from disk; the scan below
# indexes this array row by row.
ph = np.load('newDataPhoto.npy')

# 64-entry 0/1 query vector that each row of ph is compared against
# (written here as 8 rows of 8 values).
myPhoto1 = np.array([
    1., 1., 0., 1., 0., 0., 1., 0.,
    1., 0., 0., 1., 0., 1., 1., 1.,
    0., 0., 0., 1., 1., 0., 1., 1.,
    0., 0., 1., 1., 1., 0., 0., 1.,
    0., 0., 1., 1., 1., 0., 0., 1.,
    0., 0., 1., 0., 0., 0., 1., 0.,
    0., 0., 1., 0., 0., 1., 1., 0.,
    1., 0., 1., 0., 0., 1., 1., 0.,
])



# Time only the scan; loading the data above is deliberately excluded.
start = timeit.default_timer()

# A row counts as a match when it differs from myPhoto1 in at most this
# many positions.
MAX_MISMATCHES = 14
# Number of leading rows of ph to scan.
ROW_COUNT = 10000000

kk = 0
# Iterating over the sliced array yields one row at a time; the slice is
# clamped to the array length, so a shorter ph is scanned in full instead
# of raising IndexError.
for row in ph[:ROW_COUNT]:
    if np.count_nonzero(row != myPhoto1) <= MAX_MISMATCHES:
        kk += 1

print(kk)

stop = timeit.default_timer()
# print(...) with a single argument behaves the same on Python 2 and 3.
print(stop - start)

Second code (multi-threaded):

import timeit
from threading import Lock, Thread

import numpy as np

# Module-level timer started before the data is loaded.
# NOTE(review): nothing in the visible code reads this value; the worker
# function keeps its own local timer.
start = timeit.default_timer()

# Candidate vectors to search through, loaded from disk.
ph = np.load('newDataPhoto.npy')
# NOTE(review): pc is loaded but not referenced anywhere in the visible code.
pc = np.load('newDataPopCount.npy')

# 64-entry 0/1 query vector that each row of ph is compared against
# (written here as 8 rows of 8 values).
myPhoto1 = np.array([
    1., 1., 0., 1., 0., 0., 1., 0.,
    1., 0., 0., 1., 0., 1., 1., 1.,
    0., 0., 0., 1., 1., 0., 1., 1.,
    0., 0., 1., 1., 1., 0., 0., 1.,
    0., 0., 1., 1., 1., 0., 0., 1.,
    0., 0., 1., 0., 0., 0., 1., 0.,
    0., 0., 1., 0., 0., 1., 1., 0.,
    1., 0., 1., 0., 0., 1., 1., 0.,
])

# Serialises updates to the shared global counter kk across worker threads.
_KK_LOCK = Lock()


def hamming_dist(left, right, name):
    """Count rows ph[left..right] that differ from myPhoto1 in <= 14 positions.

    The number of matching rows is added to the module-level counter ``kk``.
    When the scan is finished, ``name`` and the elapsed wall-clock time in
    seconds are printed.

    Args:
        left: Index of the first row of ``ph`` to examine (inclusive).
        right: Index of the last row of ``ph`` to examine (inclusive).
        name: Label printed together with the timing, identifying the caller.
    """
    global kk
    start = timeit.default_timer()

    # Count locally and publish once: ``kk += 1`` on a shared global is a
    # read-modify-write that can lose updates when several threads run it.
    matches = 0
    idx = left
    while idx <= right:
        if np.count_nonzero(ph[idx] != myPhoto1) <= 14:
            matches += 1
        idx += 1

    with _KK_LOCK:
        kk += matches

    stop = timeit.default_timer()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(name)
    print(stop - start)

def Main():
    """Scan ph with four worker threads and print the total match count.

    Resets the global counter ``kk``, starts one thread per row range, waits
    for all of them to finish and then prints ``kk``.
    """
    global kk
    kk = 0

    # Inclusive (first_row, last_row, label) ranges; together they cover
    # rows 0..9999999 exactly once.
    chunks = [
        (0, 2500000, 't1'),
        (2500001, 5000000, 't2'),
        (5000001, 7500000, 't3'),
        (7500001, 9999999, 't4'),
    ]
    workers = [Thread(target=hamming_dist, args=chunk) for chunk in chunks]
    for worker in workers:
        worker.start()

    # Printed right after the threads are launched, not after they finish.
    print('main done')

    # Wait for every worker so the total below is complete before it is
    # reported.
    for worker in workers:
        worker.join()
    print(kk)


if __name__ == "__main__":
    Main()

And their outputs in order:

38
12.148679018 
#####
main done
t4
26.4695241451
t2
27.4959039688
t3
27.5113890171
t1
27.5896160603

Solution

  • I solved the problem. I found out that threading is limited by the GIL (global interpreter lock), which never lets the threads use more than one processor at a time. However, using the multiprocessing module worked. Here is the modification I made:

    import numpy as np
    import multiprocessing
    import timeit
    
    # Module-level timer started before the data is loaded.
    # NOTE(review): nothing in the visible code reads this value; the worker
    # function keeps its own local timer.
    start = timeit.default_timer()

    # Candidate vectors to search through, loaded from disk.
    ph = np.load('newDataPhoto.npy')
    # NOTE(review): pc is loaded but not referenced anywhere in the visible code.
    pc = np.load('newDataPopCount.npy')

    # 64-entry 0/1 query vector that each row of ph is compared against
    # (written here as 8 rows of 8 values).
    myPhoto1 = np.array([
        1., 1., 0., 1., 0., 0., 1., 0.,
        1., 0., 0., 1., 0., 1., 1., 1.,
        0., 0., 0., 1., 1., 0., 1., 1.,
        0., 0., 1., 1., 1., 0., 0., 1.,
        0., 0., 1., 1., 1., 0., 0., 1.,
        0., 0., 1., 0., 0., 0., 1., 0.,
        0., 0., 1., 0., 0., 1., 1., 0.,
        1., 0., 1., 0., 0., 1., 1., 0.,
    ])
    
    def hamming_dist(left, right, name, counter=None):
        """Count rows ph[left..right] that differ from myPhoto1 in <= 14 positions.

        When the scan is finished, ``name`` and the elapsed wall-clock time
        in seconds are printed.

        Args:
            left: Index of the first row of ``ph`` to examine (inclusive).
            right: Index of the last row of ``ph`` to examine (inclusive).
            name: Label printed together with the timing.
            counter: Optional shared ``multiprocessing.Value``. When given,
                the number of matches is added to it under its lock, which
                makes the result visible to the parent process. When
                omitted, the matches are added to the global ``kk`` of the
                current process instead.
        """
        global kk
        start = timeit.default_timer()

        matches = 0
        idx = left
        while idx <= right:
            if np.count_nonzero(ph[idx] != myPhoto1) <= 14:
                matches += 1
            idx += 1

        if counter is None:
            # A module global is private to each process, so this total is
            # only meaningful when the function runs in the caller's process.
            kk += matches
        else:
            with counter.get_lock():
                counter.value += matches

        stop = timeit.default_timer()
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print(name)
        print(stop - start)

    def Main():
        """Scan ph with four worker processes and print the total match count.

        Starts one process per row range, waits for all of them, stores the
        combined number of matches in the global ``kk`` and prints it.
        """
        global kk
        kk = 0

        # Child processes do not share module globals with the parent, so
        # the matches are collected in a shared-memory integer passed to
        # each worker.
        total = multiprocessing.Value('l', 0)

        # Inclusive (first_row, last_row, label) ranges; together they cover
        # rows 0..9999999 exactly once.
        chunks = [
            (0, 2500000, 't1'),
            (2500001, 5000000, 't2'),
            (5000001, 7500000, 't3'),
            (7500001, 9999999, 't4'),
        ]
        workers = [
            multiprocessing.Process(target=hamming_dist,
                                    args=(first, last, label, total))
            for first, last, label in chunks
        ]
        for worker in workers:
            worker.start()

        # Printed right after the processes are launched, not after they
        # finish.
        print('main done')

        # Wait for every worker so the total is complete before it is read.
        for worker in workers:
            worker.join()
        kk = total.value
        print(kk)

    if __name__ == "__main__":
        Main()