I have multiple files in a directory. I want to extract data from every file, and I want to use Python multiprocessing for this task. In the following piece of code, if 'filelist' contains a single element, then 'my_result' gives the value of 'np_array_list', but when 'filelist' contains multiple elements, 'my_result' does not give any result. What is the problem with 'filelist' or 'filename_arg'? Can anyone suggest a solution?
import glob
import multiprocessing as mp
import os
import re

import framel
import numpy as np

# Collect the frame files to process. Only the base names are kept, so the
# files are presumably opened relative to the current working directory.
path = 'C:\\Users\\sys\\PycharmProjects\\MPtest\\*.gwf'
filenames = [os.path.basename(x) for x in glob.glob(path)]
# Order the files numerically by the first run of digits in each name.
# The pattern is a raw string so that "\d" is not an invalid str escape.
filelist = sorted(filenames, key=lambda x: float(re.findall(r"(\d+)", x)[0]))

# Channel names "VALUE_201" ... "VALUE_234" (34 channels).
channelslist = ["VALUE_" + str(n) for n in range(201, 235)]
rows = 500000
cols = len(channelslist)  # No. of channels involved in the measurement

# Module-level buffers that myfunc writes into and collects.
sensdataarray = np.zeros((rows, cols))
np_array_list = []
def myfunc(filename_arg):
    """Read every channel of one frame file into the module-level array.

    Column ``k`` of the module-level ``sensdataarray`` is overwritten with
    the first element of what ``framel.frgetvect`` returns for channel
    ``channelslist[k]`` of ``filename_arg``. The array is then appended to
    the module-level ``np_array_list`` and that list is returned.

    NOTE: every call appends the same ``sensdataarray`` object, so all
    entries of the returned list refer to one and the same buffer.
    """
    for column, channel in enumerate(channelslist):
        channel_data = framel.frgetvect(filename_arg, channel, verbose=False)
        sensdataarray[:, column] = channel_data[0]
    np_array_list.append(sensdataarray)
    return np_array_list
# MP: run myfunc over all files with a pool of worker processes.
print("no of CPUs:", mp.cpu_count())
if __name__ == '__main__':
    pool = mp.Pool()
    # Observed: with a single element in 'filelist', 'my_result' holds the
    # value of 'np_array_list'; with several elements, no result is obtained.
    my_result = pool.map(myfunc, filelist)
    # Stop accepting work, then wait for the workers to exit.
    pool.close()
    pool.join()
    print('My result is :', my_result)
Initialize sensdataarray inside your function and just return it.
...
import glob
import multiprocessing as mp
import os
import re

import framel
import numpy as np

# Collect the frame files to process. Only the base names are kept, so the
# files are presumably opened relative to the current working directory.
path = 'C:\\Users\\sys\\PycharmProjects\\MPtest\\*.gwf'
filenames = [os.path.basename(x) for x in glob.glob(path)]
# Order the files numerically by the first run of digits in each name.
# The pattern is a raw string so that "\d" is not an invalid str escape.
filelist = sorted(filenames, key=lambda x: float(re.findall(r"(\d+)", x)[0]))

# Channel names "VALUE_201" ... "VALUE_234" (34 channels).
channelslist = ["VALUE_" + str(n) for n in range(201, 235)]
rows = 500000
cols = len(channelslist)  # No. of channels involved in the measurement
def myfunc(filename_arg):
    """Return a freshly allocated array with all channels of one frame file.

    A new ``(rows, cols)`` array of zeros is created on every call, using the
    module-level ``rows`` and ``cols``. Column ``k`` is filled with the first
    element of what ``framel.frgetvect`` returns for channel
    ``channelslist[k]`` of ``filename_arg``.
    """
    channel_table = np.zeros((rows, cols))
    for column, channel in enumerate(channelslist):
        channel_data = framel.frgetvect(filename_arg, channel, verbose=False)
        channel_table[:, column] = channel_data[0]
    return channel_table
# MP: run myfunc over all files with a pool of worker processes.
if __name__ == '__main__':
    # Report the CPU count inside the guard so that worker processes which
    # re-import this module (spawn start method) do not print it again.
    print("no of CPUs:", mp.cpu_count())
    pool = mp.Pool()
    try:
        # One task per file; results come back in the order of 'filelist'.
        my_result = pool.map(myfunc, filelist)
    finally:
        # Always shut the workers down, even if a task raised.
        pool.close()
        pool.join()
    print('My result is :', my_result)
Besides, if your script raises an error, the subprocesses may not be joined and may be left running in the background. Either use a try/finally clause to guarantee that the pool is joined, or use the parmap module:
import glob
import os
import re
import framel
import parmap
import numpy as np
def myfunc(filename_arg, rows, channelslist):
    """Return a freshly allocated array with all channels of one frame file.

    Args:
        filename_arg: Name of the frame file handed to ``framel.frgetvect``.
        rows: Number of rows of the returned array.
        channelslist: Channel names; one output column per entry.

    Returns:
        A ``(rows, len(channelslist))`` array that starts as zeros and whose
        column ``k`` is filled with the first element of what
        ``framel.frgetvect`` returns for ``channelslist[k]``.
    """
    channel_table = np.zeros((rows, len(channelslist)))
    for column, channel in enumerate(channelslist):
        channel_data = framel.frgetvect(filename_arg, channel, verbose=False)
        channel_table[:, column] = channel_data[0]
    return channel_table
if __name__ == '__main__':
    path = 'C:\\Users\\sys\\PycharmProjects\\MPtest\\*.gwf'
    # Only the base names are kept, so the files are presumably opened
    # relative to the current working directory.
    filenames = [os.path.basename(x) for x in glob.glob(path)]
    # Order the files numerically by the first run of digits in each name.
    # The pattern is a raw string so that "\d" is not an invalid str escape.
    filelist = sorted(filenames, key=lambda x: float(re.findall(r"(\d+)", x)[0]))
    # Channel names "VALUE_201" ... "VALUE_234" (34 channels).
    channelslist = ["VALUE_" + str(n) for n in range(201, 235)]
    # rows and channelslist are forwarded as keyword arguments to every
    # myfunc call; filelist supplies the per-call first argument.
    my_result = parmap.map(myfunc, filelist, rows=500000, channelslist=channelslist)
    print('My result is :', my_result)