I have a function `main_fun()` with 3 arguments (it returns a pandas DataFrame).
I use multiprocessing on it within a for loop:
def main_fun(ar1, ar2, ar3):
    """Return a copy of ``ar1``.

    This is a stub standing in for the real per-file work: it only calls
    ``ar1.copy()`` and returns the result. ``ar2`` and ``ar3`` are accepted
    but not used here.

    NOTE(review): ``ar1`` must provide a ``.copy()`` method (e.g. a
    DataFrame); a plain path string would not — confirm what the real
    body receives.
    """
    # Do something
    df = ar1.copy()
    return df
if __name__ == "__main__":
    # The guard keeps worker processes from re-running this launcher when the
    # main module is re-imported under the "spawn" start method.
    process_list = []
    for i in glob.glob(FolderPath + "/*.csv"):
        for j in ['File1', 'File2']:
            # 'File1' is matched through the longer marker 'File1_XXX'.
            j_tmp = 'File1_XXX' if j == 'File1' else j
            # Upper-case both sides: the path is upper-cased, so a mixed-case
            # marker could otherwise never be found in it.
            if j_tmp.upper() in i.upper():
                p = mp.Process(
                    target=main_fun,
                    args=(i, j, FolderPath + '/This_Is_Fixed.csv'),
                )
                p.start()
                process_list.append(p)

    # Wait for every started worker. The value returned by main_fun is not
    # collected anywhere in this loop.
    for process in process_list:
        process.join()
With the above, I can get the job done.
However, if I want to collect the DataFrame returned by each process and concatenate them after the multiprocessing has finished, how should I do that with this for loop?
Here is the answer, with a simple example:
def foo(name, name2):
    """Build a one-row DataFrame from two values.

    Both arguments are formatted to strings via f-strings and stored under
    the columns ``col1`` (from ``name``) and ``col2`` (from ``name2``).

    Returns:
        A ``pd.DataFrame`` with exactly one row and the two columns above.
    """
    d = {'col1': f'{name}', 'col2': f'{name2}'}
    df = pd.DataFrame(data=[d])
    return df
# Collect the matching argument pairs first so the whole batch can be handed
# to the pool in a single call.
arg1 = []
arg2 = []
for i in ['bob1_XX', 'steve_XX', 'andy_XX']:
    for j in ["BOB", "ANDY"]:
        # "BOB" is matched through the longer marker 'BOB1'.
        j_tmp = 'BOB1' if j == 'BOB' else j
        if j_tmp in i.upper():
            arg1.append(i)
            arg2.append(j)

if __name__ == "__main__":
    # The guard keeps worker processes from re-creating the pool when the main
    # module is re-imported under the "spawn" start method.
    # Leave one core free, but never ask for fewer than one worker
    # (cpu_count() - 1 is 0 on a single-core machine, which Pool rejects).
    n_workers = max(1, mp.cpu_count() - 1)
    with mp.Pool(processes=n_workers) as pool:
        # starmap unpacks each (arg1, arg2) pair into foo(name, name2) and
        # returns the results as a list in input order.
        data = pool.starmap(foo, zip(arg1, arg2))

    # Keep the concatenated frame in a name instead of discarding it.
    combined = pd.concat(data, axis=0)
    print(combined)