I currently have this code running:

import os
import multiprocessing
import pandas as pd

def single_iteration(iter: int):
    some_parameters = 100
    data1, data2, data3, data4 = do_something()
    result = []
    for i in range(100):
        data = {'Col1': data1, 'Col2': data2, 'Col3': data3, 'Col4': data4}
        result.append(data)
    df = pd.DataFrame(result)
    return df

if __name__ == "__main__":
    run_stop = 100
    number_of_cores = int(os.environ['SLURM_CPUS_PER_TASK'])
    with multiprocessing.Pool(number_of_cores) as pool:
        results = pool.map(single_iteration, range(run_stop))
    df = pd.concat(results, ignore_index=True)
    df.to_csv(os.path.join(path, file_name))
However, I now want two different CSVs, something like df1 = pd.DataFrame({'Col1': data1, 'Col2': data2}) and df2 = pd.DataFrame({'Col3': data3, 'Col4': data4}), returned from each run, then concatenated separately and saved to two files.
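One way to do this is to have single_iteration return a tuple (df1, df2): pool.map then gives you a list of tuples that you can split into two lists and concatenate separately. In the sketch below, data1 through data4 are placeholder lists standing in for whatever do_something() actually returns: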
import os
import multiprocessing
import pandas as pd

def single_iteration(iter: int):
    some_parameters = 100
    # Placeholder data standing in for whatever do_something() returns
    data1 = [1] * 100
    data2 = [2] * 100
    data3 = [3] * 100
    data4 = [4] * 100
    result1 = []
    result2 = []
    for i in range(100):
        data1_dict = {'Col1': data1[i], 'Col2': data2[i]}
        data2_dict = {'Col3': data3[i], 'Col4': data4[i]}
        result1.append(data1_dict)
        result2.append(data2_dict)
    df1 = pd.DataFrame(result1)
    df2 = pd.DataFrame(result2)
    # Return both DataFrames as a tuple; pool.map collects one tuple per run
    return df1, df2

if __name__ == "__main__":
    run_stop = 100
    number_of_cores = int(os.environ.get('SLURM_CPUS_PER_TASK', 4))
    with multiprocessing.Pool(number_of_cores) as pool:
        results = pool.map(single_iteration, range(run_stop))
    # Separate the results into two lists of DataFrames
    df1_list = [res[0] for res in results]
    df2_list = [res[1] for res in results]
    # Concatenate all DataFrames in each list and save each to its own CSV
    df1 = pd.concat(df1_list, ignore_index=True)
    df2 = pd.concat(df2_list, ignore_index=True)
    df1.to_csv('df1_file_name.csv', index=False)
    df2.to_csv('df2_file_name.csv', index=False)
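If you prefer, the same split can be written with zip, which transposes the list of (df1, df2) tuples into two tuples of DataFrames. A minimal sketch of just that post-processing step, using the same results variable as above:

# Equivalent post-processing: zip(*results) unpacks the (df1, df2) tuples
df1_list, df2_list = zip(*results)
df1 = pd.concat(df1_list, ignore_index=True)
df2 = pd.concat(df2_list, ignore_index=True)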