I'm running multiple experiments, but each one takes a lot of time, so I tried to use the multiprocessing package, but I get an error.
The function is defined like this:
def run_exp_anxia_sim(num_exp, num1, num2,
                      eps, isfuzzy, remove, using_train, iscompress):
    """Run one anorexia-detection experiment and write a report file.

    Parameters
    ----------
    num_exp : experiment index, used only to name the output file.
    num1, num2 : feature counts forwarded to ``classificator_pos_neg``.
    eps : ``tau`` threshold forwarded to the classifier.
    isfuzzy, remove, using_train, iscompress : classifier flags.

    Returns
    -------
    The F1 score on ``test_labels_anxia``; when ``using_train`` is true,
    a ``(f1, best_params)`` tuple instead.

    NOTE(review): relies on module-level globals (``all_pos``, ``all_neg``,
    ``test_anxia``, ``test_labels_anxia``, ``tr_labels``) — confirm they
    are defined before this runs, especially in worker processes.
    """
    # Output path is shared by both branches; build it once.
    result_name = 'result_anxia_key' + str(num_exp) + '.txt'
    path_name = '/content/sample_data' + '/' + result_name

    if not using_train:
        # Classifier returns predictions plus auxiliary data; only the
        # predictions are used here.
        results, x, y, dic1, dic2 = classificator_pos_neg(
            all_pos, all_neg, test_anxia,
            num_feat1=num1, num_feat2=num2,
            tau=eps, fuzzy=isfuzzy, remove_stop=remove,
            train_data=using_train, compress=iscompress)
        # 'with' closes the file automatically; no explicit close() needed.
        with open(path_name, "w") as out:
            out.write("Experimento de anorexia número: " + str(num_exp) + '\n')
            out.write("Confusion matrix: \n")
            out.write(str(confusion_matrix(test_labels_anxia, results)) + '\n')
            out.write('Metrics classification \n')
            out.write(str(metrics.classification_report(test_labels_anxia, results)) + '\n')
        return f1_score(test_labels_anxia, results)

    # Training path: tune a linear SVM with grid search, then evaluate.
    seed_val = 42
    np.random.seed(seed_val)  # reproducibility of any randomized steps
    parameters = {'C': [.05, .12, .25, .5, 1, 2, 4]}
    # With train_data=True the classifier also returns the training matrix z.
    results, x, y, z, dic1, dic2 = classificator_pos_neg(
        all_pos, all_neg, test_anxia, num1, num2,
        tau=eps, fuzzy=isfuzzy, remove_stop=remove,
        train_data=using_train, compress=iscompress)
    svr = svm.LinearSVC(class_weight='balanced', dual=False)
    grid_anorexia = GridSearchCV(estimator=svr, param_grid=parameters,
                                 n_jobs=8, scoring='f1_macro', cv=5)
    grid_anorexia.fit(z, tr_labels)
    y_pred = grid_anorexia.predict(results)
    a1 = grid_anorexia.best_params_
    # 'out' (not 'f') for the file handle so it doesn't shadow the fscore.
    p, r, f, _ = precision_recall_fscore_support(
        test_labels_anxia, y_pred, average='macro', pos_label=1)
    with open(path_name, "w") as out:
        out.write("Experimento de anorexia número: " + str(num_exp) + '\n')
        out.write("Confusion matrix: \n")
        out.write(str(confusion_matrix(test_labels_anxia, y_pred)) + '\n')
        out.write('Metrics classification \n')
        out.write(str(metrics.classification_report(test_labels_anxia, y_pred)) + '\n')
        out.write('Best parameter:\n')
        out.write(str(a1))
        out.write('\n')
    return f1_score(test_labels_anxia, y_pred), a1
and the arguments are:
# One entry per experiment (two experiments total); positions follow the
# zip order used when calling run_exp_anxia_sim.
arg1 = [1, 2]            # num_exp: experiment ids
arg6 = [1000, 1500]      # num1
arg7 = [1000, 1500]      # num2
arg8 = [0.99, 0.99]      # eps (tau)
arg10 = [True, True]     # isfuzzy
arg11 = [False, False]   # remove
arg12 = [False, False]   # using_train
arg13 = [False, False]   # iscompress
but when I run
# BUG (the subject of this question): zip(...) is a single iterable of
# tuples, so pool.apply passes that whole iterable as the first positional
# argument (num_exp) and the remaining seven parameters go unfilled —
# hence the TypeError about missing positional arguments below.
with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
x,y,z,w,t = pool.apply(run_exp_anxia_sim, zip(arg1,arg6,arg7,arg8,arg10,arg11,arg12,arg13))
print(x,y,z,w,t)
it says
TypeError: run_exp_anxia_sim() missing 6 required positional arguments: 'num2', 'eps', 'isfuzzy', 'remove', 'using_train', and 'iscompress'
and when I try the same with starmap
I get an error about unpacked values.
What can I do?
You are mixing two different things.
Firstly, pool.apply
is limited in its usefulness because it blocks until the result is ready, which is probably not something you want when using multiprocessing.
Secondly, when using pool.apply
, the arguments you supply are directly unpacked to the target function. So because zipping creates an iterable of tuples, you will be passing that entire iterable as the first argument, which is why the error is raised since the function only got one argument.
Here is how to use pool.apply
in this context:
from multiprocessing import Pool


def run_exp_anxia_sim(num_exp, num1, num2,
                      eps, isfuzzy, remove, using_train, iscompress):
    """Stub that just shows which arguments each worker call received."""
    print(locals())


if __name__ == "__main__":
    arg1 = [1, 2]
    arg6 = [1000, 1500]
    arg7 = [1000, 1500]
    arg8 = [0.99] * 2
    arg10 = [True] * 2
    arg11 = [False] * 2
    arg12 = [False] * 2
    arg13 = [False] * 2

    # pool.apply takes ONE argument tuple per call, so submit the
    # experiments one at a time and collect each blocking result.
    all_args = zip(arg1, arg6, arg7, arg8, arg10, arg11, arg12, arg13)
    results = []
    with Pool(4) as pool:
        for args in all_args:
            results.append(pool.apply(run_exp_anxia_sim, args=args))
    print(results)
Here is how to instead use pool.starmap
(which makes more sense here):
# starmap unpacks every argument tuple into a separate call and runs the
# calls across the pool, returning results in input order.
all_args = list(zip(arg1, arg6, arg7, arg8, arg10, arg11, arg12, arg13))
with Pool(4) as pool:
    results = pool.starmap(run_exp_anxia_sim, all_args)
print(results)