I have written the function scores_single
, which calculates all the permuatations of a board and scores it.
I tried to follow this SO answer to parallelise the function using OpenMP, and came up with scores_parallel
. The problem is that the parallel version iterates over the same permutations in multiple loops.
The code follows:
#include <iostream>
#include <omp.h>
#include <vector>
// Single threaded version
int scores_single(const int tokens, const int board_length) {
std::vector<int> scores;
// Generate boards
std::vector<char> board(board_length);
std::fill(board.begin(), board.end() - tokens, '-');
std::fill(board.end() - tokens, board.end(), 'T');
do {
// printf("Board: %s\n", std::string(board.data(), board.size()).c_str());
// Score board
auto value = 3;
scores.push_back(value);
} while (std::next_permutation(board.begin(), board.end()));
return scores.size();
}
// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
std::vector<std::vector<int>*> score_lists(board_length);
// Generate boards
std::vector<char> board(board_length);
std::fill(board.begin(), board.end() - tokens, '-');
std::fill(board.end() - tokens, board.end(), 'T');
printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
{
#pragma omp single nowait
for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
{
auto *scores = new std::vector<int>;
// Make a copy of the board for this thread
auto board_thread = board;
// Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
do {
printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
// Score board
auto value = 3;
scores->push_back(value);
} while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
score_lists[i] = scores;
printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
}
}
}
std::vector<int> scores;
int k = 0;
for (auto & list : score_lists) {
for (int j : *list) {
scores.push_back(j);
}
}
printf("Finished, size: %lu\n", scores.size());
return scores.size();
}
int main() {
int p = scores_parallel(2, 4);
int s = scores_single(2, 4);
std::cout << p << " != " << s << std::endl;
return 0;
}
Output:
Starting
[01] board: --TT
[03] board: T--T
[03] board: T-T-
[02] board: T--T
[03] board: TT--
[01] board: -T-T
[02] board: T-T-
[00] board: --TT
[03] Finished on thread 10 with 3 values
[00] board: -T-T
[00] board: -TT-
[02] board: TT--
[00] Finished on thread 11 with 3 values
[01] board: -TT-
[02] Finished on thread 12 with 3 values
[01] Finished on thread 4 with 3 values
Finished, size: 12
12 != 6
I think I understand the SO answer I am copying, but I am not sure what I have done wrong.
6 is the expected answer, as 4C2 = 6.
Figured it out, I was calculating the same permutation multiple times. I fixed it with the if statement below.
#include <iostream>
#include <omp.h>
#include <vector>
// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
std::vector<std::vector<int>*> score_lists(board_length);
// Generate boards
std::vector<char> board(board_length);
std::fill(board.begin(), board.end() - tokens, '-');
std::fill(board.end() - tokens, board.end(), 'T');
printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
{
#pragma omp single nowait
for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
{
auto *scores = new std::vector<int>;
// No need to process this branch if it will be identical to a prior branch.
if (board[i] != board[i + 1]) {
// Make a copy of the board for this thread
auto board_thread = board;
printf("[%02d] evaluating: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
// Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
do {
printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
// Score board
auto value = 3;
scores->push_back(value);
} while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
}
score_lists[i] = scores;
printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
}
}
}
std::vector<int> scores;
int k = 0;
for (auto & list : score_lists) {
for (int j : *list) {
scores.push_back(j);
}
}
printf("Finished, size: %lu\n", scores.size());
return scores.size();
}
int main() {
int p = scores_parallel(2, 4);
int s = scores_single(2, 4);
std::cout << p << " == " << s << std::endl;
return p != s;
}