Search code examples
c++parallel-processingopenmppermutationc++20

OpenMP parallelise std::next_permutation


I have written the function scores_single, which calculates all the permuatations of a board and scores it.

I tried to follow this SO answer to parallelise the function using OpenMP, and came up with scores_parallel. The problem is that the parallel version iterates over the same permutations in multiple loops.

The code follows:

#include <iostream>
#include <omp.h>
#include <vector>

// Single threaded version
int scores_single(const int tokens, const int board_length) {
    std::vector<int> scores;

    // Generate boards
    std::vector<char> board(board_length);
    std::fill(board.begin(), board.end() - tokens, '-');
    std::fill(board.end() - tokens, board.end(), 'T');

    do {
//        printf("Board: %s\n", std::string(board.data(), board.size()).c_str());

        // Score board
        auto value = 3;

        scores.push_back(value);
    } while (std::next_permutation(board.begin(), board.end()));
    return scores.size();
}

// OpenMP parallel version
int scores_parallel(const int tokens, const int board_length) {
    std::vector<std::vector<int>*> score_lists(board_length);

    // Generate boards
    std::vector<char> board(board_length);
    std::fill(board.begin(), board.end() - tokens, '-');
    std::fill(board.end() - tokens, board.end(), 'T');

    printf("Starting\n");
#pragma omp parallel default(none) shared(board, board_length, score_lists)
    {
#pragma omp single nowait
        for (int i = 0; i < board_length; ++i) {
#pragma omp task untied
            {
                auto *scores = new std::vector<int>;

                // Make a copy of the board for this thread
                auto board_thread = board;
                // Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
                std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
                do {
                    printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());

                    // Score board
                    auto value = 3;

                    scores->push_back(value);
                } while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
                score_lists[i] = scores;
                printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
            }
        }
    }

    std::vector<int> scores;

    int k = 0;
    for (auto & list : score_lists) {
        for (int j : *list) {
            scores.push_back(j);
        }
    }
    printf("Finished, size: %lu\n", scores.size());

    return scores.size();
}

int main() {
    int p = scores_parallel(2, 4);
    int s = scores_single(2, 4);
    std::cout << p << " != " << s << std::endl;
    return 0;
}

Output:

Starting
[01] board: --TT
[03] board: T--T
[03] board: T-T-
[02] board: T--T
[03] board: TT--
[01] board: -T-T
[02] board: T-T-
[00] board: --TT
[03] Finished on thread 10 with 3 values
[00] board: -T-T
[00] board: -TT-
[02] board: TT--
[00] Finished on thread 11 with 3 values
[01] board: -TT-
[02] Finished on thread 12 with 3 values
[01] Finished on thread 4 with 3 values
Finished, size: 12
12 != 6

I think I understand the SO answer I am copying, but I am not sure what I have done wrong.

6 is the expected answer, as 4C2 = 6.


Solution

  • Figured it out, I was calculating the same permutation multiple times. I fixed it with the if statement below.

    #include <iostream>
    #include <omp.h>
    #include <vector>
    
    // OpenMP parallel version
    int scores_parallel(const int tokens, const int board_length) {
        std::vector<std::vector<int>*> score_lists(board_length);
    
        // Generate boards
        std::vector<char> board(board_length);
        std::fill(board.begin(), board.end() - tokens, '-');
        std::fill(board.end() - tokens, board.end(), 'T');
    
        printf("Starting\n");
    #pragma omp parallel default(none) shared(board, board_length, score_lists)
        {
    #pragma omp single nowait
            for (int i = 0; i < board_length; ++i) {
    #pragma omp task untied
                {
                    auto *scores = new std::vector<int>;
    
                    // No need to process this branch if it will be identical to a prior branch.
                    if (board[i] != board[i + 1]) {
                        // Make a copy of the board for this thread
                        auto board_thread = board;
                        printf("[%02d] evaluating: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
                        // Subset for this thread, see: https://stackoverflow.com/questions/30865231/parallel-code-for-next-permutation
                        std::rotate(board_thread.begin(), board_thread.begin() + i, board_thread.begin() + i + 1);
                        do {
                            printf("[%02d] board: %s\n", i, std::string(board_thread.data(), board_thread.size()).c_str());
    
                            // Score board
                            auto value = 3;
    
                            scores->push_back(value);
                        } while (std::next_permutation(board_thread.begin() + 1, board_thread.end()));
                    }
                    score_lists[i] = scores;
                    printf("[%02d] Finished on thread %d with %lu values\n", i, omp_get_thread_num(), scores->size());
                }
            }
        }
    
        std::vector<int> scores;
    
        int k = 0;
        for (auto & list : score_lists) {
            for (int j : *list) {
                scores.push_back(j);
            }
        }
        printf("Finished, size: %lu\n", scores.size());
    
        return scores.size();
    }
    
    int main() {
        int p = scores_parallel(2, 4);
        int s = scores_single(2, 4);
        std::cout << p << " == " << s << std::endl;
        return p != s;
    }