Search code examples
c++multithreadingmpiopenmpeigen

How to launch multi-threads in MPI + openmp?


I want to launch an OpenMP multi-thread region in one process within my MPI application code. For example:

#include <iostream>
#include <omp.h>
#include <mpi.h>
#include <Eigen/Dense>
using std::cin;
using std::cout;
using std::endl;

using namespace Eigen;

int main(int argc, char ** argv)
{
    int rank, num_process;
    MatrixXd A = MatrixXd::Ones(8, 4);
    MatrixXd B = MatrixXd::Zero(8, 4);
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &num_process);
    MPI_Status status;
    if (rank == 0)
    {
        int i, j, bnum = 2, brow = 4, thid;
        #pragma omp parallel shared(A, B) private(i, j, brow, bnum, thid) num_threads(2)
        for (i = 0; i < brow; i ++)
        {
            for (j = 0; j < 4; j ++)
            {
                thid = omp_get_thread_num(); 
                //cout << "thid " << thid << endl;
                B(thid * brow+i,j) = A(thid*brow+i, j);
            }
        }
        cout << "IN rank 0" << endl;
        cout << B << endl;
        cout << "IN rank 0" << endl;
        MPI_Send(B.data(), 32, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Recv(B.data(), 32, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &status);
        cout << "IN rank 1" << endl;
        cout << B << endl;
        cout << "IN rank 1" << endl;
    }
    MPI_Finalize();
    return 0;
}

In my example code, I want to launch 2 threads to copy data from matrix A to matrix B, and my machine has 4 cores. But when run the program, the matrix B has only got half the data.

$ mpirun -n 2 ./shareMem
IN rank 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 0
IN rank 1
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 1

$ mpirun -n 4 ./shareMem # it just hang on and doesn't exit
IN rank 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 0
IN rank 1
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
IN rank 1

And the output I expected is

$ mpirun -n 2 ./shareMem # it just hang on and doesn't exit
IN rank 0
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
IN rank 0
IN rank 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
IN rank 1

How can I fix it and make 2 threads run in my code? Thank you!


Solution

  • Change

    #pragma omp parallel shared(A, B) private(i, j, brow, bnum, thid) num_threads(2)
    

    to

    #pragma omp parallel shared(A, B) private(i, j, thid) num_threads(2)
    

    brow, bnum are shared variables. By adding names bnum and brow to private clause you are making new auto variables with such names for each thread and by default they are undefined.