Search code examples

Unexpected distribution of columns when using MPI_Scatter with MPI_Type_vector

I am trying to divide the columns of a 2D matrix among N processes with MPI. As a template I used the example from "MPI_Scatter - sending columns of 2D array".

My code:

    /*
     * Allocate an n x m board of chars backed by ONE contiguous buffer and fill
     * every cell with 1 (when rand() < 0.25 * RAND_MAX) or 0 otherwise.
     *
     * The returned row table b satisfies b[k] == b[0] + k * m, so b[0] (i.e. *b)
     * can be passed to MPI as a flat buffer of n * m chars.
     *
     * Returns NULL if either allocation fails. The caller owns both allocations
     * and releases them with free(b[0]) followed by free(b).
     */
    char** board_initialize(int n, int m)
    {
        int k, l;

        /* Do the size arithmetic in size_t so n * m cannot overflow int. */
        char* bd = malloc(sizeof *bd * (size_t)n * (size_t)m);
        char** b = malloc(sizeof *b * (size_t)n);
        if (bd == NULL || b == NULL) {
            free(bd);
            free(b);
            return NULL;
        }

        /* Row k starts k * m chars into the shared buffer. */
        for (k = 0; k < n; k++)
            b[k] = &bd[(size_t)k * (size_t)m];

        for (k = 0; k < n; k++)
            for (l = 0; l < m; l++)
                b[k][l] = rand() < 0.25 * RAND_MAX;

        return b;
    }

    /*
     * Print an n x m board to stdout: each cell as a decimal integer, one board
     * row per output line, followed by a blank line so that boards printed one
     * after another stay visually separate.
     */
    void board_print(char** b, int n, int m)
    {
        int k, l;

        for (k = 0; k < n; k++) {
            for (l = 0; l < m; l++)
                printf("%d", b[k][l]);
            /* Terminate the row; otherwise the whole board is one long line. */
            printf("\n");
        }
        printf("\n");
    }

    // Question's failing example: rank 0 builds an N x N board, every rank
    // allocates an N x mycols board, a single strided column datatype is built
    // and used for BOTH the send and the receive side of MPI_Scatter, and each
    // rank prints what it received.
    // NOTE(review): the braces of this function are absent from this listing.
    int main(int argc, char* argv[])
        int N = 10;
        int i, j;

        char * boardptr = NULL;                 // ptr to board
        char ** board;                          // board, 2D matrix, contiguous memory allocation!

        int procs, myid;            // number of processes and this process's rank
        int mycols;
        char ** myboard;                        // part of board that belongs to a process

        MPI_Init(&argc, &argv);                 // initialization
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);   // process ID
        MPI_Comm_size(MPI_COMM_WORLD, &procs);  // number of processes

        // initialize global board
        // NOTE(review): without braces only the first statement below is governed
        // by the `if`; the indentation suggests all three were meant to run on
        // rank 0 only (on other ranks `board` is never assigned).
        if (myid == 0)
            board = board_initialize(N, N);
            boardptr = *board;
            board_print(board, N, N);
        // divide work
        // Integer division: if procs does not divide N, the remaining columns are
        // not assigned to any rank.
        mycols = N / procs;

        // initialize my structures
        myboard = board_initialize(N,mycols);

        // One column of a row-major matrix whose rows are N chars wide:
        // N blocks of 1 char, consecutive blocks N chars apart.
        MPI_Datatype column_not_resized, column_resized;
        MPI_Type_vector(N, 1, N, MPI_CHAR, &column_not_resized);
        // Extent set to one char so that consecutive columns start one char apart.
        MPI_Type_create_resized(column_not_resized, 0, 1*sizeof(char), &column_resized);
        // NOTE(review): no MPI_Type_commit call is visible in this listing before
        // the type is used for communication.

        // scatter initial matrix
        // NOTE(review): column_resized (stride N) is also used as the receive type,
        // but myboard was allocated with rows of mycols chars, so the stride does
        // not match the receive buffer layout.
        MPI_Scatter(boardptr, mycols, column_resized, *myboard, mycols, column_resized, 0, MPI_COMM_WORLD);

        board_print(myboard, N, mycols);
        MPI_Finalize();         // finalize MPI

        return 0;

The whole board looks like:


And if I use 2 processes, I expect that each process would get half of the board (the first process columns 1-5 and the second process columns 6-10). But if I print myboard on both processes I get some strange results:

proc0:       proc1:
0 0 0 0 0    1 0 0 1 0 
0 0 1 0 0    0 0 0 1 1 
0 1 0 0 0    0 0 0 0 0 
0 1 0 0 0    0 1 0 1 0 
0 0 0 0 1    0 1 1 0 0 
0 1 1 0 0    0 0 1 0 0 
0 1 0 1 0    0 0 0 1 0 
0 0 0 0 1    0 0 1 0 0 
1 0 0 0 0    0 0 1 0 0 
0 1 0 0 0    0 1 0 0 0 

It's probably some silly mistake, but I just can't seem to find it. Any help would be really appreciated. <3

Note: the output of proc1 is probably just some garbage, because I get different output on every run.


  • You forgot to differentiate between the send and receive types: the parameters of MPI_Type_vector depend on whether the type describes the send buffer or the receive buffer. You need to do something like the following:

    MPI_Datatype acol, acoltype, bcol, bcoltype;
    if (myid == 0) {
        MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
        MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
    MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
    MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
    MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);

    From the sender point of view you want to create N blocks (i.e., N rows), with a size of 1 and a stride of N (i.e., N columns). Hence:

    MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);

    From the receiver point of view you want to create N blocks (i.e., N rows), with a size of 1 and a stride of mycols (i.e., mycols columns). Hence:

    MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);

    Side note: you do not need an MPI_Barrier after the MPI_Scatter, since the latter is already a blocking call.

    The final code would look like the following:

    #include <stdio.h>
    #include <stdlib.h>
    #include <mpi.h>
        /*
         * Allocate an n x m board of chars backed by ONE contiguous buffer and
         * fill every cell with 1 (when rand() < 0.25 * RAND_MAX) or 0 otherwise.
         *
         * The returned row table b satisfies b[k] == b[0] + k * m, so b[0]
         * (i.e. *b) can be passed to MPI as a flat buffer of n * m chars.
         *
         * Returns NULL if either allocation fails. The caller owns both
         * allocations and releases them with free(b[0]) followed by free(b).
         */
        char** board_initialize(int n, int m)
        {
            int k, l;
            /* Do the size arithmetic in size_t so n * m cannot overflow int. */
            char* bd = malloc(sizeof *bd * (size_t)n * (size_t)m);
            char** b = malloc(sizeof *b * (size_t)n);
            if (bd == NULL || b == NULL) {
                free(bd);
                free(b);
                return NULL;
            }
            /* Row k starts k * m chars into the shared buffer. */
            for (k = 0; k < n; k++)
                b[k] = &bd[(size_t)k * (size_t)m];
            for (k = 0; k < n; k++)
                for (l = 0; l < m; l++)
                    b[k][l] = rand() < 0.25 * RAND_MAX;
            return b;
        }
        /*
         * Print an n x m board to stdout: each cell as a decimal integer, one
         * board row per output line, followed by a blank line so that boards
         * printed one after another stay visually separate.
         */
        void board_print(char** b, int n, int m)
        {
            int k, l;
            for (k = 0; k < n; k++) {
                for (l = 0; l < m; l++)
                    printf("%d", b[k][l]);
                /* Terminate the row; otherwise the board is one long line. */
                printf("\n");
            }
            printf("\n");
        }
       int main(int argc, char* argv[])
            int N = 10;
            int i, j;
            char * boardptr = NULL;                 // ptr to board
            char ** board;                          // board, 2D matrix, contignous memory allocation!
            int procs, myid;            
            int mycols;
            char ** myboard;                        // part of board that belongs to a process
            MPI_Init(&argc, &argv);                 // initiailzation
            MPI_Comm_rank(MPI_COMM_WORLD, &myid);   // process ID
            MPI_Comm_size(MPI_COMM_WORLD, &procs);  // number of processes
            // initialize global board
            if (myid == 0)
                board = board_initialize(N, N);
                boardptr = *board;
                board_print(board, N, N);
            // divide work
            mycols = N / procs;
            // initialize my structures
            myboard = board_initialize(N,mycols);
        MPI_Datatype acol, acoltype, bcol, bcoltype;
        if (myid == 0) {
                MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
        MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
        MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
            board_print(myboard, N, mycols);
            MPI_Finalize();         // finalize MPI
            return 0;

    Alternatively, and much easier in my opinion, you can divide the board by rows instead of columns. The code would then look like the following:

    // Row-wise split: each rank's rows are contiguous in the row-major board, so
    // no derived datatype is needed.
    int myrows = N / procs;
    myboard = board_initialize(myrows, N);
    // Counts are in MPI_CHAR elements, not rows: each rank gets myrows * N chars.
    MPI_Scatter (boardptr, myrows * N, MPI_CHAR, *myboard, myrows * N, MPI_CHAR, 0, MPI_COMM_WORLD);
    board_print(myboard, myrows, N);