Tags: c, parallel-processing, mpi, openmpi

An error occurred in MPI_Recv while sending an array


#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
int main(int argc, char **argv)
{
    int N;
    scanf("%d", &N);
    double *a = (double *)malloc(N * sizeof(double));
    int i, rank, size, tag = 99, tag1 = 100;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) 
    {
        for(int j=0;j<N;++j)
        {
            a[j] = j+0.1;
        }
        for (i = 1; i < size; i++)
        {
            MPI_Send(&N, 1, MPI_INT, i, tag1, MPI_COMM_WORLD);
            MPI_Send(a, N, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
        }
    }
    else 
    {
        MPI_Recv(&N, 1, MPI_INT, 0, tag1, MPI_COMM_WORLD, &status);
        MPI_Recv(a, N, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
        // for(int j=0;j<N*2;++j)
            // printf("%d %f\n", rank, a[j]);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    printf("Message from process %d : %f\n", rank, a[rank]);
    MPI_Finalize();
    return 0;
} 

I'm creating the array 'a' in the 0th process and sending it to the remaining processes, but I'm getting the following error when doing this.

[nikhil:8599] *** An error occurred in MPI_Recv
[nikhil:8599] *** reported by process [4228579329,1]
[nikhil:8599] *** on communicator MPI_COMM_WORLD
[nikhil:8599] *** MPI_ERR_BUFFER: invalid buffer pointer
[nikhil:8599] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[nikhil:8599] ***    and potentially your MPI job)
[nikhil:08593] 2 more processes have sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[nikhil:08593] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages

Can anybody explain why I'm getting this error?

As you can see in the code, there is a commented-out for loop containing a print statement. The weird thing is that upon uncommenting that loop, it works fine.


Solution

  • Thoughts:

    1. MPI_Init should be the first thing in your program.
    2. Only one rank should scanf.
    3. N is not communicated across ranks before the allocation, so the non-root ranks are allocating memory of undefined size.
    4. Define variables as close to their point of usage as possible. Putting int i at the top of your function is a disaster waiting to happen.
    5. The barrier at the end is unnecessary.
    6. All the ranks need to allocate their own memory.

    That gets us to this code:

    #include <stdio.h>
    #include <stdlib.h>
    #include "mpi.h"
    
    int main(int argc, char **argv){
        MPI_Init(&argc, &argv);
    
        const int tag = 99;
        const int tag1 = 100;
    
        int rank, size;
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
        double *a; //Pointer to the memory we will allocate
        int N;
    
        if (rank == 0){
            scanf("%d", &N);
    
            a = (double *)malloc(N * sizeof(double));
    
            for(int j=0;j<N;++j){
                a[j] = j+0.1;
            }
            for (int i = 1; i < size; i++){
                MPI_Send(&N, 1, MPI_INT, i, tag1, MPI_COMM_WORLD);
                MPI_Send(a, N, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
            }
        } else {
            MPI_Status status;
            MPI_Recv(&N, 1, MPI_INT, 0, tag1, MPI_COMM_WORLD, &status);
            //Have to allocate memory on all ranks
            a = (double *)malloc(N * sizeof(double)); 
            MPI_Recv(a, N, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
            // for(int j=0;j<N*2;++j)
                // printf("%d %f\n", rank, a[j]);
        }
    
        printf("Message from process %d : %f\n", rank, a[rank]);
    
        MPI_Finalize();
        return 0;
    } 
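
    To try this version out: assuming you save it as main.c (the file name here is just for illustration) and are using Open MPI, which your error log suggests, you can build and run it along these lines. Note that mpirun forwards standard input only to rank 0, which is another reason only one rank should call scanf.

        mpicc main.c -o main
        echo 8 | mpirun -np 4 ./main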
    

    Doing It Better

    A broadcast (MPI_Bcast) is your friend here:

    #include <stdio.h>
    #include <stdlib.h>
    #include "mpi.h"
    
    #define MPI_Error_Check(x) {const int err=(x); if(err!=MPI_SUCCESS) { fprintf(stderr, "MPI ERROR %d at %d.\n", err, __LINE__);}}
    
    int main(int argc, char **argv){
        MPI_Init(&argc, &argv);
    
        int rank, size;
        MPI_Error_Check(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
        MPI_Error_Check(MPI_Comm_size(MPI_COMM_WORLD, &size));
    
        int N;
        if (rank==0){
            scanf("%d", &N);
        }
    
        MPI_Error_Check(MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD));
    
        double *a = (double *)malloc(N * sizeof(double));
    
        if(rank==0){
            for(int j=0;j<N;++j){
                a[j] = j+0.1;
            }
        }
    
        printf("Message from process %d : N=%d\n", rank, N);
    
        MPI_Error_Check(MPI_Bcast(a, N, MPI_DOUBLE, 0, MPI_COMM_WORLD));
    
        fprintf(stderr, "Message from process %d : %f\n", rank, a[rank]);
    
        free(a);
    
        MPI_Finalize();
        return 0;
    } 
    

    Doing It Even Better

    The fastest form of communication is no communication at all. In your case, once the value N is known, each rank can recreate the data on its own:

    #include <stdio.h>
    #include <stdlib.h>
    #include "mpi.h"
    
    #define MPI_Error_Check(x) {const int err=(x); if(err!=MPI_SUCCESS) { fprintf(stderr, "MPI ERROR %d at %d.\n", err, __LINE__);}}
    
    int main(int argc, char **argv){
        MPI_Init(&argc, &argv);
    
        int rank, size;
        MPI_Error_Check(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
        MPI_Error_Check(MPI_Comm_size(MPI_COMM_WORLD, &size));
    
        int N;
        if (rank==0){
            scanf("%d", &N);
        }
    
        MPI_Error_Check(MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD));
    
        double *a = (double *)malloc(N * sizeof(double));
    
        for(int j=0;j<N;++j){
            a[j] = j+0.1;
        }
    
        printf("Message from process %d : N=%d\n", rank, N);
    
        fprintf(stderr, "Message from process %d : %f\n", rank, a[rank]);
    
        free(a);
    
        MPI_Finalize();
        return 0;
    }
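
    For example, with an input of 8 and four ranks, every rank rebuilds the same array, so each one reports its own element (rank + 0.1). The lines may appear in any order (and the value lines go to stderr rather than stdout), but the output should look roughly like this:

        Message from process 0 : N=8
        Message from process 1 : N=8
        Message from process 2 : N=8
        Message from process 3 : N=8
        Message from process 0 : 0.100000
        Message from process 1 : 1.100000
        Message from process 2 : 2.100000
        Message from process 3 : 3.100000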