Tags: c, performance, parallel-processing, mpi, hpc

Implementing MPI_Reduce with MPI_Send and MPI_Recv leads to wrong results


I am working on a program that uses MPI_Send() and MPI_Recv() to replace MPI_Reduce().

Everything runs except the final part of the code, which should print the pi approximation, the error, and the runtime. I also don't get the correct sum value after receiving.

I believe something is going wrong on the MPI_Recv() end, but I could be wrong. I am running with only 2 processes. The program works fine when I use MPI_Reduce() instead, without pi initialized to a value.

#include "mpi.h"
#include <stdio.h>
#include <math.h>
 
int main( int argc, char *argv[])
{
    int n, i;
    double PI25DT = 3.141592653589793238462643;
    double pi, h, sum, x;
 
    int size, rank;
    double startTime, endTime;
 
    /* Initialize MPI and get the number of processes and my rank */
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
 
    /* Processor zero sets the number of intervals and starts its clock*/
    if (rank==0)
    {
       n=600000000;
       startTime=MPI_Wtime();
       for (int i = 0; i < size; i++) {
           if (i != rank) {
               MPI_Send(&n, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
           }
       }
    } 
    /* Broadcast number of intervals to all processes */
    else 
    {
        MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
 
    /* Calculate the width of intervals */
    h   = 1.0 / (double) n;
 
    /* Initialize sum */
    sum = 0.0;
    /* Step over each interval I own */
    for (i = rank+1; i <= n; i += size)
    {
        /* Calculate midpoint of interval */
        x = h * ((double)i - 0.5);
        /* Add rectangle's area = height*width = f(x)*h */
        sum += (4.0/(1.0+x*x))*h;
    }
    /* Get sum total on processor zero */
    //MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    MPI_Send(&pi, 1, MPI_SUM, 0, 0, MPI_COMM_WORLD);
    
    if (rank == 0) 
    {
        double total_sum = 0;
        for (int i = 0; i < size; i++) 
        {
            MPI_Recv(&sum, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPI_Recv(&pi, 1, MPI_SUM, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            total_sum += sum;
            printf("Total Sum is %lf\n", total_sum);
        }
    }
    
    /* Print approximate value of pi and runtime*/
    if (rank==0)
    {
       printf("pi is approximately %.16f, Error is %e\n",
                       pi, fabs(pi - PI25DT));
       endTime=MPI_Wtime();
       printf("runtime is=%.16f",endTime-startTime);
    }
    MPI_Finalize();
    return 0;
}

Solution

  • This

    MPI_Send(&pi, 1, MPI_SUM, 0, 0, MPI_COMM_WORLD);
                     ^^^^^^^
    

    and

     MPI_Recv(&pi, 1, MPI_SUM, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                      ^^^^^^^
    

    are wrong: MPI_Send and MPI_Recv expect an MPI_Datatype as their third parameter, not an MPI_Op (such as MPI_SUM).
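
    For reference, the relevant signatures (as defined by the MPI standard) are:

    int MPI_Send(const void *buf, int count, MPI_Datatype datatype,
                 int dest, int tag, MPI_Comm comm);
    int MPI_Recv(void *buf, int count, MPI_Datatype datatype,
                 int source, int tag, MPI_Comm comm, MPI_Status *status);

    An MPI_Op such as MPI_SUM belongs only in collective reduction calls like MPI_Reduce, where it is a separate argument that follows the datatype.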

    Looking at your code, what you actually want is to replace those calls with the following (using myid and numprocs for the rank and the number of processes, as in the full example below):

    double value = 0;
    double pi = sum;   /* rank 0 seeds the total with its own partial sum */
    if (myid != 0) {
        MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    }
    else {
        for (int i = 1; i < numprocs; i++) {
            MPI_Recv(&value, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            pi += value;
        }
    }
    

    This reproduces the behavior of MPI_Reduce with MPI_SUM at root 0: every non-root rank sends its partial sum to rank 0, which accumulates the values on top of its own. Note that rank 0 no longer sends to itself; in your version, rank 0 called MPI_Send to itself before posting the matching MPI_Recv, which works only if the implementation buffers the message and can deadlock otherwise.

    A running example:

    #include "mpi.h"
    #include <stdio.h>
    #include <math.h>
     
    int main( int argc, char *argv[])
    {
        int n, i;
        double PI25DT = 3.141592653589793238462643;
        double h, sum, x;
     
        int numprocs, myid;
        double startTime, endTime;
     
        /* Initialize MPI and get the number of processes and my rank */
        MPI_Init(&argc,&argv);
        MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
        MPI_Comm_rank(MPI_COMM_WORLD,&myid);
     
        /* Processor zero sets the number of intervals and starts its clock*/
        if (myid==0) {
           n=600000000;
           startTime=MPI_Wtime();
           for (int i = 0; i < numprocs; i++) {
               if (i != myid) {
                   MPI_Send(&n, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
               }
           }
        } 
        else {
            MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
     
        /* Calculate the width of intervals */
        h   = 1.0 / (double) n;
     
        /* Initialize sum */
        sum = 0.0;
        /* Step over each interval I own */
        for (i = myid+1; i <= n; i += numprocs) {
            /* Calculate midpoint of interval */
            x = h * ((double)i - 0.5);
            /* Add rectangle's area = height*width = f(x)*h */
            sum += (4.0/(1.0+x*x))*h;
        }
        /* Get sum total on processor zero */
        //MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
        double value = 0;
        double pi = sum;
        if (myid != 0) {
            MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
        }
        else {
            for (int i = 1; i < numprocs; i++) {
                MPI_Recv(&value, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                pi += value;
            }
        }
        
        /* Print approximate value of pi and runtime*/
        if (myid==0) {
           printf("pi is approximately %.16f, Error is %e\n",
                           pi, fabs(pi - PI25DT));
           endTime=MPI_Wtime();
           printf("runtime is=%.16f",endTime-startTime);
        }
        MPI_Finalize();
        return 0;
    }
    

    The output (with 2 processes):

    pi is approximately 3.1415926535898993, Error is 1.061373e-13
    runtime is=1.3594319820404053
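
    To compile and launch with 2 processes (assuming an MPI implementation such as Open MPI or MPICH is installed; the file name pi.c is just a placeholder):

    mpicc pi.c -o pi
    mpirun -np 2 ./pi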