Search code examples
cmpihpc

MPI_Wtime() reports that my program takes 5 seconds, even though the input data is tiny


I wrote a matrix multiplication program in C using MPI, and it reports a global runtime of approximately 5 seconds. When I run the same computation in Python with mpi4py, it takes only a few milliseconds. What is the problem with the C MPI version? It does not feel like 5 seconds when I run it in a Linux shell: the output appears almost immediately, yet the reported global time is still about 5 seconds. The C code is below.

#define N 4
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"


/* Prints the label `prompt` followed by the N x N matrix `a`, one row per line. */
void print_results(char *prompt, int a[N][N]);

/*
 * Multiplies two N x N integer matrices, computing one row of the product
 * per MPI rank.
 *
 * Rank 0 reads how many rows and columns (1..N each) to fill with random
 * digits, scatters the rows of A, broadcasts B, and gathers the product rows
 * into C. The program must be launched with exactly N processes because each
 * rank's buffers (aa, cc) hold exactly one row.
 *
 * Timing note: the timer starts right after MPI_Init, so every rank's
 * duration includes the time rank 0 spends waiting for keyboard input.
 * "Global runtime" is the MPI_SUM of the per-rank durations, i.e. roughly
 * `size` times the wall-clock time, not the wall-clock time itself.
 *
 * Returns 0 on success, EXIT_FAILURE when started with a process count
 * other than N. Invalid input on rank 0 aborts the whole job.
 */
int main(int argc, char *argv[])
{
    int a[N][N] = {{0}};    /* zeroed so cells outside row x col are defined */
    int b[N][N] = {{0}};
    int c[N][N] = {{0}};
    int aa[N], cc[N];       /* this rank's row of A and its row of the product */
    int row = 0, col = 0;
    int i, j, rank, size;
    double time1, time2, duration, global = 0.0;

    MPI_Init(&argc, &argv);
    time1 = MPI_Wtime();
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Every rank sees the same size, so all of them leave together here. */
    if (size != N) {
        if (rank == 0) {
            fprintf(stderr, "this program must be run with exactly %d processes\n", N);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0) {
        /* Flush so the prompt is visible before scanf blocks. */
        printf("enter the number of row =");
        fflush(stdout);
        if (scanf("%d", &row) != 1 || row < 1 || row > N) {
            fprintf(stderr, "row count must be an integer between 1 and %d\n", N);
            /* The other ranks are already waiting in MPI_Scatter; abort them too. */
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        printf("enter the number of column =");
        fflush(stdout);
        if (scanf("%d", &col) != 1 || col < 1 || col > N) {
            fprintf(stderr, "column count must be an integer between 1 and %d\n", N);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }

        /*
         * Seed exactly once: reseeding with the same time(NULL) value before
         * filling b would replay the sequence and make b a copy of a.
         */
        srand((unsigned)time(NULL));

        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                a[i][j] = rand() % 10;
            }
        }

        for (i = 0; i < row; i++) {
            for (j = 0; j < col; j++) {
                b[i][j] = rand() % 10;
            }
        }
    }

    /* size == N here, so each rank receives exactly one row of N ints. */
    MPI_Scatter(a, N, MPI_INT, aa, N, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Barrier(MPI_COMM_WORLD);

    /* cc[i] = dot product of this rank's row of A with column i of B. */
    for (i = 0; i < N; i++) {
        int sum = 0;

        for (j = 0; j < N; j++) {
            sum += aa[j] * b[j][i];
        }
        cc[i] = sum;
    }

    MPI_Gather(cc, N, MPI_INT, c, N, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    time2 = MPI_Wtime();
    duration = time2 - time1;

    /* MPI_SUM adds up every rank's duration; use MPI_MAX for wall-clock time. */
    MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        printf("Global runtime is %f\n", global);
    }
    printf("Runtime at %d is %f \n", rank, duration);
    MPI_Finalize();

    /* Plain stdio is still allowed after MPI_Finalize; only MPI calls are not. */
    if (rank == 0) {
        print_results("C = ", c);
    }
    return 0;
}

/*
 * Writes the label `prompt` and then the N x N matrix `a` to stdout.
 * Each matrix row goes on its own line, every value preceded by one space.
 */
void print_results(char *prompt, int a[N][N])
{
    int r = 0;

    printf("\n\n%s\n", prompt);
    while (r < N) {
        const int *cell = a[r];
        const int *row_end = cell + N;

        /* Walk the current row with a pointer instead of a column index. */
        for (; cell != row_end; ++cell) {
            printf(" %d", *cell);
        }
        printf("\n");
        ++r;
    }
    printf("\n\n");
}

The output it gives is

4
4
enter the number of row =enter the number of column =Global runtime is 5.975327
Runtime at 0 is 1.493793 
Runtime at 1 is 1.493793 
Runtime at 2 is 1.493877 
Runtime at 3 is 1.493865 


C = 
 78 83 142 116
 128 138 236 194
 39 49 112 71
 96 109 204 156

Please let me know if there is some problem with the code!!


Solution

  • As discussed in the comments, I moved time1 = MPI_Wtime(); to after the input phase and added an MPI_Barrier just before it.

    Take a look at the modified code :

    #define N 4
    #include <stdio.h>
    #include <math.h>
    #include <stdlib.h>
    #include <time.h>
    #include "mpi.h"
    
    
    /* Prints the label `prompt` followed by the N x N matrix `a`, one row per line. */
    void print_results(char *prompt, int a[N][N]);
    
    /*
     * Multiplies two N x N integer matrices, computing one row of the product
     * per MPI rank.
     *
     * Rank 0 reads how many rows and columns (1..N each) to fill with random
     * digits, scatters the rows of A, broadcasts B, and gathers the product rows
     * into C. The program must be launched with exactly N processes because each
     * rank's buffers (aa, cc) hold exactly one row.
     *
     * Timing note: the timer starts after a barrier that follows the input
     * phase, so the wait for keyboard input is excluded. "Global runtime" is
     * the MPI_SUM of the per-rank durations, i.e. roughly `size` times the
     * wall-clock time, not the wall-clock time itself.
     *
     * Returns 0 on success, EXIT_FAILURE when started with a process count
     * other than N. Invalid input on rank 0 aborts the whole job.
     */
    int main(int argc, char *argv[])
    {
        int a[N][N] = {{0}};    /* zeroed so cells outside row x col are defined */
        int b[N][N] = {{0}};
        int c[N][N] = {{0}};
        int aa[N], cc[N];       /* this rank's row of A and its row of the product */
        int row = 0, col = 0;
        int i, j, rank, size;
        double time1, time2, duration, global = 0.0;

        MPI_Init(&argc, &argv);

        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* Every rank sees the same size, so all of them leave together here. */
        if (size != N) {
            if (rank == 0) {
                fprintf(stderr, "this program must be run with exactly %d processes\n", N);
            }
            MPI_Finalize();
            return EXIT_FAILURE;
        }

        if (rank == 0) {
            /* Flush so the prompt is visible before scanf blocks. */
            printf("enter the number of row =");
            fflush(stdout);
            if (scanf("%d", &row) != 1 || row < 1 || row > N) {
                fprintf(stderr, "row count must be an integer between 1 and %d\n", N);
                /* The other ranks are already waiting in MPI_Barrier; abort them too. */
                MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            }

            printf("enter the number of column =");
            fflush(stdout);
            if (scanf("%d", &col) != 1 || col < 1 || col > N) {
                fprintf(stderr, "column count must be an integer between 1 and %d\n", N);
                MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            }

            /*
             * Seed exactly once: reseeding with the same time(NULL) value before
             * filling b would replay the sequence and make b a copy of a.
             */
            srand((unsigned)time(NULL));

            for (i = 0; i < row; i++) {
                for (j = 0; j < col; j++) {
                    a[i][j] = rand() % 10;
                }
            }

            for (i = 0; i < row; i++) {
                for (j = 0; j < col; j++) {
                    b[i][j] = rand() % 10;
                }
            }
        }

        /* Line all ranks up after the input phase so they start timing together. */
        MPI_Barrier(MPI_COMM_WORLD);
        time1 = MPI_Wtime();

        /* size == N here, so each rank receives exactly one row of N ints. */
        MPI_Scatter(a, N, MPI_INT, aa, N, MPI_INT, 0, MPI_COMM_WORLD);

        MPI_Bcast(b, N * N, MPI_INT, 0, MPI_COMM_WORLD);

        MPI_Barrier(MPI_COMM_WORLD);

        /* cc[i] = dot product of this rank's row of A with column i of B. */
        for (i = 0; i < N; i++) {
            int sum = 0;

            for (j = 0; j < N; j++) {
                sum += aa[j] * b[j][i];
            }
            cc[i] = sum;
        }

        MPI_Gather(cc, N, MPI_INT, c, N, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);

        time2 = MPI_Wtime();
        duration = time2 - time1;

        /* MPI_SUM adds up every rank's duration; use MPI_MAX for wall-clock time. */
        MPI_Reduce(&duration, &global, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
        if (rank == 0) {
            printf("Global runtime is %f\n", global);
        }
        printf("Runtime at %d is %f \n", rank, duration);
        MPI_Finalize();

        /* Plain stdio is still allowed after MPI_Finalize; only MPI calls are not. */
        if (rank == 0) {
            print_results("C = ", c);
        }
        return 0;
    }
    
    /*
     * Writes the label `prompt` and then the N x N matrix `a` to stdout.
     * Each matrix row goes on its own line, every value preceded by one space.
     */
    void print_results(char *prompt, int a[N][N])
    {
        int r = 0;

        printf("\n\n%s\n", prompt);
        while (r < N) {
            const int *cell = a[r];
            const int *row_end = cell + N;

            /* Walk the current row with a pointer instead of a column index. */
            for (; cell != row_end; ++cell) {
                printf(" %d", *cell);
            }
            printf("\n");
            ++r;
        }
        printf("\n\n");
    }
    

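    By doing so, you no longer include the time spent waiting for user input, and placing the barrier before the first timing call ensures that all processes start timing at nearly the same moment. Note also that "Global runtime" is computed with MPI_SUM, so it is the sum of the per-process durations (about four times the per-process time with 4 processes), not the wall-clock time.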

    Also beware that your code only works with a 4x4 matrix and 4 processes: N is fixed at 4, and each process receives exactly one row of N elements!

    Apart from that, you should get output like this:

    mpirun -n 4 a.out
    enter the number of row =4
    enter the number of column =4
    Global runtime is 0.005867
    Runtime at 0 is 0.001474
    Runtime at 1 is 0.001464
    Runtime at 2 is 0.001464
    Runtime at 3 is 0.001466