Search code examples
cunixforkshared-memory

Why my Matrix Multipication code in c always give the garbage value? (using shared memory and fork)


This is my code in c to do Matrix Multiplication implement fork and shared memory. It seem like the value that I get is the mostly garbage value. Maybe I did not initialize the value of the array C which is the result array first. (I am beginner at C and this is my first time using the shared memory)

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/shm.h>
#include <sys/ipc.h>
#include <sys/stat.h>
#include <sys/wait.h>
#define N 3

void doMulT(int row,int col,int x ,int a[][N],int b[][N],int c[]);
int main()
{
    int Ma[N][N];
    int Mb[N][N];
    int segment_id;
    pid_t pid=-1;

        printf("Input the elements in Matrix A (%dx%d) : \n",N,N);
        for(int k = 0; k<N ;k++)
        {
                for(int d = 0; d<N ; d++)
                {
                        scanf("%d",&Ma[k][d]);
                }
        }
        printf("Input the elements in Matrix B (%dx%d) : \n",N,N);
        for(int k = 0; k<N ;k++)
        {
                for(int d = 0; d<N ; d++)
                {
                        scanf("%d",&Mb[k][d]);
                }
        }

    segment_id = shmget(IPC_PRIVATE,1024, S_IRUSR|S_IWUSR);
        int *C = (int*)shmat(segment_id,NULL,0);

    for(int i=0;i<N*N;i++)
    {
        if(pid != 0)
        {
            pid = fork();
        }
    }

    if(pid == 0)
    {
        for(int i = 0; i<N; i++)
        {
            for(int j =0; i<N; j++)
            {
                if(C[i*N+j]==0)
                {
                    doMulT(i,j,N,Ma,Mb,C);
                }
            }
        }
    }

    else
    {
        for(int k =0;k<N*N;k++)
        {
            wait(NULL);
        }
        printf("==========Result==========\n");
            for(int i=0; i<N; i++)
            {
                    for(int j=0; j<N; j++)
                    {
                            printf("%d",C[i*N+j]);
                            printf(" ");
                    }
                    printf("\n");
            }
        }
    shmdt(C);
        shmctl(segment_id, IPC_RMID, NULL);
}

void doMulT(int row,int col,int x ,int a[][N],int b[][N],int c[])
{
        for(int k=0; k<x; k++)
        {
             c[row*x+col]+=a[row][k]*b[k][col];
        }
}

The example of the input

Input the elements in Matrix A (3x3) :
1 1 1
2 2 2
3 3 3
Input the elements in Matrix B (3x3) :
1 1 1
2 2 2
3 3 3

The output is like

6 6 6
123482868 -745374 -2637821
456729394 -98475839 -2884829

Solution

  • The first part of this answer is intended to show the problems in the original program and ways to debug the problems.

    I added some printf output to see what the processes will do

        if(pid == 0)
        {
            for(int i = 0; i<N; i++)
            {
                for(int j =0; i<N; j++)
                {
                    if(C[i*N+j]==0)
                    {
                        printf("process %ld calculating (%d, %d)\n", (long)getpid(), i, j);
                        doMulT(i,j,N,Ma,Mb,C);
                    }
                }
            }
        }
    

    and get output like this:

    process 747 calculating (0, 204)                                                                                                                                                               
    process 746 calculating (0, 292)                                                                                                                                                               
    process 746 calculating (0, 293)                                                                                                                                                               
    process 747 calculating (0, 205)                                                                                                                                                               
    process 746 calculating (0, 294)                                                                                                                                                               
    process 747 calculating (0, 206)                                                                                                                                                               
    process 746 calculating (0, 295)                                                                                                                                                               
    process 747 calculating (0, 207)                                                                                                                                                               
    

    That means the loop for j is wrong.

    There is a typo:

                for(int j =0; i<N; j++)
    

    must be

                for(int j =0; j<N; j++) // j<N  instead of  i<N
    

    In addition I used memset to initialize the shared memory with 0.

    After fixing the loop I get

    process 238 calculating (0, 0)                                                                                                                                                              
    process 238 calculating (0, 1)                                                                                                                                                              
    process 238 calculating (0, 2)                                                                                                                                                              
    process 238 calculating (1, 0)                                                                                                                                                              
    process 238 calculating (1, 1)                                                                                                                                                              
    process 238 calculating (1, 2)                                                                                                                                                              
    process 238 calculating (2, 0)                                                                                                                                                              
    process 238 calculating (2, 1)                                                                                                                                                              
    process 238 calculating (2, 2)                                                                                                                                                              
    ==========Result==========                                                                                                                                                                  
    6 6 6                                                                                                                                                                                       
    12 12 12                                                                                                                                                                                    
    18 18 18                                                                                                                                                                                    
    

    This means that one child did all the work before others got active.

    A delay in the processing loop leaves a chance for other processes to get some work.

    Note: This is not meant as a real solution. The main purpose is to show that you cannot rely on the system to distribute the work between the processes.

    It also shows that the matrix multiplication is not a good use case for multiprocessing because the calculation of a matrix cell (at least for a small matrix as in your example) is apparently much faster than the creation of a new process.

        if(pid == 0)
        {
            for(int i = 0; i<N; i++)
            {
                for(int j =0; j<N; j++)
                {
                    if(C[i*N+j]==0)
                    {
                        printf("process %ld calculating (%d, %d)\n", (long)getpid(), i, j);
                        doMulT(i,j,N,Ma,Mb,C);
                        sleep(1); // give other processes a chance to do some work
                    }
                }
            }
        }
    

    For a good solution you have to implement a way to assign a specific task to every child process. One option would be to pass the loop variable i (or maybe two separate loop variables for row and column) to the processing function in the child process as shown in the following version.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/shm.h>
    #include <sys/ipc.h>
    #include <sys/stat.h>
    #include <sys/wait.h>
    #include <memory.h>
    #include <errno.h>
    #include <stdlib.h>
    
    #define N 3
    #define SIZE 1024
    
    void doMulT(int row,int col,int x ,int a[][N],int b[][N],int c[]);
    int main()
    {
        int Ma[N][N] = {
            {1, 1, 1},
            {2, 2, 2},
            {3, 3, 3}
        };
        int Mb[N][N] = {
            {1, 1, 1},
            {2, 2, 2},
            {3, 3, 3}
        };
        int segment_id;
        pid_t pid=-1;
    
    #if 0
            printf("Input the elements in Matrix A (%dx%d) : \n",N,N);
            for(int k = 0; k<N ;k++)
            {
                    for(int d = 0; d<N ; d++)
                    {
                            scanf("%d",&Ma[k][d]);
                    }
            }
            printf("Input the elements in Matrix B (%dx%d) : \n",N,N);
            for(int k = 0; k<N ;k++)
            {
                    for(int d = 0; d<N ; d++)
                    {
                            scanf("%d",&Mb[k][d]);
                    }
            }
    #endif
    
        segment_id = shmget(IPC_PRIVATE, SIZE, S_IRUSR|S_IWUSR);
            int *C = (int*)shmat(segment_id,NULL,0);
    
        memset(C, 0, SIZE);
        
        for(int i=0; (pid != 0) && (i<N); i++)
        {
            for(int j=0; (pid != 0) && (j<N); j++)
            {
                if(pid != 0)
                {
                    pid = fork();
                    if(pid == 0)
                    {
                        printf("process %ld calculating (%d, %d)\n", (long)getpid(), i, j);
                        doMulT(i,j,N,Ma,Mb,C);
                        exit(0);
                    }
                }
            }
        }
    
    
        if(pid != 0)
        {
            while(wait(NULL) != -1 || (errno != ECHILD))
            {
                ;
            }
            printf("==========Result==========\n");
                for(int i=0; i<N; i++)
                {
                        for(int j=0; j<N; j++)
                        {
                                printf("%d",C[i*N+j]);
                                printf(" ");
                        }
                        printf("\n");
                }
            }
        shmdt(C);
            shmctl(segment_id, IPC_RMID, NULL);
    }
    
    void doMulT(int row,int col,int x ,int a[][N],int b[][N],int c[])
    {
            for(int k=0; k<x; k++)
            {
                 c[row*x+col]+=a[row][k]*b[k][col];
            }
    }
    

    The output looks like this:

    process 7489 calculating (0, 0)                                                                                                                                                             
    process 7495 calculating (2, 0)                                                                                                                                                             
    process 7496 calculating (2, 1)                                                                                                                                                             
    process 7497 calculating (2, 2)                                                                                                                                                             
    process 7493 calculating (1, 1)                                                                                                                                                             
    process 7490 calculating (0, 1)                                                                                                                                                             
    process 7494 calculating (1, 2)                                                                                                                                                             
    process 7492 calculating (1, 0)                                                                                                                                                             
    process 7491 calculating (0, 2)                                                                                                                                                             
    ==========Result==========                                                                                                                                                                  
    6 6 6                                                                                                                                                                                       
    12 12 12                                                                                                                                                                                    
    18 18 18