Search code examples
cforkshared-memory

Sorting with shared memory with child processes


I'm trying to use child processes in my quicksort, such that the left half sorts in one child and the right half in another child. I had it working prior to the shmget implementation but now I believe I'm corrupting the array somewhere because all my values become zero after printing the array. Sorry if I'm making some dumb mistake somewhere, I'm trying to learn how to use fork and shmget but have been running into some trouble. I'm trying to take in a text file as a command line argument and given a delimiter such as ';' I have to remove the delimiter and identify the numbers in between, place them in an array and sort them using child processes. I have the parsing working, and the quicksort worked but now that I'm trying to implement the shared memory I've ran into some problems.

Thanks

I've looked at a few different examples, but the one that this is mostly based off is the geeksforgeeks example with mergesorting with fork. https://www.geeksforgeeks.org/concurrent-merge-sort-in-shared-memory/

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "fileParser.h"
#include "dataManagement.h"

int main(int argc, char *argv[]){
    char *file = argv[1];
    char delimiter = argv[2][0];
    MyArray *theArray = getArray(file, delimiter);

    size_t SHM_SIZE = theArray->length;

    theArray->key = IPC_PRIVATE;


    if((theArray->shmid = shmget(theArray->key, SHM_SIZE, IPC_CREAT | 0666)) < 0){
        perror("shmget");
        _exit(-1);
    }

    if ((theArray->shm_array = shmat(theArray->shmid, NULL, 0)) == (int *) -1)
    {
        perror("shmat");
        _exit(1);
    }

    printArray(theArray, theArray->length);
    quickSortFork(theArray, 0, theArray->length-1);
    printArray(theArray, theArray->length);

    if (shmdt(theArray->shm_array) == -1)
    {
        perror("shmdt");
        _exit(1);
    }

    if (shmctl(theArray->shmid, IPC_RMID, NULL) == -1)
    {
        perror("shmctl");
        _exit(1);
    }

    return 0;
}

dataManagement.h


#include <unistd.h>
#include <sys/wait.h>
#include "fileParser.h"

int partition(MyArray *arr, int low, int high);
void quickSortFork(MyArray *arr, int low, int high);
void swap(MyArray *arr, int a, int b);


void printArray(MyArray *arr, int length) {
    for(int i = 0; i < length; i++){
        printf("%d  ", arr->shm_array[i]);
    }
    printf("\n");
}


void quickSortFork(MyArray *arr, int low, int high){
    pid_t lpid, rpid;
    rpid = fork();
    if(low < high){
        int partitionIndex = partition(arr,low, high);
        if(rpid < 0){
            perror("Right child not created.\n");
            exit(-1);
        } else if(rpid == 0 ){
            printf("I am the right child!\tMy process id: %d\n",getpid());
            quickSortFork(arr, partitionIndex + 1, high);
            exit(EXIT_SUCCESS);
        } else {
            lpid = fork();
            if(lpid < 0){
                perror("Left child not created.\n");
            } else if (lpid == 0) {
                quickSortFork(arr, low, partitionIndex - 1);
                printf("I am the left child!\tMy process id: %d\n", getpid());
                exit(EXIT_SUCCESS);
            }
        }

        int status;

        waitpid(rpid, &status, 0);
        waitpid(lpid, &status, 0);
    }
}


int partition(MyArray *arr, int low, int high){
    int i = low, j = high;
    int pivot = arr->shm_array[(low+high)/2];
    while(i < j){
        while(arr->shm_array[i] < pivot)
            i++;
        while(arr->shm_array[j] > pivot)
            j--;
        if(i < j){
            swap(arr,i,j);
        }
    }
    return i;
}


void swap(MyArray *arr, int a, int b){
    int temp = arr->shm_array[a];
    arr->shm_array[a] = arr->shm_array[b];
    arr->shm_array[b] = temp;
}

fileParser.h


int findFileLength(FILE *inFile, char delimiter);
int *parseFileToArray(FILE *inFile, char delimiter, int length);

typedef struct {
    int shmid;
    key_t key;
    int length;
    int *shm_array;
} MyArray;


MyArray *getArray(char *fileName, char delimiter){
    FILE *numberFile = fopen(fileName, "r"); // open for reading

    if (numberFile == NULL) // unable to open file
        return NULL;
    MyArray *array = malloc(sizeof(MyArray));
    array->length = findFileLength(numberFile, delimiter);
    array->shm_array = parseFileToArray(numberFile, delimiter,array->length);

    return array;

}


int findFileLength(FILE *inFile, char delimiter){
    char c;
    int length = 0;
    c = fgetc(inFile);
    while(c != EOF){
        if(c != delimiter && c != '\n'){
            length++;
            while((c = fgetc(inFile)) != EOF && c != '\n' && c != delimiter);

        } else {
            c = fgetc(inFile);
        }
    }
    rewind(inFile); // resets the file pointer to the start
    return length;
}


int *parseFileToArray(FILE *inFile, char delimiter, int length){
    int *parsedFile = malloc(sizeof(int) * length);
    char c;
    char *stringInt = malloc(sizeof(char) * 100); // string that is used to combine multiple characters and convert to an integer
    int stringIntP = 0, parsedArrayP = 0; // pointers for our arrays, the first is for the string that determines the integer, the second is for our resulting array
    c = fgetc(inFile);
    while(c != EOF){
        if(c != delimiter && c != '\n'){

            for(;c != '\n' && c != delimiter; (c = fgetc(inFile)) != EOF){
                stringInt[stringIntP++] = c;
            }
            stringIntP = 0;
            parsedFile[parsedArrayP++] = atoi(stringInt); // convert string number to integer value
            memset(stringInt, 0, 100);  // clear the string that builds the integer value from chars
        } else {
            c = fgetc(inFile);
        }
    }
    for(int i = 0; i < length; i++){
        printf("%d ", parsedFile[i]);
    }
    printf("\n");
    fclose(inFile); // close the file after using
    free(stringInt);
    return parsedFile;
}

Expected output: the array passed in unsorted first. Then the array sorted.

Actual output: array filled with 0's and program doesn't finish executing


Solution

  • There are several bugs. I was able to [finally] find them all and a working version is below.

    Here's a summary:

    1. In the fork/sort function, the rpid = fork(); is done above the main if statement. If that if is false, a zombie process is created.
    2. The size of the shared area is too small. It is only the number of elements and not sizeof(int) * number_of_elements
    3. The data is read into a non-shared area. The shared area is then created and the pointer to the actual [non-shared] data is lost. There is no copy of the data into the shared area
    4. In the fork/sort function, the call to the partition function is done after the first fork call, so it is called by both parent and child, so they conflict/race.
    5. There are way too many processes being created and some of the fork calls fail because there are no more free process slots.

    (1) As I mentioned above rpid = fork(); needs to go after if (low < high) to prevent the creation of a zombie process if the if statement is false. More about this below in section (4).


    (2) Your shared memory area is too small. It causes a segfault during final printing.

    This is incorrect:

    size_t SHM_SIZE = theArray->length;
    

    It needs to be:

    size_t SHM_SIZE = sizeof(int) * theArray->length;
    

    (3) You are creating theArray in non-shared memory from the call to getArray.

    It sets shm_array from a call to parseFileToArray. This is still in non-shared memory.

    Later, to get the shared area, you do:

    theArray->shm_array = shmat(theArray->shmid, NULL, 0)
    

    This returned value of shm_array is now in shared memory, but the data is still in the old value of shm_array [again, in non-shared memory]. The pointer to the actual data is lost forever.

    To fix this, you'll need something like:

    int *shm_array;
    if ((shm_array = shmat(theArray->shmid, NULL, 0)) == (int *) -1) {
        perror("shmat");
        _exit(1);
    }
    
    int *oldptr = theArray->shm_array;
    for (int idx = 0;  idx < theArray->length;  ++idx)
        shm_array[idx] = oldptr[idx];
    free(oldptr);
    
    theArray->shm_array = shm_array;
    

    Of course, when you get the program working, it would be better to move the shm* calls into the low level function that does the [non-shared] malloc for shm_array, so you can eliminate the extra copy operation.


    (4) In your fork routine, you are calling:

    int partitionIndex = partition(arr, low, high);
    

    You do this after the fork, so both parent and rpid child are trying to do the partition operation so they are conflicting.

    So, quickSortFork needs to start out with:

    if (low < high) {
        int partitionIndex = partition(arr, low, high);
    
        rpid = fork();
    

    (5) You are creating way too many processes and the fork calls start to fail due to unavailable process slots.

    This is probably why the program appears to hang.

    This would probably not be observable with a small number of array elements, but would occur if the array is large enough (e.g. 100,000 elements)


    Here is a working version [with some extra debug code].

    To solve the final fork problem, I created a quickSortStd that does not use fork and called that instead.

    One way to handle the issue of too many fork calls is to have quickSortFork keep track of the recursion depth and call the non-fork version after the depth gets high enough.

    As a general rule, adding more processes/threads after a certain number becomes counter productive because the overhead of switching between processes overshadows the benefits of parallelism. This is a tuning option.

    I added a simple version of that idea to quickSortFork and it seems to work, so adjust the depth limit to suit your needs.

    #include <unistd.h>
    #include <sys/wait.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <memory.h>
    
    typedef struct {
        int shmid;
        key_t key;
        int length;
        int *shm_array;
    } MyArray;
    
    int findFileLength(FILE * inFile, char delimiter);
    int *parseFileToArray(FILE * inFile, char delimiter, int length);
    int partition(MyArray * arr, int low, int high);
    void quickSortFork(MyArray * arr, int low, int high);
    void quickSortStd(MyArray * arr, int low, int high);
    void swap(MyArray * arr, int a, int b);
    
    void
    prtnice(const char *who,int *arr,int length)
    {
        int hang = 0;
    
        printf("%s: LENGTH %d\n",who,length);
    
        for (int i = 0; i < length; i++) {
            if (hang == 0)
                printf("%s/%8.8d:",who,i);
    
            printf(" %d", arr[i]);
    
            ++hang;
            if (hang == 10) {
                printf("\n");
                hang = 0;
            }
        }
        printf("\n");
    }
    
    MyArray *
    getArray(char *fileName, char delimiter)
    {
        FILE *numberFile = fopen(fileName, "r");    // open for reading
    
        if (numberFile == NULL)             // unable to open file
            return NULL;
        MyArray *array = malloc(sizeof(MyArray));
    
        array->length = findFileLength(numberFile, delimiter);
        array->shm_array = parseFileToArray(numberFile, delimiter, array->length);
    
        return array;
    }
    
    int
    findFileLength(FILE * inFile, char delimiter)
    {
        char c;
        int length = 0;
    
        c = fgetc(inFile);
        while (c != EOF) {
            if (c != delimiter && c != '\n') {
                length++;
                while ((c = fgetc(inFile)) != EOF && c != '\n' && c != delimiter);
    
            }
            else {
                c = fgetc(inFile);
            }
        }
        rewind(inFile);                     // resets the file pointer to the start
    
        return length;
    }
    
    int *
    parseFileToArray(FILE * inFile, char delimiter, int length)
    {
        int *parsedFile = malloc(sizeof(int) * length);
        char c;
        char *stringInt = malloc(sizeof(char) * 100);   // string that is used to combine multiple characters and convert to an integer
        int stringIntP = 0,
            parsedArrayP = 0;               // pointers for our arrays, the first is for the string that determines the integer, the second is for our resulting array
    
        c = fgetc(inFile);
        while (c != EOF) {
            if (c != delimiter && c != '\n') {
    
                for (; c != '\n' && c != delimiter; (c = fgetc(inFile)) != EOF) {
                    stringInt[stringIntP++] = c;
                }
                stringIntP = 0;
                parsedFile[parsedArrayP++] = atoi(stringInt);   // convert string number to integer value
                memset(stringInt, 0, 100);  // clear the string that builds the integer value from chars
            }
            else {
                c = fgetc(inFile);
            }
        }
    
        prtnice("INP",parsedFile,length);
    
        fclose(inFile);                     // close the file after using
        free(stringInt);
    
        return parsedFile;
    }
    
    void
    printArray(const char *who,MyArray * arr, int length)
    {
        prtnice(who,arr->shm_array,length);
    }
    
    void
    quickSortFork(MyArray * arr, int low, int high)
    {
        pid_t lpid,
         rpid;
    
        static int depth = 0;
        if (depth++ > 5) {
            quickSortStd(arr,low,high);
            --depth;
            return;
        }
    
        printf("Fork: ENTER low=%d high=%d\n",low,high);
    
        if (low < high) {
            int partitionIndex = partition(arr, low, high);
    
            rpid = fork();
            if (rpid < 0) {
                perror("Right child not created.\n");
                exit(-1);
            }
    
            if (rpid == 0) {
                printf("I am the right child!\tMy process id: %d\n", getpid());
                quickSortFork(arr, partitionIndex + 1, high);
                exit(EXIT_SUCCESS);
            }
    
            lpid = fork();
            if (lpid < 0) {
                perror("Left child not created.\n");
                exit(-1);
            }
    
            if (lpid == 0) {
                quickSortFork(arr, low, partitionIndex - 1);
                printf("I am the left child!\tMy process id: %d\n", getpid());
                exit(EXIT_SUCCESS);
            }
    
            int status;
            printf("Fork: WAIT rpid=%d\n",rpid);
            waitpid(rpid, &status, 0);
            printf("Fork: WAIT lpid=%d\n",lpid);
            waitpid(lpid, &status, 0);
        }
    
        --depth;
    
        printf("Fork: EXIT low=%d high=%d\n",low,high);
    }
    
    void
    quickSortStd(MyArray * arr, int low, int high)
    {
        pid_t lpid,
         rpid;
    
        printf("Std: ENTER low=%d high=%d\n",low,high);
    
        if (low < high) {
            int partitionIndex = partition(arr, low, high);
            quickSortStd(arr, partitionIndex + 1, high);
            quickSortStd(arr, low, partitionIndex - 1);
        }
    
        printf("Std: EXIT low=%d high=%d\n",low,high);
    }
    
    int
    partition(MyArray * arr, int low, int high)
    {
        int i = low,
            j = high;
        int pivot = arr->shm_array[(low + high) / 2];
    
        while (i < j) {
            while (arr->shm_array[i] < pivot)
                i++;
            while (arr->shm_array[j] > pivot)
                j--;
            if (i < j) {
                swap(arr, i, j);
            }
        }
        return i;
    }
    
    void
    swap(MyArray * arr, int a, int b)
    {
        int temp = arr->shm_array[a];
    
        arr->shm_array[a] = arr->shm_array[b];
        arr->shm_array[b] = temp;
    }
    
    int
    main(int argc, char *argv[])
    {
        char *file = argv[1];
        char delimiter = argv[2][0];
        MyArray *theArray = getArray(file, delimiter);
    
    #if 0
        size_t SHM_SIZE = theArray->length;
    #else
        size_t SHM_SIZE = sizeof(int) * theArray->length;
    #endif
    
        setlinebuf(stdout);
    
        theArray->key = IPC_PRIVATE;
    
        if ((theArray->shmid = shmget(theArray->key, SHM_SIZE, IPC_CREAT | 0666)) < 0) {
            perror("shmget");
            _exit(-1);
        }
    
        printArray("BEF",theArray, theArray->length);
    
        int *shm_array;
        if ((shm_array = shmat(theArray->shmid, NULL, 0)) == (int *) -1) {
            perror("shmat");
            _exit(1);
        }
    
        int *oldptr = theArray->shm_array;
        for (int idx = 0;  idx < theArray->length;  ++idx)
            shm_array[idx] = oldptr[idx];
        free(oldptr);
    
        theArray->shm_array = shm_array;
    
        printArray("SHM",theArray, theArray->length);
    #if 1
        quickSortFork(theArray, 0, theArray->length - 1);
    #else
        quickSortStd(theArray, 0, theArray->length - 1);
    #endif
        printArray("AFT",theArray, theArray->length);
    
        if (shmdt(theArray->shm_array) == -1) {
            perror("shmdt");
            _exit(1);
        }
    
        if (shmctl(theArray->shmid, IPC_RMID, NULL) == -1) {
            perror("shmctl");
            _exit(1);
        }
    
        return 0;
    }