Search code examples
cmpims-mpi

Difference between MPICH and MS MPI


I implemented a Sudoku solver which runs as expected with MPICH on my Linux machine. Every process is working, which can be seen on the task manager. The log file confirms everything too.

But somehow I get a weird behavior when I compile and run the same program on a Windows machine. When I take a look on the running processes more than half of all processes are idle. After hours of debugging I just could not figure out why this happens.

So I thought that maybe MPICH and MS MPI have significant differences responsible for this, but I could not find anything about it. So here I am asking you guys.

I know that, the code is everything but efficient. It's the first version.

Code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <mpi.h>
#include <errno.h>
#include <unistd.h>

#define GETVALUE(x) x-48
#define LOG 0

struct sudoku_stack {
    int maxsize;
    int top;
    int** sudokus;
};

struct stack {
    int maxsize;
    int top;
    int* list;
};

int* read_sudoku(char* file, int* r_size, int* m_size, int* v_size);
void print_sudoku(int* sudoku, int m_size, int v_size);
void teardown(struct sudoku_stack* sudoku_stack_ptr, struct stack* idle_stack);//struct stack* idle_stack, struct stack* running_stack, struct sudoku_stack* sudoku_stack_ptr);##

int is_safe(int* grid, int i, int num, int r_size, int m_size, int v_size);
int is_safe_first_empty_cell(int *sudoku, int num, int r_size, int m_size, int v_size);
int is_solved(int* sudoku, int v_size); 
void insert_to_first_empty_cell(int* sudoku, int num, int v_size);

struct sudoku_stack* init_sudoku_stack(int capacity, int v_size);
int sudoku_stack_empty(struct sudoku_stack *stack_ptr);
int sudoku_stack_size(struct sudoku_stack *stack_ptr);
int sudoku_stack_full(struct sudoku_stack *stack_ptr);
void push_sudoku(struct sudoku_stack *sudoku_stack_ptr, int* sudoku, int v_size);
int* peek_sudoku(struct sudoku_stack *sudoku_stack_ptr);
int* pop_sudoku(struct sudoku_stack *sudoku_stack_ptr);
void print_stack_sudokus(struct sudoku_stack* stack_ptr, int m_size, int v_size);
void free_sudoku_stack(struct sudoku_stack* stack_ptr);

struct stack* init_stackk(int capacity);
int stack_empty(struct stack* stack_ptr);
int stack_size(struct stack* stack_ptr);
int stack_full(struct stack* stack_ptr);
void push(struct stack* stack_ptr, int i);
int peek(struct stack* stack_ptr);
int pop(struct stack* stack_ptr);
void free_stack(struct stack* stack_ptr);

void init_idle_stack(struct stack* stack_ptr);
void init_worker_list(int* list, int size);

int main(int argc, char** argv) {
    int rank, env_size;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &env_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double starttime, endtime;

    starttime = MPI_Wtime();

    if(rank == 0) {
        int r_size, m_size, v_size;
 
        int* sudoku = read_sudoku(argv[1], &r_size, &m_size, &v_size);
        int dim[3] = { r_size, m_size, v_size };

        MPI_Bcast(dim, 3, MPI_INT, rank, MPI_COMM_WORLD);

        struct stack* idle_stack = init_stackk(env_size-1); 
        struct sudoku_stack* sudoku_stack_ptr = init_sudoku_stack(10000, v_size); // only master process manages the stack

        push_sudoku(sudoku_stack_ptr, sudoku, v_size);
 
        while(1) {    
            int idle_stack_size = stack_size(idle_stack);
            int _sudoku_stack_empty = sudoku_stack_empty(sudoku_stack_ptr);

            if (stack_full(idle_stack)) {
                for(int i = 1; i < stack_size(idle_stack); i++) {
                    MPI_Send(&i, 1, MPI_INT, i, 3, MPI_COMM_WORLD);
                }
            }

            if (!_sudoku_stack_empty && idle_stack_size > 0) {
                for(int i = 0; i < idle_stack_size; i++) {
                    int _rank = pop(idle_stack);
                    int* _sudoku = pop_sudoku(sudoku_stack_ptr);
                    if(LOG) {
                        printf("PUSHED FROM IDLE %d\n", _rank);
                        printf("STACK SIZE: %d\n", sudoku_stack_size(sudoku_stack_ptr));
                    }
                    MPI_Send(_sudoku, v_size, MPI_INT, _rank, 0, MPI_COMM_WORLD);
                    if(sudoku_stack_empty(sudoku_stack_ptr))
                        break;
                }
            }

            MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            if(LOG) {
                printf("MSG FROM %d\n", status.MPI_SOURCE);
            }
            if(status.MPI_TAG == 0) {
                if(!sudoku_stack_empty(sudoku_stack_ptr)) {
                    int i;
                    if(LOG) {
                        printf("SENDING TASK TO %d\n", status.MPI_SOURCE);
                    }
                    MPI_Recv(&i, 1, MPI_INT, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                    int *next_sudoku = pop_sudoku(sudoku_stack_ptr);
                    MPI_Send(next_sudoku, v_size, MPI_INT, status.MPI_SOURCE, 0, MPI_COMM_WORLD);
                } else {
                    int i;
                    if(LOG) {
                        printf("PUSHED ONTO IDLE %d\n", status.MPI_SOURCE);
                    }
                    MPI_Recv(&i, 1, MPI_INT, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                    push(idle_stack, status.MPI_SOURCE); 
                }

            } else if(status.MPI_TAG == 1) {
                int count;
                MPI_Get_count(&status, MPI_INT, &count);
                
                int* recv_sudokus = (int*)malloc(count * sizeof(int));
                MPI_Recv(recv_sudokus, count, MPI_INT, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);

                for(int i = 0; i < count; i+=v_size) {
                    push_sudoku(sudoku_stack_ptr, &recv_sudokus[i], v_size);  
                }
                free(recv_sudokus);

            } else if(status.MPI_TAG == 2) {
                int count;
                MPI_Get_count(&status, MPI_INT, &count);
                
                MPI_Recv(sudoku, count, MPI_INT, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
                
                for(int i = 1; i < env_size; i++) {
                    if(i != status.MPI_SOURCE) {
                        MPI_Send(&i, 1, MPI_INT, i, 3, MPI_COMM_WORLD);
                    }
                }

                free_stack(idle_stack);
                free_sudoku_stack(sudoku_stack_ptr);
                
                break;
            }
        }

        print_sudoku(sudoku, m_size, v_size);
        
        free(sudoku);

    } else {
        int dim[3];

        MPI_Bcast(dim, 3, MPI_INT, 0, MPI_COMM_WORLD);

        int r_size = dim[0];
        int m_size = dim[1];
        int v_size = dim[2];

        int* sudoku2 = (int*)malloc(sizeof(int)*v_size);
        int* possible_sudokus = (int*)malloc(sizeof(int)*m_size*v_size);  
        int* sudoku_cp = (int*)malloc(sizeof(int)*v_size);
        
        if (sudoku_cp == NULL || possible_sudokus == NULL || sudoku2 == NULL) {
            printf("ERROR\n");
            exit(1);
        }

        while(1) {
            int i = 0;
            MPI_Send(&i, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);

            MPI_Recv(sudoku2, v_size, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);

            if(status.MPI_TAG == 3) {
                break;
            }

            int index = 0;
            int solved = 0;
            char printed = 0;

            for(int i = 1; i <= m_size; i++) {
                int is_safe_res = is_safe_first_empty_cell(sudoku2, i, r_size, m_size, v_size);
                if(printed == 0 && LOG) {
                    printf("PROCESS %d WORKING\n", rank);
                    printed=1;
                }
                if(is_safe_res) {
                    memcpy(sudoku_cp, sudoku2, sizeof(int)*v_size);
                    insert_to_first_empty_cell(sudoku_cp, i, v_size);
                    memcpy(&(possible_sudokus[index]), sudoku_cp, sizeof(int)*v_size);
                    index+=v_size;
                    if(is_solved(sudoku_cp, v_size)){
                        solved = 1;
                        break;
                    }
                }
                
            }

            if(!solved && index != 0) {
                MPI_Send(possible_sudokus, index, MPI_INT, 0, 1, MPI_COMM_WORLD);
            } else if (solved) {
                MPI_Send(possible_sudokus, index, MPI_INT, 0, 2, MPI_COMM_WORLD);
                break;
            }
        }

        free(sudoku2);
        free(possible_sudokus);
        free(sudoku_cp);
    }

    if (rank == 0) {
        endtime = MPI_Wtime();
        printf("time: %f\n", endtime - starttime);
    }

    MPI_Finalize();
    return 0;
}

void init_worker_list(int* list, int size) {
    for(int i = 0; i < size; i++) {
        list[i]=i;
    }
}

void teardown(struct sudoku_stack* sudoku_stack_ptr, struct stack* idle_stack) {//struct stack* idle_stack, struct stack* running_stack, struct sudoku_stack* sudoku_stack_ptr) {##
    free_sudoku_stack(sudoku_stack_ptr);
    free_stack(idle_stack);
}

void init_idle_stack(struct stack* stack_ptr) {
    for(int i = 1; i <= stack_ptr->maxsize; i++) {
        push(stack_ptr, i);
    }
}

void free_stack(struct stack* stack_ptr) {
    free(stack_ptr->list);
    free(stack_ptr);
}

int pop(struct stack* stack_ptr) {
    if(stack_empty(stack_ptr)) {
        printf("[!] Stack empty - POP\n");
        exit(EXIT_FAILURE);
    }
    return stack_ptr->list[stack_ptr->top--];
}

int peek(struct stack* stack_ptr) {
    if(stack_empty(stack_ptr)) {
        printf("[!] Stack empty - PEEK\n");
        exit(EXIT_FAILURE);
    }
    return stack_ptr->list[stack_ptr->top];
}

void push(struct stack* stack_ptr, int i) {
    if(stack_full(stack_ptr)) {
        printf("[!] Stack overflow - PUSH\n");
        exit(EXIT_FAILURE);
    }
    stack_ptr->list[++stack_ptr->top] = i;
}

int stack_full(struct stack* stack_ptr) {
    return stack_ptr->top == stack_ptr->maxsize-1;
}

int stack_size(struct stack* stack_ptr) {
    return stack_ptr->top+1;
}

int stack_empty(struct stack* stack_ptr) {
    return stack_ptr->top == -1;
}

struct stack* init_stackk(int capacity) {
    struct stack *stack_ptr = (struct stack*)malloc(sizeof(struct stack));
    if (stack_ptr == NULL) {
        printf("INIT STACK ERROR\n");
    }

    stack_ptr->maxsize = capacity;
    stack_ptr->top = -1;
    stack_ptr->list = (int*)malloc(capacity*sizeof(int));

    return stack_ptr;
}

void print_stack_sudokus(struct sudoku_stack* stack_ptr, int m_size, int v_size) {
    for(int i = 0; i <= stack_ptr->top; i++) {
        print_sudoku(stack_ptr->sudokus[i], m_size, v_size);
        printf("\n");
    }
}

void free_sudoku_stack(struct sudoku_stack* stack_ptr) {
    //free(stack_ptr->sudokus);
    for(int i = 0; i < stack_ptr->maxsize; i++) {
        free(stack_ptr->sudokus[i]);
    }
    free(stack_ptr);
}

int* pop_sudoku(struct sudoku_stack *sudoku_stack_ptr) {
    if(sudoku_stack_empty(sudoku_stack_ptr)) {
        printf("[!] Stack empty - POP SUDOKU\n");
        exit(EXIT_FAILURE);
    }
    return sudoku_stack_ptr->sudokus[sudoku_stack_ptr->top--];
}

int* peek_sudoku(struct sudoku_stack *sudoku_stack_ptr) {
    if(sudoku_stack_empty(sudoku_stack_ptr)) {
        printf("[!] Stack empty - PEEK SUDOKU\n");
        exit(EXIT_FAILURE);
    }
    return sudoku_stack_ptr->sudokus[sudoku_stack_ptr->top];
}

void push_sudoku(struct sudoku_stack *sudoku_stack_ptr, int* sudoku, int v_size) {
    if(sudoku_stack_full(sudoku_stack_ptr)) {
        printf("[!] Stack overflow - PUSH SUDOKU\n");
        exit(EXIT_FAILURE);
    }
    memcpy(sudoku_stack_ptr->sudokus[++sudoku_stack_ptr->top], sudoku, sizeof(int)*v_size);
}

int sudoku_stack_full(struct sudoku_stack *sudoku_stack_ptr) {
    return sudoku_stack_ptr->top == sudoku_stack_ptr->maxsize-1;
}

int sudoku_stack_size(struct sudoku_stack *sudoku_stack_ptr) {
    return sudoku_stack_ptr->top+1;
}

int sudoku_stack_empty(struct sudoku_stack *sudoku_stack_ptr) {
    return sudoku_stack_ptr->top == -1;
}

struct sudoku_stack* init_sudoku_stack(int capacity, int v_size) {
    struct sudoku_stack *stack_ptr = (struct sudoku_stack*)malloc(sizeof(struct sudoku_stack));

    stack_ptr->maxsize = capacity;
    stack_ptr->top = -1;
    stack_ptr->sudokus = (int**)malloc(capacity*sizeof(int*));
    for(int i = 0; i < capacity; i++) {
        stack_ptr->sudokus[i] = (int*)malloc(v_size*sizeof(int));
    }

    return stack_ptr;
}

int is_safe(int* sudoku, int i, int num, int r_size, int m_size, int v_size){
    if (sudoku[i] != 0) {
        return 0;
    }

    int index = (i/m_size)*m_size;
    for (int i = index; i < index+m_size; i++) { 
        if (sudoku[i] == num) {
            return 0;
        }
    }

    // Check column
    index = i%m_size;
    for(int i = index; i < index+m_size*(m_size-1); i+=m_size) {
        if(sudoku[i] == num) {
            return 0;
        }
    }

    // Check box
    int d_rl = i % r_size;
    i = i - d_rl;
    int d_or = i / m_size;
    int h = d_or % r_size;
    index = i - h * m_size;
    for(int x = index; x < index + (m_size*(r_size-1)) + r_size; x+=m_size){
        for(int y = x; y < x+r_size; y++) {
            if(sudoku[y] == num) {
                return 0;
            }
        }

    }
    
    return 1;
}

void insert_to_first_empty_cell(int* sudoku, int num, int v_size) {
    for(int i = 0; i < v_size; i++) {
        if(sudoku[i] == 0) {
            sudoku[i] = num;
            break;
        }
    }
}

int is_safe_first_empty_cell(int *sudoku, int num, int r_size, int m_size, int v_size) {
    // Determine first empty cell position
    int found = 0;
    int i;
    for(i = 0; i < v_size; i++) {
        if(sudoku[i] == 0) {
            found = 1;
            break;
        }
    }

    if(!found) {
        return -1;
    }

    int index = (i/m_size)*m_size;
    for (int i = index; i < index+m_size; i++) { 
        if (sudoku[i] == num) {
            return 0;
        }
    }

    // Check column
    index = i%m_size;
    for(int i = index; i <= index+m_size*(m_size-1); i+=m_size) {
        if(sudoku[i] == num) {
            return 0;
        }
    }

    // Check box
    int d_rl = i % r_size;
    i = i - d_rl;
    int d_or = i / m_size;
    int h = d_or % r_size;
    index = i - h * m_size;
    for(int x = index; x < index + (m_size*(r_size-1)) + r_size; x+=m_size){
        for(int y = x; y < x+r_size; y++) {
            if(sudoku[y] == num) {
                return 0;
            }
        }

    }
    
    return 1;
}

int is_solved(int* sudoku, int v_size) {
    for (int i = 0; i < v_size; i++) {
        if(sudoku[i] == 0)
            return 0;
    }
    return 1;
}

void print_sudoku(int* sudoku, int m_size, int v_size) {
    for(int i = 0; i < v_size; i++) {
        printf("%d ", sudoku[i]);
        if((i+1) % m_size == 0) {
            printf("\n");
        }
    }
}

int* read_sudoku(char* file, int* r_size, int* m_size, int* v_size) {
    FILE* fp;
    char r_size_c[3];

    if ((fp = fopen(file, "r+")) == NULL) {
        //fprintf(stderr, "error opening file\n", file);
        printf("error opening file\n");
        exit(1);
    }

    int c = fgetc(fp);
    *r_size = GETVALUE(c);
    *m_size = *r_size * *r_size;
    *v_size = *m_size * *m_size;

    int* sudoku = (int*)malloc(sizeof(int) * *v_size);
    int idx = 0;
    char aux[3];


    while(feof(fp) == 0) {
        c = fgetc(fp);
        if(GETVALUE(c) >= 0) {
            char aux_idx = 0;
            while(GETVALUE(c) >= 0) {
                //printf("%d ", GETVALUE(c));
                aux[aux_idx++] = c;
                c = fgetc(fp);
            }
            aux[aux_idx] = '\0';
            sudoku[idx] = atoi(aux);
            idx++;

            memset(aux, 0, 3);
        }
    }

    fclose(fp);

    return sudoku;
}


Solution

  • Your manager processes uses MPI_Probe with MPI_ANY_SOURCE. I'm guessing that the MS MPI somehow favors certain processes, so you will always find them first. Are you sure that the individual solves take long enough? If every processes basically immediately says "I'm done, give me a new one" then you may always find the lowest numbered process, because it is always ready.

    How to solve this: idea #1: instead of blocking sends/receives, use non-blocking ones, and use MPI_Waitsome which can handle multiple incoming requests. Idea #2: instead of probing for any source, cycle Iprobe through the processes, so that if you just handled a request from process 5, you will next check if process 6 is ready.