Search code examples
cperformanceoptimizationopenmpn-queens

How to optimize N-queen with openmp - C


I am learning OpenMP and found the following code to solve N-Queens problem. I want to make faster solution with parallel code, but I don't know how to do it. Everytime I tried something, it is only go to worse and slower solution.

This is code:

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define BOARD_SIZE 15

int board[20],solutions;
 
int main() {
  double start_time, end_time, result_time;
  void queen(int row,int n);

  start_time = omp_get_wtime();

  queen(1, BOARD_SIZE);

  end_time = omp_get_wtime();
    result_time = end_time - start_time;
    printf("Time = %.3f seconds\n", result_time);
  printf("Number of solutions: %d\n", solutions);
  return 0;
}
  
/*funtion to check conflicts
If no conflict for desired postion returns 1 otherwise returns 0*/
int place(int row, int column) {
  for(int i=1;i<=row-1;++i) {
    //checking column and diagonal conflicts
    if(board[i]==column)
      return 0;
    else
      if(abs(board[i]-column)==abs(i-row))
      return 0;
  }
  
  return 1; //no conflicts
}
 
//function to check for proper positioning of queen
void queen(int row,int n) {
  for(int column=1;column<=n;++column) {
    if(place(row,column)) {
    board[row]=column; //no conflicts so place queen
    if(row==n) //dead end
      solutions++;
    else //try queen with next position
      queen(row+1,n);
    }
  }
}

What can I do for optimize this code?


Solution

  • To parallelize your code with OpenMP I did the following:

    1. I have added #pragma omp parallel and #pragma omp single before the first call of function queen.
    2. To avoid data race I added #pragma omp atomic before solutions++; (Note that I have also tried using reduction, but in that case tasks have to be synchronized, which makes it a slower option).
    3. To make parallel recursive calls I had to create a (private) copy of the board (and to pass its pointer to function queen). To create tasks I used #pragma omp task if(row<4). Note that if(row<4) is used to limit the number of tasks created.

    The measured speedup is about 6x on 8 cores.

    #include <stdio.h>
    #include <stdlib.h>
    #include <omp.h>
    
    #define BOARD_SIZE 15
    
    int solutions;
    
    int main() {
      double start_time, end_time, result_time;
      int board[BOARD_SIZE+1];
      void queen(int row,int n, int*);
    
      
        start_time = omp_get_wtime();
    
        #pragma omp parallel
        #pragma omp single
        queen(1, BOARD_SIZE, board);
    
        end_time = omp_get_wtime();
            result_time = end_time - start_time;
            printf("Time = %.3f seconds\n", result_time);
        printf("Number of solutions: %d\n", solutions);
      
      return 0;
    }
      
    /*funtion to check conflicts
    If no conflict for desired postion returns 1 otherwise returns 0*/
    int place(int row, int column, int* board) {
      for(int i=1;i<=row-1;++i) {
        //checking column and diagonal conflicts
        if(board[i]==column)
          return 0;
        else
          if(abs(board[i]-column)==abs(i-row))
          return 0;
      }  
      return 1; //no conflicts
    }
     
    //function to check for proper positioning of queen
    void queen(int row,int n, int* board) {
      for(int column=1;column<=n;++column) {
        if(place(row,column, board)) { 
            if(row==n) //dead end
            {
                #pragma omp atomic
                solutions++;
            }        
            else
            //try queen with next position
            {
                int local_board[BOARD_SIZE+1];
                for(int i=1;i<row;i++) local_board[i]=board[i];  //copy board to local_board
                local_board[row]=column; //no conflicts so place queen            
                #pragma omp task if(row<4)
                queen(row+1,n, local_board);
            }
        }
      }
    }
    

    Please indicate if you need more explanation.