Search code examples
c++ openmp xeon-phi

How can I parallelize the inner loops with OpenMP?


Perhaps the solution to my problem is very obvious. I want to accelerate the following code using OpenMP (on an Intel Xeon Phi), but I can't get it to work.

/*
 * For every column i in which row `rule` has a positive left_side or
 * right_side entry, visit each row c whose left_side entry in column i is
 * positive. If flag[c] is still 0 for such a row, r_prob[c] is reset to
 * c_vect[c], flag[c] is set to 1, and r_prob[c] is multiplied by one factor
 * per column d, selected by the value of left_side[c * columns + d].
 */
int c=0, d=0, e=0, i;
// NOTE(review): the directive below is spelled `opm`, not `omp`, so as
// written it is not an OpenMP directive -- presumably a typo for `omp`.
#pragma opm parallel for private(c, d, e)
for(i=0; i < columns; i++)
{
  // Only columns that row `rule` touches on either side are considered.
  if((left_side[rule*columns + i] > 0) || (right_side[rule*columns + i] > 0))
  {
    for(c=0; c < rows; c++)
    {
      // Row c is relevant when its left_side entry in column i is positive.
      if(left_side[i + c * columns] > 0)
      {
        // Different values of i can reach the same row c, so flag[c] and
        // r_prob[c] are shared between iterations of the outer loop; this
        // check followed by the writes below is not atomic.
        if(flag[c] == 0)
        {
          r_prob[c] = c_vect[c];
          flag[c] = 1;
          for(d=0; d < columns; d++)
          {
            switch(left_side[c * columns + d])
            {
              case 0:
                // Column d contributes no factor.
                break;
              case 1:
                // Factor M_in[d].
                r_prob[c] *= M_in[d] * 1.0;
                break;
              case 2:
                // Factor M_in[d] * (M_in[d] - 1) / 2.
                r_prob[c] *= (M_in[d] * (M_in[d] - 1)) * .5;
                break;
              default:
                // Factor M_in[d] / e for each e from 1 up to the entry's value.
                for(e=1; e <= left_side[c * columns + d]; e++)
                  r_prob[c] *= M_in[d] * 1.0 / (e * 1.0);
                break;
            }
          }
        }
      }
    }
  }
}
//where r_prob, M_in, left_side, right_side, c_vect and flag are arrays that are given as input.

This code doesn't run correctly: the values in r_prob are wrong. How can I parallelize this code?


Solution

  • Your code has an error because the first `for` loop creates multiple threads, and these write to the arrays r_prob and flag simultaneously. The solution could be:

    /*
     * Recompute r_prob[c] for every row c that
     *   (a) still has flag[c] == 0, and
     *   (b) has a positive left_side entry in at least one column i in which
     *       row `rule` has a positive left_side or right_side entry.
     *
     * The work is distributed over rows c rather than over columns i. Each
     * iteration of the parallel loop is then the only one that reads or
     * writes flag[c] and r_prob[c], so no critical section is needed. Looping
     * over columns in parallel lets several threads reach the same row c; a
     * critical section around the individual statements does not help,
     * because the `flag[c] == 0` test and the updates that follow it are not
     * performed as one atomic step.
     *
     * The result is the same as running the original column-major loop nest
     * serially: a row is recomputed at most once, and its value does not
     * depend on which matching column was found first.
     */
    int c=0, d=0, e=0, i;
    #pragma omp parallel for private(d, e, i)
    for(c=0; c < rows; c++)
    {
      int affected = 0;

      /* Rows that are already marked keep their current r_prob[c]. */
      if(flag[c] != 0)
      {
        continue;
      }

      /* Does row c use any column that row `rule` touches? */
      for(i=0; i < columns; i++)
      {
        if(((left_side[rule*columns + i] > 0) || (right_side[rule*columns + i] > 0))
           && (left_side[i + c * columns] > 0))
        {
          affected = 1;
          break; /* one matching column is enough */
        }
      }
      if(!affected)
      {
        continue;
      }

      /* Accumulate directly in r_prob[c]; only this iteration touches it. */
      r_prob[c] = c_vect[c];
      flag[c] = 1;
      for(d=0; d < columns; d++)
      {
        switch(left_side[c * columns + d])
        {
          case 0:
            /* Column d contributes no factor. */
            break;
          case 1:
            r_prob[c] *= M_in[d] * 1.0;
            break;
          case 2:
            r_prob[c] *= (M_in[d] * (M_in[d] - 1)) * .5;
            break;
          default:
            /* One factor M_in[d] / e for each e up to the entry's value. */
            for(e=1; e <= left_side[c * columns + d]; e++)
            {
              r_prob[c] *= M_in[d] * 1.0 / (e * 1.0);
            }
            break;
        }
      }
    }
    // r_prob, M_in, left_side, right_side, c_vect and flag are arrays that are given as input.