Search code examples
c++optimizationopenmpcplex

Is it possible to optimize my code for openmp?


I don't have a deep understandin of how it works. I've been able to paralellize the following portion of code:

```
    float opt = 0;
    int cont = 0;
   #pragma omp parallel for schedule(dynamic)
    for(int p=0; p < S; p=p+4){
     //cout << p << endl;

        IloEnv env;
        IloModel mod(env);
        IloCplex cplex(mod);

        IloArray<IloNumVarArray> C(env, T);
        IloArray<IloArray<IloNumVarArray>> X(env, N);
        IloArray<IloArray<IloNumVarArray>> B(env, N);
        IloArray<IloArray<IloNumVarArray>> V(env, N);


      // cout << "1" << endl;
        for(int j = 0; j < T; j++)
            C[j] = IloNumVarArray(env, 4, 0, 1, ILOINT);

      //cout << "2" << endl;
        for(int i= 0; i < N; i++){
            X[i] = IloArray<IloNumVarArray>(env, T+1);
            B[i] = IloArray<IloNumVarArray>(env, T+1);
            V[i] = IloArray<IloNumVarArray>(env, T+1);

            for(int j= 0; j <= T; j++){
                X[i][j] = IloNumVarArray(env, 4);
                B[i][j] = IloNumVarArray(env, 4);
                V[i][j] = IloNumVarArray(env, 4);

                for(int k=0; k < 4; k++){
                    X[i][j][k] = IloNumVar(env, 0, IloInfinity, ILOFLOAT);
                    B[i][j][k] = IloNumVar(env, 0, IloInfinity, ILOFLOAT);
                    V[i][j][k] = IloNumVar(env, 0, IloInfinity, ILOFLOAT);

                }

            }

        }


        //===================================================
        // Maximize P[i][j][k]*X[i][j][k]
        //====================================================
      //cout << "3"<< endl;
        IloExpr fo(env);
        IloExpr penalty1(env);
        IloExpr penalty2(env);
        for(int i=0; i < N; i++){
            for(int k=0; k < 4; k++){
                fo += float(1.0/S)*P[i][T][p+k]*X[i][T][k];

            }

            if(iteration != 0){



                 for(int k = 0; k < 4; k++){
                    for(int j = 0; j < T-2; j++){


                        if(lambda1[i][j][p+k] !=0)
                        penalty1 += lambda1[i][j][p+k]*(X[i][j][0] - X_at[i][j][p+k]);

                        penalty1 += (1/2)*rho*(X[i][j][0]  - 2*X[i][j][0]*X_at[i][j][p+k] - X_at[i][j][p+k]*X_at[i][j][p+k]);


                    }

                }

            }

            else{

                penalty1 += 0;
               // penalty2 += 0;

            }


        }

        IloAdd(mod, IloMaximize(env, fo - penalty1));
        penalty1.end();
        fo.end();


        //========================================================
        //  sum X[i][0][k]*P[i][0][k] forall i=1,...,N, k=1,...,S
        //========================================================
      //  cout << "4" << endl;

        for(int k=0; k < 4; k++){
            IloExpr constraint1(env);
            for(int i=0; i <N; i++){
                constraint1 += P[i][0][p+k]*X[i][0][k];

                }
            mod.add(constraint1 == Q);
            constraint1.end();

        }


        //======================================================================================
        // X[i][j][k] = X[i][j-1][k] + B[i][j][k] - V[i][j][k] for i=1,...,N,j=1,...,T,,k=1,...S
        //======================================================================================
      //  cout << "5" << endl;
        for(int i=0; i < N; i++){

            for(int j=0; j <= T; j++){

                for(int k=0; k < 4; k++){
                    IloExpr constraint2(env);

                    if(j==0){

                        constraint2 += X[i][j][k] - B[i][j][k];
                        mod.add(V[i][j][k] == 0);
                    }



                    else{

                        constraint2 += X[i][j][k] - X[i][j-1][k] - B[i][j][k] + V[i][j][k];
                    }

                    mod.add(constraint2 == 0);
                    constraint2.end();

                }

            }

        }

        //=========================================================================================
        // sum P[i][j][k]V[i][j][k] - sum P[i][j][k]B[i][j][k] + ft = lt forall j=1,...,T, k=1,...,S
        //=========================================================================================
     // cout << "6" << endl;
        for(int j = 1; j <= T; j++){


            for(int k = 0; k < 4; k++){

                IloExpr constraint3_1(env);
                IloExpr constraint3_2(env);
                for(int i = 0; i < N; i++){

                    constraint3_1 += P[i][j][p+k]*B[i][j][k];
                    constraint3_2 += P[i][j][p+k]*V[i][j][k];
                }
                mod.add(constraint3_2 - constraint3_1 + ft[j-1] == lt[j-1]);
                constraint3_1.end();
                constraint3_2.end();

            }

        }

        //============================================================================================
        // X[i][j][k]*P[i][j][k] < pi * sum X[i][j][k]*P[i][j][k] forall i=1,...,N,j=1,...T, k=1,...,S
        // ===========================================================================================
      // cout << "7" << endl;
        for(int i = 0; i < N; i++){

            for(int j = 1; j <= T; j++){

                for(int k = 0; k < 4; k++){
                    IloExpr constraint4(env);
                    for(int m = 0; m < N; m++){
                        constraint4 += X[m][j][k]*P[m][j][p+k];
                    }
                    mod.add(X[i][j][k]*P[i][j][p+k] <= pi[i]*constraint4);
                    constraint4.end();

                }


            }

        }

        //==================================================================
        //K(Lt - Ft) - sum P[i][j][k]*X[i][j][k] <= MC[j][k] forall j=1,...,T, k=1,...,S
        //==================================================================
       // cout << "8" << endl;

        for(int j = 1; j <= T; j++){

            for(int k = 0; k < 4; k++){
                IloExpr constraint5(env);
                for(int i = 0; i < N; i++){
                    constraint5 += X[i][j][k]*P[i][j][p+k];
                }

                mod.add(K*(Lt[j-1] - Ft[j-1]) - constraint5 - M*C[j-1][k] <= 0);
                constraint5.end();

            }

        }

        //============================================================
        // sum C[j][k] <= 2 forall j=1,...,T-2, k=1,...,S
        //=============================================================
       // cout << "9" << endl;

        for(int j=0; j < T-2; j++){

            for(int k=0; k < 4; k++){

                IloExpr constraint6(env);
                for(int t = 0; t <= 2; t++){
                    constraint6 += C[j+t][k];
                }

                mod.add(constraint6 <= 2);
                constraint6.end();

            }

        }


for(int i=0; i < N; i++){

    for(int j=0; j < T; j++){

        if(j == T-1){
            mod.add(X[i][j][0] - X[i][j][1] == 0);
            mod.add(X[i][j][2] - X[i][j][3] == 0);
        }

        else{
        mod.add(X[i][j][0] - X[i][j][1] == 0);
        mod.add(X[i][j][1] - X[i][j][2] == 0);
        mod.add(X[i][j][2] - X[i][j][3] == 0);
        }
    }
}

        IloTimer crono(env);// Variável para coletar o tempo
        //cplex.setParam(IloCplex::Param::Benders::Strategy, 3);
        crono.start();
        cplex.setWarning(env.getNullStream());
        cplex.setOut(env.getNullStream()); // Eliminar os logs do solver
       // cout << "***" << endl;
        cplex.solve();
        opt = opt + cplex.getObjValue();
        crono.stop();



      // cout << "10" << endl;

        //a solucao da variavel de decisao X e colocada em matriz
        for(int i=0; i < N; i++){

            for(int j=0; j <= T; j++){
                X1[i][j][p] = cplex.getValue(X[i][j][0]);
                X1[i][j][p+1] = cplex.getValue(X[i][j][1]);
                X1[i][j][p+2] = cplex.getValue(X[i][j][2]);
                X1[i][j][p+3] = cplex.getValue(X[i][j][3]);
            }

        }

        /*
        for(int j=0; j <= T; j++){

            output1 << cplex.getValue(X[0][j][0]) << '\t';
            output2 << cplex.getValue(X[1][j][0]) << '\t';

        }*/
        cont = cont + 1;
        //cout << "!" << omp_in_parallel() << endl;
        output1 << endl;
        output2 << endl;
        env.end();



    }```

Using 3 threads, the code runs approximatelly 46% faster. Is there anything I can change on my code to get a speedup ? I've also noticed that increasing the number of threads doesn't make any difference.


Solution

  • In your code you solve multiple optimization problems in parallel. This is fine but there is a catch: By default a single CPLEX instance will use as many threads as your machine has cores and will try to keep them busy. So if you solve N models in parallel, then on each core N instances of CPLEX will compete for computing resources. This is usually not a good idea since CPLEX processes are more or less completely CPU bound.

    One option to speed up your code would be to avoid that multiple CPLEX threads run on the same core. For example, if you have 12 cores and run 3 solves in parallel, then make sure that each of these solves uses only 4 threads.

    In order to limit the number of threads for a single solve use parameter IloCplex::Param::Threads.