Search code examples
fortranopenmpdo-whileconvergence

Replacing do-while loops as convergence query in Fortran OpenMP


I have build a program that uses Differential Evolution to optimize atom positions in regard to their pair-wise potential and now want to parallelize it with OpenMP to which I am quite new. The Differential Evolution uses an overall do-while loop in which a convergence query is used as the exit condition.

That means

  1. I know that I can not simply !$OMP PARALLEL DO the do-while loop
  2. I can not predict at what point the loop is terminated
  3. following iterations would also meet the condition. The following is my unparallelized code:

    implicit none
    integer, parameter :: dp = selected_real_kind(15, 307)
    real(kind = dp) :: sum_dif, sum, sum_old, final_toler, F, CR, d1, d2, d3, max_step, this_step, scale, randomtest
    integer :: pop, dim, arfa, beta, gama, delt, bi, jrand, kf, ki, kj, kg, dim_low, i, g, num_Ti, idum
    logical :: monitor_progress, history
    integer, dimension(0:) :: index
    real(kind=dp), dimension(0:,0:) :: depop, tempop 
    real(kind=dp), dimension(0:) :: best,temp_best, proj, d_fx, t_fx
    real(kind=dp) :: midpoint
    logical :: best_DE, use_maxstep
    integer*2 :: best_DE_num
    
    sum_dif = 0.0
    do while ((abs(sum_old - sum + sum_dif)) > final_toler) !UTILIZE DIFFERENTIAL EVOLUTION until convergence
        ! ↑ enclosing convergence loop
        sum_old = sum
        !**initialize DE-Parameters**
        if (best_DE) then
            F = 0.2_dp + 0.6_dp * ran2(idum)
            CR = 0.6_dp + 0.4_dp * ran2(idum)
        else
            F = 0.4_dp + 0.4_dp * ran2(idum)
            CR = 0.7_dp + 0.2_dp * ran2(idum)
        end if
        !******
        !** Create Mutant Population AND Perform Population Update**
        do i = 0, pop - 1
            do
                arfa = RNGgen(pop)
                if (arfa /= i) exit
            end do
            do
                beta = RNGgen(pop)
                if (beta /= arfa) then
                    if (beta /= i) exit
                end if
            end do
            do
                gama = RNGgen(pop)
                if (gama /= beta) then
                    if (gama /= arfa) then
                        if (gama /= i) exit
                    end if
                end if
            end do
            do
                delt = RNGgen(pop)
                if (delt /= gama) then
                    if (delt /= beta) then
                        if (delt /= arfa) then
                            if (delt /= i) exit !loops will be continued until arfa!=beta!=gama!=delt!=i
                        end if
                    end if
                end if
            end do
            jrand = RNGgen(dim + 1)/3 
            !**Create mutant population per atom not per dim
            kf = 0
    
            do while (kf < dim)
                randomtest = ran2(idum)
                if ((randomtest <= CR).or.(kf == jrand)) then 
                    !**Create hybrid child**
                d1 = F * (depop(arfa, kf) - depop(gama, kf) + best_DE_num * (depop(beta, kf) - depop(delt, kf)))
                d2 = F * (depop(arfa, kf + 1) - depop(gama, kf + 1) + best_DE_num * (depop(beta, kf + 1) - depop(delt, kf + 1)))
                d3 = F * (depop(arfa, kf + 2) - depop(gama, kf + 2) + best_DE_num * (depop(beta, kf + 2) - depop(delt, kf + 2)))
                    !******
                    if (use_maxstep) then
                        this_step = d1 * d1 + d2 * d2 + d3 * d3!norm^2
                        if (this_step > max_step) then
                            scale = sqrt(max_step/this_step)
                            d1 = d1 * scale
                            d2 = d2 * scale
                            d3 = d3 * scale
                        end if
                    end if !end if use_maxstep
                    tempop(i, kf) = best_DE_num * best(kf)+(1 - best_DE_num) * depop(beta, kf) + d1
                    tempop(i, kf + 1) = best_DE_num * best(kf + 1)+(1 - best_DE_num) * depop(beta, kf + 1) + d2
                    tempop(i, kf + 2) = best_DE_num * best(kf + 2)+(1 - best_DE_num) * depop(beta, kf + 2) + d3
                else
                    tempop(i, kf) = depop(i, kf)
                    tempop(i, kf + 1) = depop(i, kf + 1)
                    tempop(i, kf + 2) = depop(i, kf + 2)
                end if
                kf = kf + 3
            end do !end dim do loop
            !******
            call trans_to_cent(tempop(i,:), midpoint, midpoint, midpoint, 0, dim_low)
            !******
            !**Evaluate Objective Function for Mutant
            tempop(i, dim) = Analytic_U(num_Ti, tempop(i,:))
            t_fx(i) = tempop(i, dim) !store tempop fx values
            !******
        end do !end pop do loop
        do i = 0, pop - 1
            d_fx(i) = depop(i, dim) !store depop fx values
        end do
        !******
        !**SORTED MUTANT REPLACEMENT**
        index = Max_DelF(d_fx, t_fx)
        do kg = 0, pop - 1
            if (tempop(index(kg), dim) < depop(kg, dim)) then
                depop(kg,:) = tempop(index(kg),:)
            end if
            d_fx(kg) = depop(kg, dim) 
        end do
        !******
        !**Obtain total cost function values for tolerance comparison**
        sum = 0
        do ki = 0, pop - 1
            sum = sum + depop(ki, dim)
        end do
        sum = sum/pop !calculate average energy value            
        !******
        !**Obtain global best**
        do kj = 0, pop - 1
            if (best(dim) > depop(kj, dim)) then
                best = depop(kj,:)
                bi = kj
            end if !determine "best" vector
        end do
        !******
        if (monitor_progress) then
            print*, "Progress (min = 1.0): ", (abs(sum_old - sum + sum_dif))/final_toler
        end if
        g = g + 1 !increment iteration counter
    end do !end generation (while) loop
    

Here, the loop on the top is the one in question. The exit condition triggers when the energy difference from one iteration to the next one is below a certain threshold. The code includes several other do-loops inside of this one but they should be parallelizable without major problems.

My question now is: can the enclosing loop be parallelized without giving up most of the performance boost or will there be still a boost if I try to exclude it from the parallel region?

If I do that I also could exclude the generation of mutant population variables arfa, beta, gama, delt because their generation would have to be done with !$OMP CRITICAL with a considerable overhead anyway since they are randomly build with arfa!=beta!=gama!=delt!, right? I am using the random_number intrinsic with random seeds in my RNGgen function. My compiler is gfortran.


Solution

  • The example you provided was not complete: it did not compile. I decided to construct a small (complete) program that does something which I think is similar. I hope it helps!

    The program starts up a parallell session, in which new populations are found. Whenever a better population is found than the best so far, the best population is updated. The iteration stops when the global calculation spends too many iterations between consecutive improvements.

    In this program, every next population is constructed completely from scratch. In your program, there is a more advanced generation of the next population. favoring 'better' populations over 'worse' ones.

    In a naive parallallization, each thread will follow its own path through the search space, and it will not 'learn' from what the other threads have found out. To exchange search information between threads, you need to design a method and then program it. To do so seemed outside of the scope of this question.

    Here comes the program:

    program optimize_population
    use Population ! contains the target function
    use omp_lib
    implicit none
        integer, parameter :: really_long = 100000
        real(kind=dp) :: best, targetFun
        real(kind=dp) :: pop(2,npop), best_pop(2,npop)
        integer, allocatable :: iter(:)
        integer :: i, nt, it, ierr, last_improvement 
    
        call initTarget()  ! initialize the target function
    
        ! Allocate an iteration count for every thread
        nt = omp_get_max_threads()
        allocate(iter(nt), stat=ierr)
        if (ierr/=0) stop('allocation error')
        iter = 0
    
    
        best = -1e10
        last_improvement = 0
    
    !$OMP PARALLEL PRIVATE(it, pop, i, targetFun)
          it = omp_get_thread_num()+1  ! thread number
          do
              iter(it) = iter(it) + 1
    
              ! Create a new population
              do i = 1,npop
                 pop(1,i) = rand()
                 pop(2,i) = rand()
              end do
    
              ! Evaluate target function  
              targetFun = popFun(pop)
    
              if (targetFun>best) then
              ! If this is the best population so far,
              !    then register this
    
    !$OMP          CRITICAL
                      best_pop = pop
                      best = targetFun
                      print '(a,i0,a,i7,a,1p,e13.5)', '[',it,'] iteration ',sum(iter),' Best score until now: ',TargetFun
                      last_improvement = sum(iter) ! remember the global iteration count for the last time an improved population was found
    !$OMP          END CRITICAL
              end if
    
              ! Done when further improvement takes too long
              if (last_improvement < sum(iter) - really_long) exit
          end do
    !$OMP END PARALLEL
    
        ! Report the best population found
        targetFun = popFun(best_pop)
        print '(a,1p,e13.5)', 'Best score found: ',targetFun
        print '(a,1p,e13.5)', '    best population found:'
        do i = 1,npop
           print '(1p,10(a,e13.5))', '    (',best_pop(1,i),',',best_pop(2,i),')'
        end do
    
    end program  optimize_population
    

    The program needs the target-function, which is provided by the module Population, below:

    module Population
    integer, parameter :: npop  = 20, ncenter = 3
    integer, parameter :: dp = kind(1d0)
    real(kind=dp)      :: center(2,ncenter)
    contains
    
        subroutine initTarget()
        implicit none
           integer :: i
           do i = 1,ncenter
              center(1,i) = rand()
              center(2,i) = rand()
           end do
           print '(a,i0,a)', &
              'Looking for a population of ',npop,' points in the unit square,'
           print '(a,i0,a)', &
              'equally spread out in space, but clustered around the points'
           print '(1p,10(a,e13.5))', &
              '    (',center(1,1),',',center(2,1), &
              ('),    (',center(1,i),',',center(2,i), i=2,ncenter-1), &
              ')    and (',center(1,ncenter),',',center(2,ncenter),')'
        end subroutine initTarget
    
    
        function popFun(pop) result(targetFun)
        implicit none
            real(kind=dp), intent(in) :: pop(:,:)
            real(kind=dp) :: targetFun
    
            integer :: i,j
            real(kind=dp) :: sum_center, sum_dist
    
            sum_dist = 0
            sum_center = 0
            do i = 1,npop
               do j = i+1,npop
                  sum_dist   = sum_dist + (pop(1,i)-pop(1,j))**2 + (pop(2,i)-pop(2,j))**2
               end do
               do j = 1,ncenter
                  sum_center = sum_center + (pop(1,i)-center(1,j))**2 + (pop(2,i)-center(2,j))**2
               end do
            end do
    
            targetFun = sum_dist - sum_center
        end function popFun
    
    end module Population