Search code examples
parallel-processingfortranopenmphpc

The openmp matrix multiplication


I try to write a Openmp based matrix multiplication code. The multiplication of matrix mm and matrix mmt is diagonal matrix and equal to one. I try normal calculation and Openmp. The normal result is correct, however the Openmp result is wrong. I think it should be relative to the Openmp utilization.

program main
implicit none
double precision,dimension(0:8,0:8)::MM,MMT,MTEMP

MM=reshape((/ 1 ,   1   ,   1   ,   1   ,   1   ,   1   ,   1   ,   1   ,   1   ,   &
-4  ,   -1  ,   -1  ,   -1  ,   -1  ,   2   ,   2   ,   2   ,   2   ,   &
4   ,   -2  ,   -2  ,   -2  ,   -2  ,   1   ,   1   ,   1   ,   1   ,   &
0   ,   1   ,   0   ,   -1  ,   0   ,   1   ,   -1  ,   -1  ,   1   ,   &
0   ,   -2  ,   0   ,   2   ,   0   ,   1   ,   -1  ,   -1  ,   1   ,   &
0   ,   0   ,   1   ,   0   ,   -1  ,   1   ,   1   ,   -1  ,   -1  ,   &
0   ,   0   ,   -2  ,   0   ,   2   ,   1   ,   1   ,   -1  ,   -1  ,   &
0   ,   1   ,   -1  ,   1   ,   -1  ,   0   ,   0   ,   0   ,   0   ,   &
0   ,   0   ,   0   ,   0   ,   0   ,   1   ,   -1  ,   1   ,   -1  /),shape(MM))



MMT=1.d0/36.d0 * reshape  ((/ 4 ,   -4  ,   4   ,   0   ,   0   ,   0   ,   0   ,   0   ,   0   ,   &
                4   ,   -1  ,   -2  ,   6   ,   -6  ,   0   ,   0   ,   9   ,   0   ,   &
                4   ,   -1  ,   -2  ,   0   ,   0   ,   6   ,   -6  ,   -9  ,   0   ,   &
                4   ,   -1  ,   -2  ,   -6  ,   6   ,   0   ,   0   ,   9   ,   0   ,   &
                4   ,   -1  ,   -2  ,   0   ,   0   ,   -6  ,   6   ,   -9  ,   0   ,   &
                4   ,   2   ,   1   ,   6   ,   3   ,   6   ,   3   ,   0   ,   9   ,   &
                4   ,   2   ,   1   ,   -6  ,   -3  ,   6   ,   3   ,   0   ,   -9  ,   &
                4   ,   2   ,   1   ,   -6  ,   -3  ,   -6  ,   -3  ,   0   ,   9   ,   &
                4   ,   2   ,   1   ,   6   ,   3   ,   -6  ,   -3  ,   0   ,   -9  /),shape(MMT))

!$OMP PARALLEL
          call multi(mm,mmt,mtemp)

                PRINT*,MTEMP
!$OMP END PARALLEL


endprogram main

subroutine multi(m1,m2,m3)
double precision,dimension(0:8,0:8)::m1,m2,m3
double precision,dimension(0:80)::mm1,mm2
DOUBLE PRECISION::TEMP
integer::i,j,k
!$OMP DO
do j=0,8
    do i=0,8
        mm1(j*9+i)=m1(i,j)
        mm2(i*9+j)=m2(i,j)
    enddo
enddo
!$OMP ENDDO
!$OMP DO PRIVATE(TEMP,I,J,K)
do j=0,8
    do i=0,8
        temp=0
        do k=0,8
            temp=temp+mm1(j*9+k)*mm2(i*9+k)
        enddo
        m3(i,j)=temp
    enddo
enddo
!$OMP ENDDO
return


endsubroutine

I give the normal version below, and if you compute this, the result is diagnonal 1 matrix.

program main
implicit none
double precision,dimension(0:8,0:8)::MM,MMT,MTEMP

MM=reshape((/ 1 ,   1   ,   1   ,   1   ,   1   ,   1   ,   1   ,   1   ,   1   ,   &
-4  ,   -1  ,   -1  ,   -1  ,   -1  ,   2   ,   2   ,   2   ,   2   ,   &
4   ,   -2  ,   -2  ,   -2  ,   -2  ,   1   ,   1   ,   1   ,   1   ,   &
0   ,   1   ,   0   ,   -1  ,   0   ,   1   ,   -1  ,   -1  ,   1   ,   &
0   ,   -2  ,   0   ,   2   ,   0   ,   1   ,   -1  ,   -1  ,   1   ,   &
0   ,   0   ,   1   ,   0   ,   -1  ,   1   ,   1   ,   -1  ,   -1  ,   &
0   ,   0   ,   -2  ,   0   ,   2   ,   1   ,   1   ,   -1  ,   -1  ,   &
0   ,   1   ,   -1  ,   1   ,   -1  ,   0   ,   0   ,   0   ,   0   ,   &
0   ,   0   ,   0   ,   0   ,   0   ,   1   ,   -1  ,   1   ,   -1  /),shape(MM))



MMT=1.d0/36.d0 * reshape  ((/ 4 ,   -4  ,   4   ,   0   ,   0   ,   0   ,   0   ,   0   ,   0   ,   &
                4   ,   -1  ,   -2  ,   6   ,   -6  ,   0   ,   0   ,   9   ,   0   ,   &
                4   ,   -1  ,   -2  ,   0   ,   0   ,   6   ,   -6  ,   -9  ,   0   ,   &
                4   ,   -1  ,   -2  ,   -6  ,   6   ,   0   ,   0   ,   9   ,   0   ,   &
                4   ,   -1  ,   -2  ,   0   ,   0   ,   -6  ,   6   ,   -9  ,   0   ,   &
                4   ,   2   ,   1   ,   6   ,   3   ,   6   ,   3   ,   0   ,   9   ,   &
                4   ,   2   ,   1   ,   -6  ,   -3  ,   6   ,   3   ,   0   ,   -9  ,   &
                4   ,   2   ,   1   ,   -6  ,   -3  ,   -6  ,   -3  ,   0   ,   9   ,   &
                4   ,   2   ,   1   ,   6   ,   3   ,   -6  ,   -3  ,   0   ,   -9  /),shape(MMT))


                call multi(mm,mmt,mtemp)

                PRINT*,MTEMP



endprogram main

subroutine multi(m1,m2,m3)
double precision,dimension(0:8,0:8)::m1,m2,m3
double precision,dimension(0:80)::mm1,mm2
DOUBLE PRECISION::TEMP
integer::i,j,k

do j=0,8
    do i=0,8
        mm1(j*9+i)=m1(i,j)
        mm2(i*9+j)=m2(i,j)
    enddo
enddo


do j=0,8
    do i=0,8
        temp=0
        do k=0,8
            temp=temp+mm1(j*9+k)*mm2(i*9+k)
        enddo
        m3(i,j)=temp
    enddo
enddo

return


  endsubroutine

Solution

  • To solve your problem one solution is to place the parallel region inside the multi subroutine. This code gives the same result as the serial one:

    subroutine multi(m1,m2,m3)
    double precision,dimension(0:8,0:8)::m1,m2,m3
    double precision,dimension(0:80)::mm1,mm2
    DOUBLE PRECISION::TEMP
    integer::i,j,k
    !$OMP PARALLEL
        !$OMP DO
        do j=0,8
            do i=0,8
                mm1(j*9+i)=m1(i,j)
                mm2(i*9+j)=m2(i,j)
            enddo
        enddo
        !$OMP ENDDO
        !$OMP DO PRIVATE(TEMP,I,J,K)
        do j=0,8
            do i=0,8
                temp=0
                do k=0,8
                    temp=temp+mm1(j*9+k)*mm2(i*9+k)
                enddo
                m3(i,j)=temp
            enddo
        enddo
        !$OMP ENDDO
    !$OMP END PARALLEL
    return
    

    Note that the workload is very small, so it may not be faster than the serial code. Note also that if you do not need mm1 and mm2 arrays later then you do not have to calculate them:

    subroutine multi(m1,m2,m3)
    double precision,dimension(0:8,0:8)::m1,m2,m3
    DOUBLE PRECISION::TEMP
    integer::i,j,k
    !$OMP PARALLEL
        !$OMP DO PRIVATE(TEMP,I,J,K)
        do j=0,8
            do i=0,8
                temp=0
                do k=0,8
                    temp=temp+m1(k,j)*m2(i,k)
                enddo
                m3(i,j)=temp
            enddo
        enddo
        !$OMP ENDDO
    !$OMP END PARALLEL
    return