I try to write a Openmp based matrix multiplication code. The multiplication of matrix mm and matrix mmt is diagonal matrix and equal to one. I try normal calculation and Openmp. The normal result is correct, however the Openmp result is wrong. I think it should be relative to the Openmp utilization.
program main
implicit none
double precision,dimension(0:8,0:8)::MM,MMT,MTEMP
MM=reshape((/ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , &
-4 , -1 , -1 , -1 , -1 , 2 , 2 , 2 , 2 , &
4 , -2 , -2 , -2 , -2 , 1 , 1 , 1 , 1 , &
0 , 1 , 0 , -1 , 0 , 1 , -1 , -1 , 1 , &
0 , -2 , 0 , 2 , 0 , 1 , -1 , -1 , 1 , &
0 , 0 , 1 , 0 , -1 , 1 , 1 , -1 , -1 , &
0 , 0 , -2 , 0 , 2 , 1 , 1 , -1 , -1 , &
0 , 1 , -1 , 1 , -1 , 0 , 0 , 0 , 0 , &
0 , 0 , 0 , 0 , 0 , 1 , -1 , 1 , -1 /),shape(MM))
MMT=1.d0/36.d0 * reshape ((/ 4 , -4 , 4 , 0 , 0 , 0 , 0 , 0 , 0 , &
4 , -1 , -2 , 6 , -6 , 0 , 0 , 9 , 0 , &
4 , -1 , -2 , 0 , 0 , 6 , -6 , -9 , 0 , &
4 , -1 , -2 , -6 , 6 , 0 , 0 , 9 , 0 , &
4 , -1 , -2 , 0 , 0 , -6 , 6 , -9 , 0 , &
4 , 2 , 1 , 6 , 3 , 6 , 3 , 0 , 9 , &
4 , 2 , 1 , -6 , -3 , 6 , 3 , 0 , -9 , &
4 , 2 , 1 , -6 , -3 , -6 , -3 , 0 , 9 , &
4 , 2 , 1 , 6 , 3 , -6 , -3 , 0 , -9 /),shape(MMT))
!$OMP PARALLEL
call multi(mm,mmt,mtemp)
PRINT*,MTEMP
!$OMP END PARALLEL
endprogram main
subroutine multi(m1,m2,m3)
double precision,dimension(0:8,0:8)::m1,m2,m3
double precision,dimension(0:80)::mm1,mm2
DOUBLE PRECISION::TEMP
integer::i,j,k
!$OMP DO
do j=0,8
do i=0,8
mm1(j*9+i)=m1(i,j)
mm2(i*9+j)=m2(i,j)
enddo
enddo
!$OMP ENDDO
!$OMP DO PRIVATE(TEMP,I,J,K)
do j=0,8
do i=0,8
temp=0
do k=0,8
temp=temp+mm1(j*9+k)*mm2(i*9+k)
enddo
m3(i,j)=temp
enddo
enddo
!$OMP ENDDO
return
endsubroutine
I give the normal version below, and if you compute this, the result is diagnonal 1 matrix.
program main
implicit none
double precision,dimension(0:8,0:8)::MM,MMT,MTEMP
MM=reshape((/ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , &
-4 , -1 , -1 , -1 , -1 , 2 , 2 , 2 , 2 , &
4 , -2 , -2 , -2 , -2 , 1 , 1 , 1 , 1 , &
0 , 1 , 0 , -1 , 0 , 1 , -1 , -1 , 1 , &
0 , -2 , 0 , 2 , 0 , 1 , -1 , -1 , 1 , &
0 , 0 , 1 , 0 , -1 , 1 , 1 , -1 , -1 , &
0 , 0 , -2 , 0 , 2 , 1 , 1 , -1 , -1 , &
0 , 1 , -1 , 1 , -1 , 0 , 0 , 0 , 0 , &
0 , 0 , 0 , 0 , 0 , 1 , -1 , 1 , -1 /),shape(MM))
MMT=1.d0/36.d0 * reshape ((/ 4 , -4 , 4 , 0 , 0 , 0 , 0 , 0 , 0 , &
4 , -1 , -2 , 6 , -6 , 0 , 0 , 9 , 0 , &
4 , -1 , -2 , 0 , 0 , 6 , -6 , -9 , 0 , &
4 , -1 , -2 , -6 , 6 , 0 , 0 , 9 , 0 , &
4 , -1 , -2 , 0 , 0 , -6 , 6 , -9 , 0 , &
4 , 2 , 1 , 6 , 3 , 6 , 3 , 0 , 9 , &
4 , 2 , 1 , -6 , -3 , 6 , 3 , 0 , -9 , &
4 , 2 , 1 , -6 , -3 , -6 , -3 , 0 , 9 , &
4 , 2 , 1 , 6 , 3 , -6 , -3 , 0 , -9 /),shape(MMT))
call multi(mm,mmt,mtemp)
PRINT*,MTEMP
endprogram main
subroutine multi(m1,m2,m3)
double precision,dimension(0:8,0:8)::m1,m2,m3
double precision,dimension(0:80)::mm1,mm2
DOUBLE PRECISION::TEMP
integer::i,j,k
do j=0,8
do i=0,8
mm1(j*9+i)=m1(i,j)
mm2(i*9+j)=m2(i,j)
enddo
enddo
do j=0,8
do i=0,8
temp=0
do k=0,8
temp=temp+mm1(j*9+k)*mm2(i*9+k)
enddo
m3(i,j)=temp
enddo
enddo
return
endsubroutine
To solve your problem one solution is to place the parallel region inside the multi
subroutine. This code gives the same result as the serial one:
subroutine multi(m1,m2,m3)
double precision,dimension(0:8,0:8)::m1,m2,m3
double precision,dimension(0:80)::mm1,mm2
DOUBLE PRECISION::TEMP
integer::i,j,k
!$OMP PARALLEL
!$OMP DO
do j=0,8
do i=0,8
mm1(j*9+i)=m1(i,j)
mm2(i*9+j)=m2(i,j)
enddo
enddo
!$OMP ENDDO
!$OMP DO PRIVATE(TEMP,I,J,K)
do j=0,8
do i=0,8
temp=0
do k=0,8
temp=temp+mm1(j*9+k)*mm2(i*9+k)
enddo
m3(i,j)=temp
enddo
enddo
!$OMP ENDDO
!$OMP END PARALLEL
return
Note that the workload is very small, so it may not be faster than the serial code. Note also that if you do not need mm1
and mm2
arrays later then you do not have to calculate them:
subroutine multi(m1,m2,m3)
double precision,dimension(0:8,0:8)::m1,m2,m3
DOUBLE PRECISION::TEMP
integer::i,j,k
!$OMP PARALLEL
!$OMP DO PRIVATE(TEMP,I,J,K)
do j=0,8
do i=0,8
temp=0
do k=0,8
temp=temp+m1(k,j)*m2(i,k)
enddo
m3(i,j)=temp
enddo
enddo
!$OMP ENDDO
!$OMP END PARALLEL
return