In a parallel loop, I process a shared array using a subroutine to which I pass the array and the current private-do index as parameters, but the program crashes with an array out-of-bounds error. How to correctly call a subroutine to process a shared array and pass the parallel loop index to it?
Code in the main.F:
PROGRAM TESTER
USE OMP_LIB
USE PRINTER
INTEGER, PARAMETER:: N = 5
REAL*4,DIMENSION(:),ALLOCATABLE, SAVE :: ARG_1, ARG_2
REAL*4,DIMENSION(:),ALLOCATABLE:: RES
C=======================================================================
C$OMP THREADPRIVATE(ARG_1, ARG_2)
C=======================================================================
ALLOCATE(RES(N))
PRINT *,'MAIN: "RES" IS ALLOCATED = ',
> ALLOCATED(RES)
C$OMP PARALLEL PRIVATE(I) SHARED(RES) NUM_THREADS(2)
ALLOCATE(ARG_1(N))
PRINT *,'MAIN: "ARG_1" IS ALLOCATED = ',
> ALLOCATED(ARG_1)
ALLOCATE(ARG_2(N))
PRINT *,'MAIN: "ARG_2" IS ALLOCATED = ',
> ALLOCATED(ARG_2)
C Step 1:Initialize working arrays:
CALL WORK1(ARG_1,N, ARG_2,N)
CALL WORK2(ARG_1,N, ARG_2,N)
C Step 2: Print working arrays:
CALL PRINT_ARR(ARG_1,N)
CALL PRINT_ARR(ARG_2,N)
PRINT *,'===================================='
C Step 3: Parallel Loop:
c-----------------------------------------------------------------------
C$OMP DO
DO I=1,N
CALL WORK3(RES,I,ARG_1(I),ARG_2(I))
ENDDO
C$OMP END DO
CALL PRINT_ARR(RES,N)
c-----------------------------------------------------------------------
C$OMP END PARALLEL
DEALLOCATE(ARG_1,ARG_2)
DEALLOCATE(RES)
END PROGRAM TESTER
Code of work.F file:
SUBROUTINE WORK1(ARG_ARR_1,DIM_1,ARG_ARR_2,DIM_2)
INTEGER DIM_1, DIM_2,I,J
REAL*4 ARG_ARR_2(DIM_2)
REAL*4 ARG_ARR_1(DIM_1)
REAL*4 ARG1, ARG2
REAL*4,DIMENSION(:),ALLOCATABLE:: ARG_ARR_3
SAVE
c-----------------------------------------------------------------------
C$OMP THREADPRIVATE (I)
c-----------------------------------------------------------------------
DO I=1,DIM_1
ARG_ARR_1(I)= 1.0
ENDDO
RETURN
ENTRY WORK2 (ARG_ARR_1,DIM_1,ARG_ARR_2,DIM_2)
DO I=1,DIM_2
ARG_ARR_2(I)= 2.0
ENDDO
RETURN
ENTRY WORK3 (ARG_ARR_3,J,ARG1,ARG2)
ARG_ARR_3(J)= ARG1+ARG2
RETURN
END SUBROUTINE WORK1
And module.f code:
MODULE PRINTER
CONTAINS
SUBROUTINE PRINT_ARR(ARR_VAR,SIZE)
REAL*4,DIMENSION(:),ALLOCATABLE:: ARR_VAR
INTEGER SIZE
INTEGER,SAVE:: J
c-----------------------------------------------------------------------
C$OMP THREADPRIVATE(J)
c-----------------------------------------------------------------------
DO J=1,SIZE
PRINT *,'ARR_VAR(',J,')=',ARR_VAR(J)
ENDDO
FLUSH(6)
END SUBROUTINE PRINT_ARR
END MODULE PRINTER
My compile and run commands:
gfortran -fopenmp -O0 -g -fcheck=all -fbacktrace -c module1.f work.F main.F
gfortran -fopenmp *.o -o a.x
./a.x
My output:
MAIN: "RES" IS ALLOCATED = T
MAIN: "ARG_1" IS ALLOCATED = T
MAIN: "ARG_2" IS ALLOCATED = T
ARR_VAR( 1 )= 1.00000000
ARR_VAR( 2 )= 1.00000000
ARR_VAR( 3 )= 1.00000000
ARR_VAR( 4 )= 1.00000000
ARR_VAR( 5 )= 1.00000000
ARR_VAR( 1 )= 2.00000000
ARR_VAR( 2 )= 2.00000000
ARR_VAR( 3 )= 2.00000000
ARR_VAR( 4 )= 2.00000000
ARR_VAR( 5 )= 2.00000000
====================================
MAIN: "ARG_1" IS ALLOCATED = T
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
MAIN: "ARG_2" IS ALLOCATED = T
ARR_VAR( 1 )= 1.00000000
ARR_VAR( 2 )= 1.00000000
ARR_VAR( 3 )= 1.00000000
ARR_VAR( 4 )= 1.00000000
ARR_VAR( 5 )= 1.00000000
ARR_VAR( 1 )= 2.00000000
ARR_VAR( 2 )= 2.00000000
ARR_VAR( 3 )= 2.00000000
ARR_VAR( 4 )= 2.00000000
ARR_VAR( 5 )= 2.00000000
====================================
At line 21 of file work.F
Fortran runtime error: Index '4' of dimension 1 of array 'arg_arr_3' above upper bound of 2
Error termination. Backtrace:
#0 0x7f1e90ed3ad0 in ???
#1 0x7f1e90ed2c35 in ???
#2 0x7f1e90c8051f in ???
at ./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0
#3 0x55d38d47e43d in master.0.work1
at .../work.F:21
#4 0x55d38d47e04f in work3_
at .../work.F:20
#5 0x55d38d47dae6 in MAIN__._omp_fn.0
at .../main.F:44
#6 0x7f1e90e7aa15 in ???
#7 0x55d38d47d45b in tester
at .../main.F:18
#8 0x55d38d47d58d in main
at .../main.F:2
Segmentation fault (core dumped)
I use gfortran: gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04)
Thank you very much for your answer and comments, @PierU and @IanBush ! I apologize for the long response!
I would like to supplement my words regarding keywords threadprivate
and save
:
SAVE
statement, without a list of variables and acting for all locals variables in scope. In my example here I simulate this environment and for critical variables such as loop counters/array iterators I am forced to use threadprivate
statement.SAVE
+ THREADPRIVATE
variables lists) should be used as rarely as possible. In my target code, these subroutines are called inside a large OpenMP loop and mark explicitly some their local variables as private
is impossible.entry
: it is part of legacy environment and I would like to place all code in modules and avoid a lot of problems, but now I don’t have the time for that.As a result, I was able to make my own solution, which I present below. The point is in the correct description of shared variables: array Y must be shared (as a module variable).
main.F:
PROGRAM TESTER
USE OMP_LIB
USE PRINTER
REAL*4,DIMENSION(:),ALLOCATABLE, SAVE :: ARG_1, ARG_2
REAL*4,DIMENSION(:),ALLOCATABLE:: RES
C=======================================================================
C$OMP THREADPRIVATE(ARG_1, ARG_2)
C=======================================================================
ALLOCATE(RES(N))
ALLOCATE(Y(N))
PRINT *,'MAIN: "RES" IS ALLOCATED = ',
> ALLOCATED(RES)
c-----------------------------------------------------------------------
C$OMP PARALLEL PRIVATE(I) SHARED(Y) NUM_THREADS(2)
c-----------------------------------------------------------------------
ALLOCATE(ARG_1(N))
PRINT *,'MAIN: "ARG_1" IS ALLOCATED = ',
> ALLOCATED(ARG_1)
ALLOCATE(ARG_2(N))
PRINT *,'MAIN: "ARG_2" IS ALLOCATED = ',
> ALLOCATED(ARG_2)
C Initialize working arrays:
CALL WORK1(ARG_1,N, ARG_2,N)
CALL WORK2(ARG_1,N, ARG_2,N)
C Step 1: Print working arrays:
CALL PRINT_ARR(ARG_1,N)
CALL PRINT_ARR(ARG_2,N)
PRINT *,'===================================='
FLUSH(6)
C Step 2: Parallel Loop:
c-----------------------------------------------------------------------
C$OMP DO
DO I=1,N
c RES(I)=ARG_1(I) + ARG_2(I)
CALL WORK3(I,ARG_1(I),ARG_2(I))
ENDDO
C$OMP END DO
CALL PRINT_ARR(Y,N)
DEALLOCATE(ARG_1,ARG_2)
c-----------------------------------------------------------------------
C$OMP END PARALLEL
c-----------------------------------------------------------------------
DEALLOCATE(RES)
END PROGRAM TESTER
work.F:
SUBROUTINE WORK1(ARG1_W1,DIM_1,ARG2_W2,DIM_2)
USE PRINTER
c------ Input arguments: -----------------------------------------------
INTEGER DIM_1, DIM_2, J
REAL*4 ARG2_W2(DIM_2)
REAL*4 ARG1_W1(DIM_1)
c dummy arguments for WORK3:
REAL*4 ARG1_W3, ARG2_W3
c------ Locals: --------------------------------------------------------
INTEGER I
SAVE I
c------ OpenMP spells: -------------------------------------------------
c$OMP THREADPRIVATE (I)
c-----------------------------------------------------------------------
DO I=1,DIM_1
ARG1_W1(I) = 1.0
ENDDO
RETURN
ENTRY WORK2 (ARG1_W1,DIM_1,ARG2_W2,DIM_2)
DO I=1,DIM_2
ARG2_W2(I) = 2.0
ENDDO
RETURN
ENTRY WORK3 (J,ARG1_W3,ARG2_W3)
Y(J)= ARG1_W3 + ARG2_W3
RETURN
END SUBROUTINE WORK1
module1.f:
MODULE PRINTER
INTEGER, PARAMETER:: N = 5
c NB: array Y is shared!
REAL*4,DIMENSION(:),ALLOCATABLE::Y
CONTAINS
SUBROUTINE PRINT_ARR(ARR_VAR,SIZE)
REAL*4,DIMENSION(:),ALLOCATABLE:: ARR_VAR
INTEGER SIZE
INTEGER,SAVE:: J
c------ OpenMP spells: -------------------------------------------------
c$OMP THREADPRIVATE(J)
c-----------------------------------------------------------------------
DO J=1,SIZE
PRINT *,'ARR_VAR(',J,')=',ARR_VAR(J)
ENDDO
FLUSH(6)
END SUBROUTINE PRINT_ARR
END MODULE PRINTER
My compilation and run commands:
sudo rm -R -f {*.o,*.x,*.mod}
gfortran -fopenmp -O0 -g -fcheck=all -fbacktrace -c module1.f work.F main.F
gfortran -fopenmp *.o -o a.x
./a.x
My output (there may be some disorder in the output due to multiple threads):
MAIN: "RES" IS ALLOCATED = T
MAIN: "ARG_1" IS ALLOCATED = T
MAIN: "ARG_2" IS ALLOCATED = T
ARR_VAR( 1 )= 1.00000000
ARR_VAR( 2 )= 1.00000000
ARR_VAR( 3 )= 1.00000000
ARR_VAR( 4 )= 1.00000000
ARR_VAR( 5 )= 1.00000000
ARR_VAR( 1 )= 2.00000000
ARR_VAR( 2 )= 2.00000000
ARR_VAR( 3 )= 2.00000000
ARR_VAR( 4 )= 2.00000000
ARR_VAR( 5 )= 2.00000000
====================================
MAIN: "ARG_1" IS ALLOCATED = T
MAIN: "ARG_2" IS ALLOCATED = T
ARR_VAR( 1 )= 1.00000000
ARR_VAR( 2 )= 1.00000000
ARR_VAR( 3 )= 1.00000000
ARR_VAR( 4 )= 1.00000000
ARR_VAR( 5 )= 1.00000000
ARR_VAR( 1 )= 2.00000000
ARR_VAR( 2 )= 2.00000000
ARR_VAR( 3 )= 2.00000000
ARR_VAR( 4 )= 2.00000000
ARR_VAR( 5 )= 2.00000000
====================================
ARR_VAR( 1 )= 3.00000000
ARR_VAR( 2 )= 3.00000000
ARR_VAR( 3 )= 3.00000000
ARR_VAR( 4 )= 3.00000000
ARR_VAR( 5 )= 3.00000000
ARR_VAR( 1 )= 3.00000000
ARR_VAR( 2 )= 3.00000000
ARR_VAR( 3 )= 3.00000000
ARR_VAR( 4 )= 3.00000000
ARR_VAR( 5 )= 3.00000000