The following code compiles in gfortran, with a warning about large_array
being larger than the limit for a stack variable, stating that the array will be moved to static memory and is therefore not threadsafe:
subroutine stack_size_warning
implicit none
real :: large_array(65536)
print *, large_array
end subroutine stack_size_warning
This subroutine however compiles with no errors or warnings, and I can call it with n values larger than 65536 without issue, at least in simple cases.
subroutine no_warning(n)
implicit none
integer :: n
real :: automatic_array(n)
print *, automatic_array
end subroutine no_warning
Is this second array threadsafe? Where is the memory allocated for automatic_array
in this second subroutine? Is the memory allocated and deallocated on every call making it slower than if it was on the stack or if a preallocated array was passed in as a dummy argument?
I wrote the following program to test 3 scenarios, a subroutine with a small array on the stack, another with a large array over the stack limit and thus stored in static memory, and a third where a dummy argument specifies the size of an array defined inside the routine.
Here is that program:
program main
implicit none
call small
call large
call automatic(65536)
end program main
subroutine small
implicit none
real :: small_array(10)
small_array=1.
print *, small_array
end subroutine small
subroutine large
implicit none
real :: large_array(65536)
large_array=1.
print *, large_array
end subroutine large
subroutine automatic(n)
implicit none
integer :: n
real :: automatic_array(n)
automatic_array=1.
print *, automatic_array
end subroutine automatic
Using steve
's recommendation I compiled with a tree dump as follows:
gfortran array_dim_test.f90 -o array_dim_test -fdump-tree-original
The full dump is at the end, but to summarize what I see, the automatic
subroutine has a try/finally block. In the try block, a call to malloc allocates the memory, and in the finally block, the memory is freed. So I guess this memory is allocated and deallocated on the heap with every call to the subroutine. This intuitively makes sense as how else would the program know what to do with this array that lives only in the subroutine, and whose size is defined in a call to the subroutine, but it is interesting to see the explicit calls in the tree dump. This would appear to be thread-safe then, but perhaps also not the most efficient thing to do if this routine is called many times with the same array size parameter, allocating and deallocating memory with every call.
Here is the tree dump:
__attribute__((fn spec (". w ")))
void automatic (integer(kind=4) & restrict n)
{
void * restrict D.3964;
integer(kind=8) ubound.0;
integer(kind=8) size.1;
real(kind=4)[0:D.3961] * restrict automatic_array;
integer(kind=8) D.3961;
bitsizetype D.3962;
sizetype D.3963;
try
{
ubound.0 = (integer(kind=8)) *n;
size.1 = NON_LVALUE_EXPR <ubound.0>;
size.1 = MAX_EXPR <size.1, 0>;
D.3961 = size.1 + -1;
D.3962 = (bitsizetype) (sizetype) NON_LVALUE_EXPR <size.1> * 32;
D.3963 = (sizetype) NON_LVALUE_EXPR <size.1> * 4;
D.3964 = (void * restrict) __builtin_malloc (MAX_EXPR <(unsigned long) (size.1 * 4), 1>);
automatic_array = (real(kind=4)[0:D.3961] * restrict) D.3964;
{
integer(kind=8) D.3940;
D.3940 = ubound.0;
{
integer(kind=8) S.2;
S.2 = 1;
while (1)
{
if (S.2 > D.3940) goto L.1;
(*automatic_array)[S.2 + -1] = 1.0e+0;
S.2 = S.2 + 1;
}
L.1:;
}
}
{
struct __st_parameter_dt dt_parm.3;
dt_parm.3.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.3.common.line = 27;
dt_parm.3.common.flags = 128;
dt_parm.3.common.unit = 6;
_gfortran_st_write (&dt_parm.3);
{
integer(kind=8) D.3944;
struct array01_real(kind=4) parm.4;
D.3944 = ubound.0;
parm.4.span = 4;
parm.4.dtype = {.elem_len=4, .rank=1, .type=3};
parm.4.dim[0].lbound = 1;
parm.4.dim[0].ubound = D.3944;
parm.4.dim[0].stride = 1;
parm.4.data = (void *) &(*automatic_array)[0];
parm.4.offset = -1;
_gfortran_transfer_array_write (&dt_parm.3, &parm.4, 4, 0);
}
_gfortran_st_write_done (&dt_parm.3);
}
}
finally
{
__builtin_free ((void *) automatic_array);
}
}
__attribute__((fn spec (". ")))
void large ()
{
static real(kind=4) large_array[65536];
{
integer(kind=8) S.5;
S.5 = 1;
while (1)
{
if (S.5 > 65536) goto L.2;
large_array[S.5 + -1] = 1.0e+0;
S.5 = S.5 + 1;
}
L.2:;
}
{
struct __st_parameter_dt dt_parm.6;
dt_parm.6.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.6.common.line = 19;
dt_parm.6.common.flags = 128;
dt_parm.6.common.unit = 6;
_gfortran_st_write (&dt_parm.6);
{
struct array01_real(kind=4) parm.7;
parm.7.span = 4;
parm.7.dtype = {.elem_len=4, .rank=1, .type=3};
parm.7.dim[0].lbound = 1;
parm.7.dim[0].ubound = 65536;
parm.7.dim[0].stride = 1;
parm.7.data = (void *) &large_array[0];
parm.7.offset = -1;
_gfortran_transfer_array_write (&dt_parm.6, &parm.7, 4, 0);
}
_gfortran_st_write_done (&dt_parm.6);
}
}
__attribute__((fn spec (". ")))
void small ()
{
real(kind=4) small_array[10];
{
integer(kind=8) S.8;
S.8 = 1;
while (1)
{
if (S.8 > 10) goto L.3;
small_array[S.8 + -1] = 1.0e+0;
S.8 = S.8 + 1;
}
L.3:;
}
{
struct __st_parameter_dt dt_parm.9;
dt_parm.9.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.9.common.line = 12;
dt_parm.9.common.flags = 128;
dt_parm.9.common.unit = 6;
_gfortran_st_write (&dt_parm.9);
{
struct array01_real(kind=4) parm.10;
parm.10.span = 4;
parm.10.dtype = {.elem_len=4, .rank=1, .type=3};
parm.10.dim[0].lbound = 1;
parm.10.dim[0].ubound = 10;
parm.10.dim[0].stride = 1;
parm.10.data = (void *) &small_array[0];
parm.10.offset = -1;
_gfortran_transfer_array_write (&dt_parm.9, &parm.10, 4, 0);
}
_gfortran_st_write_done (&dt_parm.9);
}
}
__attribute__((fn spec (". ")))
void MAIN__ ()
{
small ();
large ();
{
static integer(kind=4) C.3993 = 65536;
automatic (&C.3993);
}
}
__attribute__((externally_visible))
integer(kind=4) main (integer(kind=4) argc, character(kind=1) * * argv)
{
static integer(kind=4) options.11[7] = {2116, 4095, 0, 1, 1, 0, 31};
_gfortran_set_args (argc, argv);
_gfortran_set_options (7, &options.11[0]);
MAIN__ ();
return 0;
}