Search code examples
memoryfortrangfortran

is a Fortran subroutine with a dummy argument specified size array thread safe


The following code compiles in gfortran, with a warning about large_array being larger than the limit for a stack variable, stating that the array will be moved to static memory and is therefore not threadsafe:

subroutine stack_size_warning
  implicit none
  real :: large_array(65536)
  print *, large_array
end subroutine stack_size_warning

This subroutine however compiles with no errors or warnings, and I can call it with n values larger than 65536 without issue, at least in simple cases.

subroutine no_warning(n)
  implicit none
  integer :: n
  real :: automatic_array(n)
  print *, automatic_array
end subroutine no_warning

Is this second array threadsafe? Where is the memory allocated for automatic_array in this second subroutine? Is the memory allocated and deallocated on every call making it slower than if it was on the stack or if a preallocated array was passed in as a dummy argument?


Solution

  • I wrote the following program to test 3 scenarios, a subroutine with a small array on the stack, another with a large array over the stack limit and thus stored in static memory, and a third where a dummy argument specifies the size of an array defined inside the routine.

    Here is that program:

    program main
      implicit none
      call small
      call large
      call automatic(65536)
    end program main
    
    subroutine small
      implicit none
      real :: small_array(10)
      small_array=1.
      print *, small_array
    end subroutine small
    
    subroutine large
      implicit none
      real :: large_array(65536)
      large_array=1.
      print *, large_array
    end subroutine large
    
    subroutine automatic(n)
      implicit none
      integer :: n
      real :: automatic_array(n)
      automatic_array=1.
      print *, automatic_array
    end subroutine automatic
    

    Using steve's recommendation I compiled with a tree dump as follows:

    gfortran array_dim_test.f90 -o array_dim_test -fdump-tree-original
    

    The full dump is at the end, but to summarize what I see, the automatic subroutine has a try/finally block. In the try block, a call to malloc allocates the memory, and in the finally block, the memory is freed. So I guess this memory is allocated and deallocated on the heap with every call to the subroutine. This intuitively makes sense as how else would the program know what to do with this array that lives only in the subroutine, and whose size is defined in a call to the subroutine, but it is interesting to see the explicit calls in the tree dump. This would appear to be thread-safe then, but perhaps also not the most efficient thing to do if this routine is called many times with the same array size parameter, allocating and deallocating memory with every call.

    Here is the tree dump:

    __attribute__((fn spec (". w ")))
    void automatic (integer(kind=4) & restrict n)
    {
      void * restrict D.3964;
      integer(kind=8) ubound.0;
      integer(kind=8) size.1;
      real(kind=4)[0:D.3961] * restrict automatic_array;
      integer(kind=8) D.3961;
      bitsizetype D.3962;
      sizetype D.3963;
    
      try
        {
          ubound.0 = (integer(kind=8)) *n;
          size.1 = NON_LVALUE_EXPR <ubound.0>;
          size.1 = MAX_EXPR <size.1, 0>;
          D.3961 = size.1 + -1;
          D.3962 = (bitsizetype) (sizetype) NON_LVALUE_EXPR <size.1> * 32;
          D.3963 = (sizetype) NON_LVALUE_EXPR <size.1> * 4;
          D.3964 = (void * restrict) __builtin_malloc (MAX_EXPR <(unsigned long) (size.1 * 4), 1>);
          automatic_array = (real(kind=4)[0:D.3961] * restrict) D.3964;
          {
            integer(kind=8) D.3940;
    
            D.3940 = ubound.0;
            {
              integer(kind=8) S.2;
    
              S.2 = 1;
              while (1)
                {
                  if (S.2 > D.3940) goto L.1;
                  (*automatic_array)[S.2 + -1] = 1.0e+0;
                  S.2 = S.2 + 1;
                }
              L.1:;
            }
          }
          {
            struct __st_parameter_dt dt_parm.3;
    
            dt_parm.3.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
            dt_parm.3.common.line = 27;
            dt_parm.3.common.flags = 128;
            dt_parm.3.common.unit = 6;
            _gfortran_st_write (&dt_parm.3);
            {
              integer(kind=8) D.3944;
              struct array01_real(kind=4) parm.4;
    
              D.3944 = ubound.0;
              parm.4.span = 4;
              parm.4.dtype = {.elem_len=4, .rank=1, .type=3};
              parm.4.dim[0].lbound = 1;
              parm.4.dim[0].ubound = D.3944;
              parm.4.dim[0].stride = 1;
              parm.4.data = (void *) &(*automatic_array)[0];
              parm.4.offset = -1;
              _gfortran_transfer_array_write (&dt_parm.3, &parm.4, 4, 0);
            }
            _gfortran_st_write_done (&dt_parm.3);
          }
        }
      finally
        {
          __builtin_free ((void *) automatic_array);
        }
    }
    
    
    __attribute__((fn spec (". ")))
    void large ()
    {
      static real(kind=4) large_array[65536];
    
      {
        integer(kind=8) S.5;
    
        S.5 = 1;
        while (1)
          {
            if (S.5 > 65536) goto L.2;
            large_array[S.5 + -1] = 1.0e+0;
            S.5 = S.5 + 1;
          }
        L.2:;
      }
      {
        struct __st_parameter_dt dt_parm.6;
    
        dt_parm.6.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
        dt_parm.6.common.line = 19;
        dt_parm.6.common.flags = 128;
        dt_parm.6.common.unit = 6;
        _gfortran_st_write (&dt_parm.6);
        {
          struct array01_real(kind=4) parm.7;
    
          parm.7.span = 4;
          parm.7.dtype = {.elem_len=4, .rank=1, .type=3};
          parm.7.dim[0].lbound = 1;
          parm.7.dim[0].ubound = 65536;
          parm.7.dim[0].stride = 1;
          parm.7.data = (void *) &large_array[0];
          parm.7.offset = -1;
          _gfortran_transfer_array_write (&dt_parm.6, &parm.7, 4, 0);
        }
        _gfortran_st_write_done (&dt_parm.6);
      }
    }
    
    
    __attribute__((fn spec (". ")))
    void small ()
    {
      real(kind=4) small_array[10];
    
      {
        integer(kind=8) S.8;
    
        S.8 = 1;
        while (1)
          {
            if (S.8 > 10) goto L.3;
            small_array[S.8 + -1] = 1.0e+0;
            S.8 = S.8 + 1;
          }
        L.3:;
      }
      {
        struct __st_parameter_dt dt_parm.9;
    
        dt_parm.9.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
        dt_parm.9.common.line = 12;
        dt_parm.9.common.flags = 128;
        dt_parm.9.common.unit = 6;
        _gfortran_st_write (&dt_parm.9);
        {
          struct array01_real(kind=4) parm.10;
    
          parm.10.span = 4;
          parm.10.dtype = {.elem_len=4, .rank=1, .type=3};
          parm.10.dim[0].lbound = 1;
          parm.10.dim[0].ubound = 10;
          parm.10.dim[0].stride = 1;
          parm.10.data = (void *) &small_array[0];
          parm.10.offset = -1;
          _gfortran_transfer_array_write (&dt_parm.9, &parm.10, 4, 0);
        }
        _gfortran_st_write_done (&dt_parm.9);
      }
    }
    
    
    __attribute__((fn spec (". ")))
    void MAIN__ ()
    {
      small ();
      large ();
      {
        static integer(kind=4) C.3993 = 65536;
    
        automatic (&C.3993);
      }
    }
    
    
    __attribute__((externally_visible))
    integer(kind=4) main (integer(kind=4) argc, character(kind=1) * * argv)
    {
      static integer(kind=4) options.11[7] = {2116, 4095, 0, 1, 1, 0, 31};
    
      _gfortran_set_args (argc, argv);
      _gfortran_set_options (7, &options.11[0]);
      MAIN__ ();
      return 0;
    }