Tags: segmentation-fault, fortran, mpi, derived-types

Seg fault sending MPI derived types with allocatable arrays in Fortran


I am having trouble with a Fortran program in which the master task sends an array of derived types, each of which contains an allocatable array, to the slave tasks. The slaves receive the array and print it successfully; however, the program then crashes. The GDB debugger shows the message below:

Program received signal SIGSEGV, Segmentation fault.
__GI___libc_free (mem=0x2) at malloc.c:2931

I am certainly missing something. Here is my code:

program test_type  

use mpi

implicit none

type mytype
real,allocatable::x(:)
integer::a
end type mytype

type(mytype),allocatable::y(:)
integer::n,i,ierr,myid,ntasks,status
integer :: datatype0, ntasktype, oldtypes(2), blockcounts(2) 
integer, allocatable :: oldtypes2(:), blockcounts2(:), datatype(:)
integer(KIND=MPI_ADDRESS_KIND) :: offsets(2)
integer(KIND=MPI_ADDRESS_KIND), allocatable :: offsets2(:)
integer(kind=MPI_ADDRESS_KIND) :: extent

call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world,myid,ierr)
call mpi_comm_size(mpi_comm_world,ntasks,ierr)

n=2
allocate(y(ntasks))
allocate(oldtypes2(ntasks), blockcounts2(ntasks))
allocate(offsets2(ntasks), datatype(ntasks))
do i=1,ntasks
 allocate(y(i)%x(n))
 y(i)%x=0.
 y(i)%a=80
enddo

if(myid==0)then
 do i=1,ntasks
  call random_number(y(i)%x)
  y(i)%a=myid
  write(0,*) "y(",i,") in process", myid, y(i)%x, y(i)%a
 enddo
endif

! (1) Create a separate structure datatype for each record
do i=1,ntasks
 call mpi_get_address(y(i)%x,offsets(1),ierr)
 call mpi_get_address(y(i)%a,offsets(2),ierr)
 offsets=offsets-offsets(1)

 oldtypes=(/ mpi_real,mpi_integer /)
 blockcounts=(/ n,1 /)

 call mpi_type_create_struct(2,blockcounts,offsets,oldtypes,datatype(i),ierr)
end do

! (2) Create a structure of structures that describes the whole array
do i=1,ntasks
 call MPI_GET_ADDRESS(y(i)%x, offsets2(i), ierr)
enddo
offsets2 = offsets2 - offsets2(1)
do i=1,ntasks
 oldtypes2(i)=datatype(i)
 blockcounts2(i)=1
enddo
call mpi_type_create_struct(ntasks,blockcounts2,offsets2,oldtypes2,ntasktype,ierr)
call mpi_type_commit(ntasktype, ierr)

! (2.1) Free the intermediate datatypes
do i=1,ntasks
 call MPI_TYPE_FREE(datatype(i), ierr)
enddo

! (3) Send the array
if(myid==0) then
 do i=1,ntasks-1
  call MPI_SEND(y(1)%x, 1, ntasktype, &
                i, 2, MPI_COMM_WORLD, ierr)
 enddo
 do i=1,ntasks-1
  write(0,*) "sent", y(i)%x,y(i)%a
 enddo
else
 call MPI_RECV(y(1)%x,1, ntasktype, 0, 2, MPI_COMM_WORLD, status, ierr)
 do i=1,ntasks
  write(0,*) "task(",myid,") received", i,y(i)%x,y(i)%a
 enddo
end if

deallocate(y)
deallocate(oldtypes2, blockcounts2)
deallocate(offsets2,datatype)
call mpi_finalize(ierr)

end program

Solution

  • From man MPI_Recv

    Fortran Syntax
           USE MPI
           ! or the older form: INCLUDE 'mpif.h'
           MPI_RECV(BUF, COUNT, DATATYPE, SOURCE, TAG, COMM, STATUS, IERROR)
                <type>    BUF(*)
                INTEGER   COUNT, DATATYPE, SOURCE, TAG, COMM
                INTEGER   STATUS(MPI_STATUS_SIZE), IERROR
    

    Your issue is memory corruption on non-zero ranks, because you declared

    integer :: status
    

    instead of

    integer :: status(MPI_STATUS_SIZE)
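
    If you never inspect the received status, another option is to pass the predefined constant MPI_STATUS_IGNORE instead of a status array; a minimal sketch of the receive call in the original program:

    call MPI_RECV(y(1)%x, 1, ntasktype, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)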
    

    As a side note, you can simplify your code by directly creating a single derived datatype with 2*ntasks elements and then using MPI_BOTTOM as both the send and the receive buffer.

    If you really want to manipulate offsets yourself, you should use MPI_Aint_diff() instead of the - operator, as sketched below.
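
    For example, the offset computation in step (1) of the original code could be written as follows (a minimal sketch, assuming an MPI 3.1 or newer library; base and addr are illustrative variables, not part of the original code):

    ! in the declaration section (illustrative names):
    integer(kind=MPI_ADDRESS_KIND) :: base, addr

    ! inside the loop of step (1):
    ! MPI_Aint_diff subtracts two absolute addresses and returns the
    ! difference as an INTEGER(KIND=MPI_ADDRESS_KIND)
    call mpi_get_address(y(i)%x, base, ierr)
    call mpi_get_address(y(i)%a, addr, ierr)
    offsets(1) = 0
    offsets(2) = MPI_Aint_diff(addr, base)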

    [EDIT] Here is a revamped/simplified version that uses MPI_BOTTOM

    program test_type  
    
    use mpi
    
    implicit none
    
    type mytype
    real,allocatable::x(:)
    integer::a
    end type mytype
    
    type(mytype),allocatable::y(:)
    integer::n,i,ierr,myid,ntasks,status(MPI_STATUS_SIZE)
    integer :: ntasktype
    integer, allocatable :: oldtypes(:), blockcounts(:)
    integer(KIND=MPI_ADDRESS_KIND), allocatable :: offsets(:)
    
    call mpi_init(ierr)
    call mpi_comm_rank(mpi_comm_world,myid,ierr)
    call mpi_comm_size(mpi_comm_world,ntasks,ierr)
    
    n=2
    allocate(y(ntasks))
    allocate(oldtypes(2*ntasks), blockcounts(2*ntasks))
    allocate(offsets(2*ntasks))
    do i=1,ntasks
     allocate(y(i)%x(n))
     y(i)%x=0.
     y(i)%a=80
    enddo
    
    if(myid==0)then
      do i=1,ntasks
        call random_number(y(i)%x)
        y(i)%a=myid
        write(0,*) "y(",i,") in process", myid, y(i)%x, y(i)%a
      enddo
    endif
    
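    ! for every record, take the absolute addresses of x and a and pair them
    ! with the matching MPI types and block lengths (n reals, 1 integer)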
    do i=1,ntasks
      call mpi_get_address(y(i)%x,offsets(2*i-1),ierr)
      call mpi_get_address(y(i)%a,offsets(2*i  ),ierr)
    
      oldtypes(2*i-1) = mpi_real
      oldtypes(2*i  ) = mpi_integer
    
      blockcounts(2*i-1) = n
      blockcounts(2*i  ) = 1
    end do
    
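    ! a single struct datatype with 2*ntasks entries describes the whole array;
    ! its entries are absolute addresses, so it is used with MPI_BOTTOM below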
    call mpi_type_create_struct(2*ntasks,blockcounts,offsets,oldtypes,ntasktype,ierr) 
    call mpi_type_commit(ntasktype, ierr)
    
    ! (3) Send the array
    if(myid==0) then   
      do i=1,ntasks-1 
        call MPI_SEND(MPI_BOTTOM, 1, ntasktype, &
                      i, 2, MPI_COMM_WORLD, ierr)
      enddo
      do i=1,ntasks-1 
       write(0,*) "sent", y(i)%x,y(i)%a
      enddo
    else
      call MPI_RECV(MPI_BOTTOM,1, ntasktype, 0, 2, MPI_COMM_WORLD, status, ierr)
      do i=1,ntasks
        write(0,*) "task(",myid,") received", i,y(i)%x,y(i)%a
      enddo
    end if
    
    do i=1, ntasks
      deallocate(y(i)%x)
    enddo
    deallocate(y)
    deallocate(oldtypes, blockcounts)
    deallocate(offsets)
    
    call mpi_finalize(ierr)
    
    end program
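
    Because this datatype stores absolute addresses, each rank has to build and commit its own ntasktype from its own allocations (and rebuild it if the arrays are ever reallocated); once the communication is done, it can be released with MPI_TYPE_FREE.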