Search code examples
mpi, mpi-io

MPI-IO write to file in non contiguous pattern


I am having trouble in writing a parallel MPI I/O program that will write in a particular pattern. I was able to have process 0 write integers 0-9, process 1 write integers 10-19, process 2 write integers 20-29, etc.

proc 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
proc 1: [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
proc 2: [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
proc 3: [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ..., 39]

Here is the code that accomplishes that:

/*
 * Every rank fills a buffer with consecutive integers starting at rank*10
 * and writes the first N/size of them to "datafile" as one contiguous chunk.
 * No file view is set, so the offset handed to MPI_File_write_at is a byte
 * offset computed from sizeof(int).
 */
int main(int argc, char *argv[]) {
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int N = size * 10;
    int buf[N];
    int idx = 0;
    while (idx < N) {
        buf[idx] = rank * 10 + idx;
        ++idx;
    }

    /* Number of ints each rank contributes to the file. */
    int per_rank = N / size;
    int offset = rank * per_rank * sizeof(int);

    MPI_File fhw;
    MPI_File_open(MPI_COMM_WORLD, "datafile", MPI_MODE_CREATE|MPI_MODE_WRONLY,
                MPI_INFO_NULL, &fhw);
    printf("(%d) Writing to file...\n", rank);
    printf("\nRank: (%d), Offset: %d\n", rank, offset);

    /* Only the first per_rank entries of buf are written. */
    MPI_Status status;
    MPI_File_write_at(fhw, offset, buf, per_rank, MPI_INT, &status);
    MPI_File_close(&fhw);

    MPI_Finalize();

    return 0;
}

However, I am confused about how I should produce the following result:

// starting out:
proc 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
proc 1: [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
proc 2: [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
proc 3: [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
// proc 0 writes first 2 slots, then proc 1 writes next 2, etc.
result: [0, 1, 10, 11, 20, 21, 30, 31, 2, 3, 12, 13, 22, 23, ..., 8, 9, 18, 19, 28, 29, 38, 39]

I have tried using MPI_File_set_view while looking up examples and documentation online for the past few hours but cannot get it to work. Can someone guide me in the right direction?


Solution

  • As you had understood you need to setup a view...

    Then some small mistakes in your code: 1) Do you really need a buf of more than 10 numbers for each process? 2) The offset in MPI_File_write_at is not necessarily in bytes: it is counted in elements of your view's etype (with the default view the etype is MPI_BYTE, so it is in bytes only until you set a view).

    So to set up the view you only need 1 line:

    #include "mpi.h"
    #include <cstdio>
    
    /*
     * Each rank writes its ten integers to "datafile" one rank after another.
     * The view is set with MPI_INT as etype, so the offset passed to
     * MPI_File_write_at is counted in ints rather than bytes.
     */
    int main(int argc, char *argv[]) {
        int rank, size;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        int N = 10;
        int buf[N];
        for (int k = 0; k < N; ++k) {
            buf[k] = rank * 10 + k;
        }

        /* Start of this rank's chunk, expressed in ints. */
        int offset = 10 * rank;

        MPI_File fhw;
        MPI_File_open(MPI_COMM_WORLD, "datafile", MPI_MODE_CREATE|MPI_MODE_WRONLY,
                      MPI_INFO_NULL, &fhw);
        printf("(%d) Writing to file...\n", rank);
        printf("\nRank: (%d), Offset: %d\n", rank, offset);

        /* The one line that sets up the view: displacement 0, etype and filetype MPI_INT. */
        MPI_File_set_view(fhw, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);

        MPI_Status status;
        MPI_File_write_at(fhw, offset, buf, N, MPI_INT, &status);
        MPI_File_close(&fhw);

        MPI_Finalize();

        return 0;
    }
    

    Then you can do exactly the same using MPI_File_write :-) by setting the view differently for each process; just replace the view and the write:

    /* The view's displacement (2nd argument) is given in bytes, hence offset*sizeof(int). */
    MPI_File_set_view(fhw,offset*sizeof(int),MPI_INT,MPI_INT,
                            "native",MPI_INFO_NULL ) ;
    /* Offset 0 is relative to the start of the view and counted in etype (MPI_INT) units. */
    MPI_File_write_at(fhw, 0, buf, N, MPI_INT, &status);
    

    or simply:

    /* Per-rank view: the displacement is offset ints converted to bytes. */
    MPI_File_set_view(fhw,offset*sizeof(int),MPI_INT,MPI_INT,
                            "native",MPI_INFO_NULL ) ;
    /* No explicit offset: writes N ints from buf through the view just set. */
    MPI_File_write(fhw, buf, N, MPI_INT, &status);
    

    NOTE: in MPI_File_set_view the displacement is in bytes, while in the write calls the offset is counted in elements of the view's etype. This might be a bit confusing :-)

    Now for the magic:

    You need to write blocks of 2 ints with a stride of 2*size, and you have N/2 of those blocks, so you create a type:

    /* File type: N/2 blocks of 2 ints each, with block starts size*2 ints apart. */
    MPI_Type_vector(N/2, 2 , size*2,  MPI_INT, &ftype);
    /* A derived type must be committed before it is used. */
    MPI_Type_commit(&ftype);
    

    and set the view:

    /* Displacement in bytes: this rank's view starts rank*2 ints into the file; ftype is the filetype. */
    MPI_File_set_view( fhw, rank*2*sizeof(int), MPI_INT, ftype, "native", MPI_INFO_NULL ) ;
    

    Then consider that your data is stored contiguously in memory; to fit your view, it has to be described as N/2 blocks of 2 ints, so you create a datatype:

    /* Memory type: one element is 2 consecutive ints. */
    MPI_Type_contiguous(2,   MPI_INT, &mtype);
    /* A derived type must be committed before it is used. */
    MPI_Type_commit(&mtype);
    

    Then you are ready for the write:

    /* Write N/2 elements of mtype (2 ints each) from buf through the current view. */
    MPI_File_write(fhw, buf, N/2, mtype, &status);
    MPI_File_close(&fhw);
    

    And so the entire code will become:

    #include "mpi.h"
    #include <cstdio>
    
    int main(int argc, char *argv[]) {
        int i, rank, size, offset;
        MPI_File fhw;
        MPI_Status status;
        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        int N = 10; //need to be even!!!
        int buf[N];
        for ( i = 0; i < N; ++i ) {
            buf[i] = rank * N + i;
        }
    
        offset = 10*rank;
        MPI_File_open(MPI_COMM_WORLD, "datafile", MPI_MODE_CREATE|MPI_MODE_WRONLY,
                        MPI_INFO_NULL, &fhw);
        printf("(%d) Writing to file...\n", rank);
        printf("\nRank: (%d), Offset: %d\n", rank, offset);
    
        MPI_Datatype ftype,mtype;
        MPI_Type_vector(N/2, 2 , size*2,  MPI_INT, &ftype);
        MPI_Type_commit(&ftype);
    
        MPI_File_set_view( fhw, rank*2*sizeof(int), MPI_INT, ftype,
                             "native",MPI_INFO_NULL ) ;
    
        MPI_Type_contiguous(2,   MPI_INT, &mtype);
        MPI_Type_commit(&mtype);
    
        MPI_File_write(fhw, buf, N/2, mtype, &status);
        MPI_File_close(&fhw);
    
        MPI_Finalize();
        return 0;
    }