I am trying to develop a parallel random walker simulation with MPI and C++.
In my simulation, each process can be thought of as a cell which can contain particles (random walkers). The cells are aligned in one dimension with periodic boundary conditions (i.e. ring topology).
In each time step, a particle can stay in its cell or move to the left or right neighbour cell with a certain probability. To keep things simple, only the last particle in each cell's list can walk. If the particle walks, it has to be sent to the process with the corresponding rank (MPI_Isend + MPI_Probe + MPI_Recv + MPI_Waitall).
However, after the first step my particles start disappearing, i.e. the messages are getting 'lost' somehow.
Below is a minimal example (sorry if it's still rather long). To better track the particle movements, each particle has an ID which corresponds to the rank of the process in which it started. After each step, each cell prints the IDs of the particles stored in it.
#include <mpi.h>
#include <vector>
#include <iostream>
#include <random>
#include <string>
#include <sstream>
#include <chrono>
#include <algorithm>
using namespace std;
class Particle
{
public:
    int ID; // this is the rank of the process which initialized the particle
    Particle () : ID(0) {}
    Particle (int ID) : ID(ID) {}
};
stringstream msg;
string msgString;
int main(int argc, char** argv)
{
    // Initialize the MPI environment
    MPI_Init(NULL, NULL);
    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    // Get the rank of the process
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    // communication declarations
    MPI_Status status;
    // get the ranks of neighbors (periodic boundary conditions)
    int neighbors[2];
    neighbors[0] = (world_size + world_rank - 1) % world_size; // left neighbor
    neighbors[1] = (world_size + world_rank + 1) % world_size; // right neighbor
    // declare particle type
    MPI_Datatype type_particle;
    MPI_Type_contiguous (1, MPI_INT, &type_particle);
    MPI_Type_commit (&type_particle);
    // every process inits 1 particle with ID = world_rank
    vector<Particle> particles;
    particles.push_back (Particle(world_rank));
    // obtain a seed from the timer
    typedef std::chrono::high_resolution_clock myclock;
    myclock::time_point beginning = myclock::now();
    myclock::duration d = myclock::now() - beginning;
    unsigned seed2 = d.count();
    default_random_engine generator (seed2);
    uniform_real_distribution<double> distribution (0, 1);
    // ------------------------------------------------------------------
    // begin time loop
    //-------------------------------------------------------------------
    for (int t=0; t<10; t++)
    {
        // ------------------------------------------------------------------
        // 1) write a message string containing the current list of particles
        //-------------------------------------------------------------------
        // write the rank and the particle IDs into the msgString
        msg << "rank " << world_rank << ": ";
        for (auto& i : particles)
        {
            msg << i.ID << " ";
        }
        msg << "\n";
        msgString = msg.str();
        msg.str (string()); msg.clear ();
        // to print the messages in order, the messages are gathered by root (rank 0) and then printed
        // first, gather nums to root
        int num = msgString.size();
        int rcounts[world_size];
        MPI_Gather( &num, 1, MPI_INT, rcounts, 1, MPI_INT, 0, MPI_COMM_WORLD);
        // root now has correct rcounts, using these we set displs[] so
        // that data is placed contiguously (or concatenated) at receive end
        int displs[world_size];
        displs[0] = 0;
        for (int i=1; i<world_size; ++i)
        {
            displs[i] = displs[i-1]+rcounts[i-1]*sizeof(char);
        }
        // create receive buffer
        int rbuf_size = displs[world_size-1]+rcounts[world_size-1];
        char *rbuf = new char[rbuf_size];
        // gather the messages
        MPI_Gatherv( &msgString[0], num, MPI_CHAR, rbuf, rcounts, displs, MPI_CHAR,
                     0, MPI_COMM_WORLD);
        // root prints the messages
        if (world_rank == 0)
        {
            cout << endl << "step " << t << endl;
            for (int i=0; i<rbuf_size; i++)
                cout << rbuf[i];
        }
        // ------------------------------------------------------------------
        // 2) send particles randomly to neighbors
        //-------------------------------------------------------------------
        Particle snd_buf;
        int sndDest = -1;
        // 2a) if there are particles left, prepare a message. otherwise, proceed to step 2b)
        if (!particles.empty ())
        {
            // write the last particle in the list to a buffer
            snd_buf = particles.back ();
            // flip a coin. with a probability of 50 %, the last particle in the list gets sent to a random neighbor
            double rnd = distribution (generator);
            if (rnd <= .5)
            {
                particles.pop_back ();
                // pick random neighbor
                if (rnd < .25)
                {
                    sndDest = neighbors[0]; // send to the left
                }
                else
                {
                    sndDest = neighbors[1]; // send to the right
                }
            }
        }
        // 2b) always send a message to each neighbor (even if it's empty)
        MPI_Request requests[2];
        for (int i=0; i<2; i++)
        {
            int dest = neighbors[i];
            MPI_Isend (
                &snd_buf,              // void* data
                sndDest==dest ? 1 : 0, // int count <---------------- send 0 particles to every neighbor except the one specified by sndDest
                type_particle,         // MPI_Datatype
                dest,                  // int destination
                0,                     // int tag
                MPI_COMM_WORLD,        // MPI_Comm
                &requests[i]
            );
        }
        // ------------------------------------------------------------------
        // 3) probe and receive messages from each neighbor
        //-------------------------------------------------------------------
        for (int i=0; i<2; i++)
        {
            int src = neighbors[i];
            // probe to determine if the message is empty or not
            MPI_Probe (
                src,            // int source,
                0,              // int tag,
                MPI_COMM_WORLD, // MPI_Comm comm,
                &status         // MPI_Status* status
            );
            int nRcvdParticles = 0;
            MPI_Get_count (&status, type_particle, &nRcvdParticles);
            // if the message is non-empty, receive it
            if (nRcvdParticles > 0) // this proc can receive max. 1 particle from each neighbor
            {
                Particle rcv_buf;
                MPI_Recv (
                    &rcv_buf,          // void* data
                    1,                 // int count
                    type_particle,     // MPI_Datatype
                    src,               // int source
                    0,                 // int tag
                    MPI_COMM_WORLD,    // MPI_Comm comm
                    MPI_STATUS_IGNORE  // MPI_Status* status
                );
                // add received particle to the list
                particles.push_back (rcv_buf);
            }
        }
        MPI_Waitall (2, requests, MPI_STATUSES_IGNORE);
    }
    // ------------------------------------------------------------------
    // end time loop
    //-------------------------------------------------------------------
    // Finalize the MPI environment.
    MPI_Finalize();
    if (world_rank == 0)
        cout << "\nMPI_Finalize()\n";
    return 0;
}
I ran the simulation with 8 processes and below is a sample of the output. In step 1, it still seems to work well, but beginning with step 2 the particles begin disappearing.
step 0
rank 0: 0
rank 1: 1
rank 2: 2
rank 3: 3
rank 4: 4
rank 5: 5
rank 6: 6
rank 7: 7
step 1
rank 0: 0
rank 1: 1
rank 2: 2 3
rank 3:
rank 4: 4 5
rank 5:
rank 6: 6 7
rank 7:
step 2
rank 0: 0
rank 1:
rank 2: 2
rank 3:
rank 4: 4
rank 5:
rank 6: 6 7
rank 7:
step 3
rank 0: 0
rank 1:
rank 2: 2
rank 3:
rank 4:
rank 5:
rank 6: 6
rank 7:
step 4
rank 0: 0
rank 1:
rank 2: 2
rank 3:
rank 4:
rank 5:
rank 6: 6
rank 7:
I have no idea what's wrong with the code... Somehow, the combination MPI_Isend + MPI_Probe + MPI_Recv + MPI_Waitall does not seem to work... Any help is really appreciated!
There is an error in your code. The following logic (irrelevant code and arguments omitted) is wrong:
MPI_Probe(..., &status);
MPI_Get_count (&status, type_particle, &nRcvdParticles);
// if the message is non-empty, receive it
if (nRcvdParticles > 0)
{
    MPI_Recv();
}
MPI_Probe does not remove zero-sized messages from the message queue. The only MPI calls that do so are MPI_Recv and the combination of MPI_Irecv + MPI_Test/MPI_Wait. You must receive all messages, including zero-sized ones, otherwise they will prevent the reception of further messages with the same (source, tag) combination. Although the reception of a zero-sized message writes nothing into the receive buffer, it removes the message envelope from the queue, so the next matching message can be received.
Solution: move the call to MPI_Recv before the conditional.
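As a minimal sketch, the corrected receive loop could look like this (reusing the variable names from your code, and assuming as in your snippet that each neighbor sends at most one particle per step):
for (int i=0; i<2; i++)
{
    int src = neighbors[i];
    // probe to learn whether 0 or 1 particles are pending from this neighbor
    MPI_Probe (src, 0, MPI_COMM_WORLD, &status);
    int nRcvdParticles = 0;
    MPI_Get_count (&status, type_particle, &nRcvdParticles);
    // receive unconditionally: a zero-sized receive writes nothing into
    // rcv_buf, but it removes the message envelope from the queue
    Particle rcv_buf;
    MPI_Recv (&rcv_buf, 1, type_particle, src, 0,
              MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    // keep the particle only if one actually arrived
    if (nRcvdParticles > 0)
        particles.push_back (rcv_buf);
}
Note that passing a count of 1 to MPI_Recv is fine even for an empty message: the count argument is only an upper bound on the number of elements to receive.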