For practice I've implemented Conways' Game of Life in C++ with updates to the "world" being handled with parallel processing. I'm using SFML for graphics.
Adding multi-threading definitely made it run faster (at least on this 4-core machine) but I noticed it has issues. If I run it in a Debug
configuration from Visual Studio 2017 it seems to start very slow, but after running for 2 seconds it suddenly gets faster and runs smoothly. But if I run it in Release
configuration then it runs even faster than Debug
did but every half a second or so it "hitches" or stutters and doesn't run smoothly like I expected.
What could be causing these two behavior issues and how can I fix them?
GameOfLife.cpp:
#include "GameOfLife.h"
#include <iostream>
#include <vector>
#include <math.h>
#include <thread>
#include <mutex>
#include <SFML/Graphics.hpp>
class GameOfLife
{
public:
GameOfLife(int sizeX, int sizeY);
uint8_t & getCell(int x, int y);
sf::Vector2i get2D(int i);
void doUpdate(int start, int end);
virtual ~GameOfLife() = default;
void update();
std::vector<sf::Vector2i> getLiveCells();
private:
std::vector<uint8_t> world;
};
std::mutex updateListLock;
std::vector<sf::Vector2i> pendingUpdates;
sf::Vector2i worldSize;
GameOfLife::GameOfLife(int sizeX, int sizeY)
{
worldSize = sf::Vector2i(sizeX, sizeY);
// initialize world to specified size, all starting as dead
world = std::vector<uint8_t>(sizeX * sizeY, 0);
// reserve space for worst case (every cell needs to be updated)
pendingUpdates.reserve(sizeX * sizeY);
// place a glider
getCell(1, 3) = true;
getCell(2, 4) = true;
getCell(3, 2) = true;
getCell(3, 3) = true;
getCell(3, 4) = true;
// place a glider at top-center
int midX = std::floor(worldSize.x / 2);
getCell(midX + 1, 3) = true;
getCell(midX + 2, 4) = true;
getCell(midX + 3, 2) = true;
getCell(midX + 3, 3) = true;
getCell(midX + 3, 4) = true;
}
uint8_t& GameOfLife::getCell(int x, int y)
{
return world[y * worldSize.x + x];
}
sf::Vector2i GameOfLife::get2D(int index)
{
int y = std::floor(index / worldSize.x);
int x = index % worldSize.x;
return sf::Vector2i(x, y);
}
// Update the cells from position start (inclusive) to position end (exclusive).
void GameOfLife::doUpdate(int start, int end)
{
for (int i = start; i < end; i++)
{
auto pos = get2D(i);
// # of alive neighbors
int aliveCount = 0;
// check all 8 surrounding neighbors
for (int nX = -1; nX <= 1; nX++) // nX = -1, 0, 1
{
for (int nY = -1; nY <= 1; nY++) // nY = -1, 0, 1
{
// make sure to skip the current cell!
if (nX == 0 && nY == 0)
continue;
// wrap around to other side if neighbor would be outside world
int newX = (nX + pos.x + worldSize.x) % worldSize.x;
int newY = (nY + pos.y + worldSize.y) % worldSize.y;
aliveCount += getCell(newX, newY);
}
}
// Evaluate game rules on current cell
switch (world[i]) // is current cell alive?
{
case true:
if (aliveCount < 2 || aliveCount > 3)
{
std::lock_guard<std::mutex> lock(updateListLock);
pendingUpdates.push_back(pos); // this cell will be toggled to dead
}
break;
case false:
if (aliveCount == 3)
{
std::lock_guard<std::mutex> lock(updateListLock);
pendingUpdates.push_back(pos); // this cell will be toggled to alive
}
break;
}
}
}
void GameOfLife::update()
{
unsigned maxThreads = std::thread::hardware_concurrency();
// divide the grid into horizontal slices
int chunkSize = (worldSize.x * worldSize.y) / maxThreads;
// split the work into threads
std::vector<std::thread> threads;
for (int i = 0; i < maxThreads; i++)
{
int start = i * chunkSize;
int end;
if (i == maxThreads - 1) // if this is the last thread, endPos will be set to cover remaining "height"
end = worldSize.x * worldSize.y;
else
end = (i + 1) * chunkSize;
std::thread t([this, start, end] {
this->doUpdate(start, end);
});
threads.push_back(std::move(t));
}
for (std::thread & t : threads) {
if (t.joinable())
t.join();
}
// apply updates to cell states
for each (auto loc in pendingUpdates)
{
// toggle the dead/alive state of every cell with a pending update
getCell(loc.x, loc.y) = !getCell(loc.x, loc.y);
}
// clear updates
pendingUpdates.clear();
}
std::vector<sf::Vector2i> GameOfLife::getLiveCells()
{
std::vector<sf::Vector2i> liveCells;
liveCells.reserve(worldSize.x * worldSize.y); // reserve space for worst case (every cell is alive)
for (int i = 0; i < worldSize.x * worldSize.y; i++) {
auto pos = get2D(i);
if (world[i])
liveCells.push_back(sf::Vector2i(pos.x, pos.y));
}
return liveCells;
}
If you are going to multithread Game of Life, you should seriously consider double-buffering your world
state. Then the threads are only reading shared state, and only one thread writes to any given location.
class GameOfLife
{
public:
GameOfLife(sf::Vector2i size);
void update();
private:
void doUpdate(int start, int end);
uint8_t& getCell(sf::Vector2i pos);
sf::Vector2i getPos(int i);
std::vector<uint8_t> world;
std::vector<uint8_t> pendingWorld;
};
GameOfLife::GameOfLife(sf::Vector2i size)
: worldSize(size), world(size.x * size.y, false), pendingWorld(world)
{
// place a glider
getCell({1, 3}) = true;
getCell({2, 4}) = true;
getCell({3, 2}) = true;
getCell({3, 3}) = true;
getCell({3, 4}) = true;
// place a glider at top-center
int midX = std::floor(worldSize.x / 2);
getCell({midX + 1, 3}) = true;
getCell({midX + 2, 4}) = true;
getCell({midX + 3, 2}) = true;
getCell({midX + 3, 3}) = true;
getCell({midX + 3, 4}) = true;
}
uint8_t& GameOfLife::getCell(sf::Vector2i pos)
{
return world[pos.y * worldSize.x + pos.x];
}
sf::Vector2i GameOfLife::get2D(int index)
{
int y = index / worldSize.x;
int x = index % worldSize.x;
return { x, y };
}
// Update the cells from position start (inclusive) to position end (exclusive).
void GameOfLife::doUpdate(int start, int end)
{
for (int i = start; i < end; i++)
{
auto pos = get2D(i);
// # of alive neighbors
int aliveCount = 0;
// check all 8 surrounding neighbors
for (sf::Vector2i dp : { {1, 1}, {1, 0}, {1, -1}, {0, 1}, {0, -1}, {-1, 1}, {-1, 0}, {-1, -1} })
{
auto np = pos + dp;
// wrap around to other side if neighbor would be outside world
np.x %= worldSize.x;
np.y %= worldSize.y;
aliveCount += getCell(np);
}
// Evaluate game rules on current cell
bool stays = aliveCount == 2 || aliveCount == 3;
bool spawns = aliveCount == 3
pendingWorld[i] = world[i] ? stays : spawns;
}
}
void GameOfLife::update()
{
unsigned maxThreads = std::thread::hardware_concurrency();
// divide the grid into horizontal slices
int chunkSize = world.size() / maxThreads;
// split the work into threads
std::vector<std::thread> threads;
for (int i = 0; i < maxThreads; i++)
{
int start = i * chunkSize;
int end = std::min(world.size(), (i + 1) * chunksize);
threads.emplace_back(&GameOfLife::doUpdate, this, start, end);
}
for (std::thread & t : threads) {
t.join();
}
// apply updates
world.swap(pendingWorld);
}
If you have a C++17 compiler, I would avoid explicit threads, write doUpdate
in terms of a single index, and call std::for_each(std::execution::par_unseq, indexes.begin(), indexes.end(), [this](int i) { doUpdate(i); });