I'm trying to implement a simplified version of Lamport's Bakery Algorithm in C before I attempt to use it to solve a more complex problem.* The simplification I am making is that the lock is only shared by only two threads instead of N.
I set up two threads (via OpenMP to keep things simple) and they loop, attempting to increment a shared counter within their critical section. If everything goes according to plan, then the final counter value should be equal to the number of iterations. However, here's some example output:
count: 9371470 (expected: 10000000)
Doh! Something is broken, but what? My implementation is pretty textbook (for reference), so perhaps I'm misusing memory barriers? Did I forget to mark something as volatile?
My code:
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <omp.h>
typedef struct
{
volatile bool entering[2];
volatile uint32_t number[2];
} SimpleBakeryLock_t;
inline void mb() { __sync_synchronize(); }
inline void lock(SimpleBakeryLock_t* l, int id)
{
int i = id, j = !id;
uint32_t ni, nj;
l->entering[i] = true;
mb();
ni = 1 + l->number[j];
l->number[i] = ni;
mb();
l->entering[i] = false;
mb();
while (l->entering[j]) {
mb();
}
nj = l->number[j];
mb();
while ((nj != 0) && (nj < ni || (nj == ni && j < i)))
{
nj = l->number[j]; // re-read
mb();
}
}
inline void unlock(SimpleBakeryLock_t* l, int id)
{
l->number[id] = 0;
mb();
}
SimpleBakeryLock_t x;
int main(void)
{
const uint32_t iterations = 10000000;
uint32_t count = 0;
bool once = false;
int i;
memset((void*)&x, 0, sizeof(x));
mb();
// set OMP_NUM_THREADS=2 in your environment!
#pragma omp parallel for schedule(static, 1) private(once, i)
for(uint32_t dummy = 0; dummy < iterations; ++dummy)
{
if (!once)
{
i = omp_get_thread_num();
once = true;
}
lock(&x, i);
{
count = count + 1;
mb();
}
unlock(&x, i);
}
printf("count: %u (expected: %u)\n", count, iterations);
return 0;
}
To compile and run (on Linux), do:
$ gcc -O3 -fopenmp bakery.c
$ export OMP_NUM_THREADS=2
$ ./a.out
I tracked down two problems and the code now works. Issues:
Here is the corrected code for completeness:
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <omp.h>
#define cpu_relax() asm volatile ("pause":::"memory")
#define mb() asm volatile ("mfence":::"memory")
/* Simple Lamport bakery lock for two threads. */
typedef struct
{
volatile uint32_t entering[2];
volatile uint32_t number[2];
} SimpleBakeryLock_t;
void lock(SimpleBakeryLock_t* l, int id)
{
int i = id, j = !id;
uint32_t ni, nj;
l->entering[i] = 1;
mb();
ni = 1 + l->number[j];
l->number[i] = ni;
mb();
l->entering[i] = 0;
mb();
while (l->entering[j]) {
cpu_relax();
}
do {
nj = l->number[j];
} while ((nj != 0) && (nj < ni || (nj == ni && j < i)));
}
void unlock(SimpleBakeryLock_t* l, int id)
{
mb(); /* prevent critical section writes from leaking out over unlock */
l->number[id] = 0;
mb();
}
SimpleBakeryLock_t x;
int main(void)
{
const int32_t iterations = 10000000;
int32_t dummy;
uint32_t count = 0;
memset((void*)&x, 0, sizeof(x));
mb();
// set OMP_NUM_THREADS=2 in your environment!
#pragma omp parallel for schedule(static, 1)
for(dummy = 0; dummy < iterations; ++dummy)
{
int i = omp_get_thread_num();
lock(&x, i);
count = count + 1;
unlock(&x, i);
}
printf("count: %u (expected: %u)\n", count, iterations);
return 0;
}