c multithreading pthreads mutex condition-variable

Why does my pthread_cond_signal not immediately wake a blocked thread?

I'm writing a multithreaded program where an auxiliary thread runs after a condition is met, i.e., a certain number of elements existing in a data structure.

void*
gc_thread_handler(void *arg) {
    while (!done) {
        pthread_mutex_lock(&(gc.mutex));
        while (!gc.collect && !done) {
            pthread_cond_wait(&(gc.cond), &(gc.mutex));
        }
        pthread_mutex_unlock(&(gc.mutex));
        rcgc_activate();
    }

    return NULL;
}

I block on the waiting thread until it receives a signal sent by the following function.

void *
gc_alloc(size_t sz) {
    pthread_mutex_lock(&(gc.mutex));
    // ...
    gc.num_chunks++;
    if (gc.num_chunks > MAX_CHUNKS) {
        gc.collect = true;
        pthread_cond_signal(&(gc.cond));
    }
    pthread_mutex_unlock(&(gc.mutex));
    return ptr;
}

For some reason, however, it seems as if the signal does not immediately wake up the sleeping thread; the signals are repeatedly issued (because gc_alloc is repeatedly called). Eventually, the waiting thread does wake up and call gc_activate, but I don't really understand why it does not wake immediately.

MRE:

gc.h:

#ifndef RCGC_H
#define RCGC_H

#define RCGC_OWALLOC(ptr, sz) (rcgc_owalloc((void **) (ptr), (sz)));
#define RCGC_ASSIGN(dest, src) (rcgc_assign((void **) (dest), (void **) (src)));

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct rcgc_allocation {
    void                   *data;
    struct rcgc_allocation *next;
    size_t                  counter;
    bool                    is_free;
};

// Reference counted gc.
struct rcgc {
    struct rcgc_allocation *head;
    struct rcgc_allocation *tail;

    int num_chunks;
    bool collect;
    pthread_mutex_t mutex;
    pthread_cond_t cond;
};

void rcgc_init(void);
void *rcgc_alloc(size_t sz);
void *rcgc_owalloc(void **dest, size_t sz);
void rcgc_assign(void **dest, void **src);
void rcgc_activate(void);

#endif // RCGC_H

gc.c

#include "rcgc.h"

#define MAX_CHUNKS 1000

struct rcgc gc;

static struct rcgc_allocation *rcgc_search(void *ptr);

void
rcgc_init(void) {
    pthread_mutex_init(&(gc.mutex), NULL);
    pthread_cond_init(&(gc.cond), NULL);
    gc.head = gc.tail = NULL;
    gc.num_chunks     = 0;
    gc.collect        = false;
}

void *
rcgc_alloc(size_t sz) {
    pthread_mutex_lock(&(gc.mutex));
    struct rcgc_allocation *node = calloc(1, sizeof(struct rcgc_allocation));
    // Add to linked list.
    if (gc.head == NULL) {
        gc.head = gc.tail = node;
        gc.head->next = gc.tail->next = NULL;
    } else {
        gc.tail->next = node;
        gc.tail       = node;
    }

    // Now assign the ptr data.
    void *ptr     = calloc(1, sz);
    node->data    = ptr;
    node->is_free = false;
    node->counter = 1;

    // If we allocate more than enough chunks, send the signal.
    gc.num_chunks++;
    if (gc.num_chunks > MAX_CHUNKS && !gc.collect) {
        gc.collect = true;
        pthread_cond_signal(&(gc.cond));
    }

    pthread_mutex_unlock(&(gc.mutex));
    return ptr;
}

void *
rcgc_owalloc(void **dest, size_t sz) {
    pthread_mutex_lock(&(gc.mutex));
    // Determine where it exists in the tree.
    if (dest == NULL) {
        exit(EXIT_FAILURE);
    } else {
        struct rcgc_allocation *old_node = rcgc_search(*dest);
        if (old_node != NULL) {
            old_node->counter--;
        }
        pthread_mutex_unlock(&(gc.mutex));
        return rcgc_alloc(sz);
    }
}

void
rcgc_assign(void **dest, void **src) {
    pthread_mutex_lock(&(gc.mutex));

    // If "dest" is already in the tree, find it and decrement its ref ctr.
    if (dest != NULL) {
        struct rcgc_allocation *dest_alloc = rcgc_search(*dest);
        if (dest_alloc != NULL) {
            dest_alloc->counter--;
        }
    }

    // If we cannot find the source, then it does not exist.
    struct rcgc_allocation *src_alloc = rcgc_search(*src);
    src_alloc->counter++;
    *dest = *src;
    pthread_mutex_unlock(&(gc.mutex));
}

void
rcgc_activate(void) {
    pthread_mutex_lock(&(gc.mutex));
    for (struct rcgc_allocation *curr = gc.head; curr != NULL; curr = curr->next) {
        if (curr->counter == 0 && !curr->is_free) {
            free(curr->data);
            curr->data    = NULL;
            curr->is_free = true;
            gc.num_chunks--;
        }
    }
    gc.collect = false;
    pthread_mutex_unlock(&(gc.mutex));
}

static struct rcgc_allocation *
rcgc_search(void *ptr) {
    for (struct rcgc_allocation *curr = gc.head; curr != NULL; curr = curr->next) {
        if (ptr == curr->data) {
            return curr;
        }
    }
    return NULL;
}

main.c

#include <stdio.h>
#include <stdlib.h>

#include "rcgc.h"

extern struct rcgc gc;

static bool done = false;

void*
rcgc_thread_handler(void *arg) {
    while (!done) {
        pthread_mutex_lock(&(gc.mutex));
        while (!gc.collect && !done) {
            pthread_cond_wait(&(gc.cond), &(gc.mutex));
        }
        pthread_mutex_unlock(&(gc.mutex));
        rcgc_activate();
    }
    return NULL;
}

int
main(void) {
    rcgc_init();
    pthread_t pid;
    pthread_create(&pid, NULL, rcgc_thread_handler, NULL);

    // Create a few arbitrary allocations and assignments.
    int *arr1 = rcgc_alloc(sizeof(int) * 10);
    int *arr2 = NULL;

    // Do something for a while...
    double val = 0;
    while (val < 1) {
        val += 0.0001;
        arr1 = RCGC_OWALLOC(&arr1, sizeof(int));
        RCGC_ASSIGN(&arr2, &arr1);
    }

    done = true;
    pthread_cond_broadcast(&(gc.cond));
    pthread_join(pid, NULL);
    return 0;
}

Solution

There are several issues with your code, among them:

There is a data race involving main.c's static variable done. Since it is not atomic, both the original and the auxiliary thread access it, and at least one of the accesses is a write, all accesses to it must be synchronized. The easiest way to resolve that would probably be to just make it _Atomic to remove the need for synchronization.
void * can be viewed as a generic pointer, but void ** is not a generic double pointer. There is no such thing in C. As a result, your RCGC_OWALLOC and RCGC_ASSIGN macros set the stage for undefined behavior of rgc_owalloc() and rgc_assign(). Most casts for other than arithmetic reasons are semantically incorrect. C provides automatic conversions in most situations where converting is sensible at all.
rcgc_thread_handler() releases the mutex before calling rcgc_activate(), but rcgc_activate() immediately attempts to reacquire it. It is very likely to succeed immediately under many systems' default scheduling policies, but it is not certain to do so. It is possible for another thread swoop in and acquire the mutex first. Any number of times.
Similarly, rcgc_owalloc() releases the mutex immediately before calling rgc_alloc(), which immediately attempts to reacquire the mutex. Again, this leaves a crack for other threads to get in and run for an arbitrary amount of time.
the main thread does not ensure that the GC thread has reached its CV wait before it starts signaling the CV. It's ok to signal a CV that has no waiters, but such signals don't do anything. This is a plausible explanation for the GC thread to run rcgc_activate() fewer times than the main thread signals the CV.
the main thread spends almost all its time holding the mutex locked, and in particular, each time it releases the mutex, it very soon attempts to acquire it again. As another answer explains, with the default scheduling policy of most systems, if a thread attempts to reacquire a mutex soon enough after releasing it, it is pretty likely to seize it before any other thread trying to acquire it can do. This is also a plausible explanation for the GC thread to run rcgc_activate() fewer times than the main thread signals the CV.