c generics data-structures macros polymorphism

How to bypass multi-definition of a function name when implementing generic data structures with macros in C?

I am coding in a framework written in C and not compatible with C++. When implementing a data structure for sets as doubly-linked lists, I used macros like these to achieve generics.

#pragma once

/* Interface protocol:
 *   1. Do not modify contents of nodes in a list. Results should be regarded
 *      as returned instead of modified in the arguments. The old list shall
 *      not be used.
 *   2. Use RA_DECL_DL (T) to declare the doubly linked list and related oper
 *      for type T. If RA_DECL_DL (T) is used in an `.h' file, put it in
 *      #define GUARD_RA_FL_<T> #ifdef GUARD_RA_FL_<T> ... #endif
 *      to prevent re-declarations.
 *   3. A function int _<T>_eq (T x, T y) must be declared in that file, before
 *      RA_DECL_DL and RA_DEFN_DL.
 *   4. An RA_DL (T) can be used both as a set or a doubly linked list, but not
 *      simultaneously for a particular instance. */

#define RA_DECL_DL(T)                                                         \
  typedef struct _##T##_DL_ *T##_DL_t;                                        \
  struct _##T##_DL_                                                           \
  {                                                                           \
    T val;                                                                    \
    T##_DL_t next;                                                            \
    T##_DL_t prev;                                                            \
  };                                                                          \
  T##_DL_t _##T##_DL_empty ();                                                \
  T##_DL_t _##T##_DL_insert (T x, T##_DL_t lst);                              \
  T##_DL_t _##T##_DL_remove (T x, T##_DL_t lst);                              \
  int _##T##_DL_isIn (T x, T##_DL_t lst);                                     \
  int _##T##_DL_isEmpty (T##_DL_t lst);                                       \
  T##_DL_t _##T##_DL_union (T##_DL_t l1, T##_DL_t l2);                        \
  T##_DL_t _##T##_DL_intersect (T##_DL_t l1, T##_DL_t l2);                    \
  T##_DL_t _##T##_DL_diff (T##_DL_t l1, T##_DL_t l2);

#define RA_DEFN_DL(T)                                                         \
  T##_DL_t _##T##_DL_empty () { return NULL; }                                \
  T##_DL_t _##T##_DL_insert (T x, T##_DL_t lst)                               \
  {                                                                           \
    T##_DL_t ret = checked_malloc (sizeof (struct _##T##_DL_));               \
    if (lst == NULL)                                                          \
      {                                                                       \
        ret->val = x;                                                         \
        ret->next = NULL;                                                     \
        ret->prev = NULL;                                                     \
        return ret;                                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        for (T##_DL_t iter = lst; iter != NULL; iter = iter->next)            \
          {                                                                   \
            if (_##T##_eq (iter->val, x))                                     \
              return lst;                                                     \
          }                                                                   \
        ret->val = x;                                                         \
        ret->prev = NULL;                                                     \
        ret->next = lst;                                                      \
        lst->prev = ret;                                                      \
        return ret;                                                           \
      }                                                                       \
  }                                                                           \
  T##_DL_t _##T##_DL_remove (T x, T##_DL_t lst)                               \
  {                                                                           \
    T##_DL_t iter;                                                            \
    for (iter = lst; iter != NULL; iter = iter->next)                         \
      {                                                                       \
        if (_##T##_eq (iter->val, x))                                         \
          break;                                                              \
      }                                                                       \
    if (iter == NULL)                                                         \
      return lst;                                                             \
    if (iter->prev != NULL)                                                   \
      iter->prev->next = iter->next;                                          \
    if (iter->next != NULL)                                                   \
      iter->next->prev = iter->prev;                                          \
    return iter->prev == NULL ? lst->next : lst;                              \
  }                                                                           \
  int _##T##_DL_isIn (T x, T##_DL_t lst)                                      \
  {                                                                           \
    for (T##_DL_t iter = lst; iter != NULL; iter = iter->next)                \
      {                                                                       \
        if (_##T##_eq (iter->val, x))                                         \
          return 1;                                                           \
      }                                                                       \
    return 0;                                                                 \
  }                                                                           \
  T##_DL_t _##T##_DL_union (T##_DL_t l1, T##_DL_t l2)                         \
  {                                                                           \
    T##_DL_t ret = _##T##_DL_empty ();                                        \
    for (T##_DL_t iter = l1; iter != NULL; iter = iter->next)                 \
      ret = _##T##_DL_insert (iter->val, ret);                                \
    for (T##_DL_t iter = l2; iter != NULL; iter = iter->next)                 \
      ret = _##T##_DL_insert (iter->val, ret);                                \
    return ret;                                                               \
  }                                                                           \
  T##_DL_t _##T##_DL_intersect (T##_DL_t l1, T##_DL_t l2)                     \
  {                                                                           \
    T##_DL_t ret = _##T##_DL_empty ();                                        \
    for (T##_DL_t iter = l1; iter != NULL; iter = iter->next)                 \
      {                                                                       \
        if (_##T##_DL_isIn (iter->val, l2))                                   \
          ret = _##T##_DL_insert (iter->val, ret);                            \
      }                                                                       \
    return ret;                                                               \
  }                                                                           \
  T##_DL_t _##T##_DL_diff (T##_DL_t l1, T##_DL_t l2)                          \
  {                                                                           \
    T##_DL_t ret = _##T##_DL_empty ();                                        \
    for (T##_DL_t iter = l1; iter != NULL; iter = iter->next)                 \
      {                                                                       \
        if (!_##T##_DL_isIn (iter->val, l2))                                  \
          ret = _##T##_DL_insert (iter->val, ret);                            \
      }                                                                       \
    return ret;                                                               \
  }

#define RA_DL(T) T##_DL_t
#define RA_DL_empty(T) _##T##_DL_empty ()

#define RA_DL_insert(T, x, lst) _##T##_DL_insert (x, lst)
#define RA_DL_remove(T, x, lst) _##T##_DL_remove (x, lst)
#define RA_DL_isIn(T, x, lst) _##T##_DL_isIn (x, lst)
#define RA_DL_union(T, l1, l2) _##T##_DL_union (l1, l2)
#define RA_DL_intersect(T, l1, l2) _##T##_DL_intersect (l1, l2)
#define RA_DL_diff(T, l1, l2) _##T##_DL_diff (l1, l2)

The problem is: if for two files, say f1.c and f2.c, I write RA_DECL_DL(int) and RA_DEFN_DL(int) in both files, then, for example, the function _int_DL_insert is defined twice globally. This causes a conflict between identifiers, though they are actually identical.

Using #ifdef guard macros does not solve this problem because macros are not seen across .c files. Using static prevents me using the functions in multiple files, which is sometimes needed.

Solution

I concur with @Lundin that this is all much too much macros. As I'm sure you know a generic implementation using C++ templates is simple - the template container type std::set is ready-made - but you say your framework is "not compatible" with C++. However a template-instantiating C++ implementation can expose a C linkage interface using the extern C {....} language-linkage specifier, and you need only be able use the same toolchain for C and C++ to be ABI safe.

If C++ with C linkage is for some reason ruled out, let's say your macro header file is DL_macros.h.

If you are going to control the inventory of types for which your macros are instantiated then you can do what follows.

If you want a user of your macros to be able to employ them for unforeseen types then you can provide a README that tells them to do what follows.

For each type Type for which you want to instantiate the macros, create a header file with some suitable name, DL_Type.h like:

#pragma once
#include <Type.h> // Maybe
#include <DL_macros.h>

int _Type_eq (Type x, Type y);  // Per your protocol item #3

RA_DECL_DL(Type)

where Type.h will be the relevant header file defining type Type if it is a defined and not a fundamental type.

Also create a source file DL_Type.c like:

#include <DL_Type.h>

int _Type_eq (Type x, Type y)
{
    return x == y; // Or whatever is right.
}

RA_DEFN_DL(Type)

Then compile the file DL_Type.c to an object file DL_Type.o. Link that object file with client code that requires the API declared in DL_Type.h. Such code will #include <DL_Type.h>. Client code will reference the unique definitions provided in DL_Type.o

Optionally, you or your users may make a static library from object files instantiating the macro definitions for various types. From such a static library a linker will only link member object files that are referenced by client code.

You might also avoid exposing the RA_DEFN_DL(T) macro to client code by placing the RA_DECL_DL(T) macro alone in the client-facing header, say, DL_macros_decl.h, placing the RA_DEFN_DL(T) in another, say, DL_macros_defn.h, and including the latter only in DL_Type.c:

#include <DL_Type.h>
#include <DL_macros_defn.h>

Optionally too, you might provide conditional support for toolchains, like GCC's, that recognise weak symbols. The linker will tolerate multiple definitions of a weak symbol and pick one them arbitrarily (in practice, the first one seen), so for these toolchains it would be unnecessary for you or your users to compile and link a distinct DL_Type.o for each Type: you could RA_DECL_DL(Type) and RA_DEFN_DL(Type) in the same translation unit, as you first envisaged if you think that is significantly desirable.

For this option, DL_macros.h would be modified on the lines:

#ifdef __GNUC__
#define WEAK __attribute__((weak))
#else
#define WEAK
#endif

#define RA_DECL_DL(T)                                                         \
  typedef struct _##T##_DL_ *T##_DL_t;                                        \
  struct _##T##_DL_                                                           \
  {                                                                           \
    T val;                                                                    \
    T##_DL_t next;                                                            \
    T##_DL_t prev;                                                            \
  };                                                                          \
  T##_DL_t _##T##_DL_empty () WEAK;                                           \
  T##_DL_t _##T##_DL_insert (T x, T##_DL_t lst) WEAK;                         \
  T##_DL_t _##T##_DL_remove (T x, T##_DL_t lst) WEAK;                         \
  int _##T##_DL_isIn (T x, T##_DL_t lst) WEAK;                                \
  int _##T##_DL_isEmpty (T##_DL_t lst) WEAK;                                  \
  T##_DL_t _##T##_DL_union (T##_DL_t l1, T##_DL_t l2) WEAK;                   \
  T##_DL_t _##T##_DL_intersect (T##_DL_t l1, T##_DL_t l2) WEAK;               \
  T##_DL_t _##T##_DL_diff (T##_DL_t l1, T##_DL_t l2) WEAK;

and the required declaration for the _Type_eq function would be

int _Type_eq (Type x, Type y) WEAK;

My own preference would be not to bother with differentiating toolchains that recognize weak symbols and stick universally with compiling a distinct DL_Type.o.

@Jonathan Leffler's caution about using names beginning with underscores refers to the fact that names beginning with a double-underscore or an underscore and capital letter are reserved by the compiler implementor in C. Your macros can give rise to names that breach this reservation.