Search code examples
cfreedirected-acyclic-graphs

How to free DAG-like data structure


I have the following DAG (Directed Acyclic Graph) data structure:

struct dag {
    struct dag **children;  // Array of pointers to DAGs
    int n_children;         // Number of children
};

I want this to be a DAG and not just a tree, which means some of the children of a given DAG may be identical. This raises a problem in the deletion function:

void del(struct dag *d)
{
    if (d != NULL) {
        for(int i = 0; i < d->n_children; i++) {
            del(d->children[i]);
        }
        free(d->children);
        free(d);
    }
}

Indeed, I end up freeing stuff which is already freed, as shown by the example below (which raises a segmentation fault). This code creates two DAGs a and b; a has no children, b has two children which are both a.

void main()
{
    struct dag *a, *b, **b_children;

    a = malloc(sizeof(*a));
    a->children = NULL;
    a->n_children = 0;

    b_children = malloc(sizeof(a) * 2);
    b_children[0] = a;
    b_children[1] = a;

    b = malloc(sizeof(*b));
    b->children = b_children;
    b->n_children = 2;

    del(b);
}

What is flawed in this approach ? Can I write a del function that does what I want ?


Solution

  • For a directed acyclic graph, the nodes may be referenced from multiple parents, which requires extra bookkeeping to handle the life cycle.

    There are multiple approaches for this:

    • you can add a reference count in each node, increment it for every extra pointer that references the node and decrement it whenever a pointer no longer references the node, in particular when the parent node is to be freed. Reference counting is very tricky to implement correctly and reliably, and is not a complete solution if your graph ever becomes more complex, ie: if there are actual cycles.

    • you can maintain a linked list of allocated nodes and free the full list when the tree is freed. This approach allows for arbitrary node cross references but does not allow for freeing individual nodes. You can only free the whole tree at once. You can still delete individual nodes from the tree but they will remain allocated in the list until you free the whole tree. Also note that the overhead is equivalent: an extra pointer instead vs: a single integer is needed for each node.

    Depending on the specifics of the problem you are solving with this tree, one approach might be more appropriate than the other.

    In both cases, you should use functions to allocate, update and delete the nodes.

    Here is a version with reference counts:

    #include <stdio.h>
    #include <stdlib.h>
    
    static int total_dag_nodes = 0;  // for debugging
    
    struct dag {
        int refs;
        int n_children;         // Number of children
        struct dag **children;  // Array of pointers to DAGs
    };
    
    struct dag *new_dag(int n) {
        struct dag *s = malloc(sizeof *s);
        if (s == NULL)
            return NULL;
        total_dag_nodes++;
        s->refs = 1;
        s->n_children = 0;
        s->children = NULL;
        if (n > 0) {
            s->n_children = n;
            s->children = calloc(sizeof(*s->children), n);
        }
        return s;
    }
    
    void free_dag(struct dag *s) {
        if (s && --s->refs <= 0) {
            for (int i = 0; i < s->n_children; i++)
                free_dag(s->children[i]);
            free(s);
            total_dag_nodes--;
        }
    }
    
    int set_dag_child(struct dag *s, int n, struct dag *child) {
        if (s == NULL || n < 0 || n >= s->n_children) {
            /* invalid argument */
            return -1;
        }
        struct dag *prev = s->children[n];
        s->children[n] = child;
        if (child != NULL)
            child->refs++;
        /* delay freeing the previous node to avoid freeing it
           if it is identical to new one. */
        free_dag(prev);
        return 0;
    }
    
    void print_dag(const char *name, struct dag *s) {
        printf("%s: %p", name, (void*)s);
        if (s) {
            printf(" [refs:%d] {", s->refs);
            for (int i = 0; i < s->n_children; i++)
                printf(" %p", (void*)s->children[i]);
            printf(" }");
        }
        printf(";\n");
    }
    
    int main() {
        struct dag *a = new_dag(0);
        struct dag *b = new_dag(2);
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
        print_dag("a", a);
        print_dag("b", b);
    
        set_dag_child(b, 0, a);
        set_dag_child(b, 1, a);
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
        print_dag("a", a);
        print_dag("b", b);
    
        free_dag(a);    // removes the reference from a to the first dag
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
        print_dag("b[0]", b->children[0]);
        print_dag("b", b);
    
        free_dag(b);    // should remove all references to both dags
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
    
        return 0;
    }
    

    Output:

    total_dag_nodes=2                                                                                           a: 0x7f885f600000 [refs:1] { };
    b: 0x7f885f600010 [refs:1] { 0x0 0x0 };
    
    total_dag_nodes=2                                                                                           a: 0x7f885f600000 [refs:3] { };
    b: 0x7f885f600010 [refs:1] { 0x7f885f600000 0x7f885f600000 };
    
    total_dag_nodes=2                                                                                           b[0]: 0x7f885f600000 [refs:2] { };
    b: 0x7f885f600010 [refs:1] { 0x7f885f600000 0x7f885f600000 };
    
    total_dag_nodes=0
    

    Here is an alternative using a global list:

    #include <stdio.h>
    #include <stdlib.h>
    
    static int total_dag_nodes = 0;  // for debugging
    static struct dag *dag_list;
    
    struct dag {
        struct dag *next;       // linked list for maintenance
        int n_children;         // Number of children
        struct dag **children;  // Array of pointers to DAGs
    };
    
    struct dag *new_dag(int n) {
        struct dag *s = malloc(sizeof *s);
        if (s == NULL)
            return NULL;
        total_dag_nodes++;
        s->next = dag_list;
        dag_list = s;
        s->n_children = 0;
        s->children = NULL;
        if (n > 0) {
            s->n_children = n;
            s->children = calloc(sizeof(*s->children), n);
        }
        return s;
    }
    
    void free_dag(struct dag *s) {
        /* nothing to do because tag could be referenced elsewhere */
    }
    
    void free_all_dags(void) {
        struct dag *p;
        while ((p = dag_list) != NULL) {
            dag_list = p->next;
            free(p->children);
            free(p);
            total_dag_nodes--;
        }
    }
    
    void print_dag(const char *name, struct dag *s) {
        printf("%s: %p", name, (void*)s);
        if (s) {
            printf(" {");
            for (int i = 0; i < s->n_children; i++)
                printf(" %p", (void*)s->children[i]);
            printf(" }");
        }
        printf(";\n");
    }
    
    int main() {
        struct dag *a = new_dag(0);
        struct dag *b = new_dag(2);
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
        print_dag("a", a);
        print_dag("b", b);
    
        b->children[0] = a;
        b->children[1] = a;
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
        print_dag("a", a);
        print_dag("b", b);
    
        free_dag(a);  // individual frees do nothing
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
        print_dag("b[0]", b->children[0]);
        print_dag("b", b);
    
        free_all_dags();
    
        printf("\ntotal_dag_nodes=%d\n", total_dag_nodes);
    
        return 0;
    }
    

    Output:

    total_dag_nodes=2                                                                                           a: 0x7f8d7b402650 { };
    b: 0x7f8d7b402670 { 0x0 0x0 };
    
    total_dag_nodes=2                                                                                           a: 0x7f8d7b402650 { };
    b: 0x7f8d7b402670 { 0x7f8d7b402650 0x7f8d7b402650 };
    
    total_dag_nodes=2                                                                                           b[0]: 0x7f8d7b402650 { };
    b: 0x7f8d7b402670 { 0x7f8d7b402650 0x7f8d7b402650 };
    
    total_dag_nodes=0