Search code examples
pythonpython-2.7python-c-apicpythonreference-counting

How to break a direct reference cycle in CPython


In CPython I have two types of objects, which are closely connected to each other.

#include <Python.h>
#include <structmember.h>

/*
 * Instance layout shared by both extension types: the standard object
 * header followed by a pointer to the partner object.
 */
typedef struct pyt PYT;
struct pyt {
    PyObject_HEAD
    PYT *other;     /* partner object; cleared by dealloc() and clear() */
};

/*
 * tp_dealloc shared by Pyt1 and Pyt2 instances.
 *
 * Takes the object out of the cyclic collector's bookkeeping, releases the
 * reference to the partner object, prints the address being released and
 * finally frees the GC-allocated memory.
 */
static void dealloc (PYT *self) {
    /* The C API requires untracking before any field is invalidated, so the
       collector can never traverse a half-destroyed object.  CPython's
       implementation ignores objects that are not currently tracked. */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->other);
    /* %p is only defined for void pointers, hence the cast. */
    printf("dealloc object at %p\n", (void *) self);
    PyObject_GC_Del(self);
}

/*
 * Type object for the partner objects ("pyt.Pyt2").  Only the leading slots
 * are set here; initpyt() assigns the flags, GC hooks and member table
 * before the type is readied.
 */
static PyTypeObject Pyt2Type = {
    PyObject_HEAD_INIT(NULL)
    0,                          /* ob_size */
    "pyt.Pyt2",                 /* tp_name */
    sizeof(PYT),                /* tp_basicsize */
    0,                          /* tp_itemsize */
    (destructor) dealloc        /* tp_dealloc */
};

/*
 * tp_new for Pyt1: allocates a Pyt1 object together with a Pyt2 partner and
 * links the two so that each holds a reference to the other.
 *
 * args and kwds are not used.
 * Returns a new reference to the Pyt1 object, or NULL if an allocation fails.
 *
 * NOTE: neither object is passed to PyObject_GC_Track() in this version.
 */
static PyObject * new (PyTypeObject *type, PyObject *args, PyObject *kwds) {
    PYT *first;
    PYT *partner;

    first = PyObject_GC_New(PYT, type);
    if (first == NULL) {
        return NULL;
    }

    partner = PyObject_GC_New(PYT, &Pyt2Type);
    first->other = partner;
    if (partner == NULL) {
        /* dealloc() copes with a NULL partner pointer. */
        Py_DECREF(first);
        return NULL;
    }

    /* The partner's back-pointer owns its own reference to the result. */
    Py_INCREF(first);
    partner->other = first;
    return (PyObject *) first;
}

/*
 * Type object for the primary objects ("pyt.Pyt1").  Only the leading slots
 * are set here; initpyt() assigns the flags, GC hooks, member table and
 * constructor before the type is readied.
 */
static PyTypeObject Pyt1Type = {
    PyObject_HEAD_INIT(NULL)
    0,                          /* ob_size */
    "pyt.Pyt1",                 /* tp_name */
    sizeof(PYT),                /* tp_basicsize */
    0,                          /* tp_itemsize */
    (destructor) dealloc        /* tp_dealloc */
};

/*
 * tp_traverse: hands the one object reference held by an instance (its
 * partner) to the collector's visit callback.
 *
 * Py_VISIT relies on the parameters being named exactly `visit` and `arg`
 * and returns early with the callback's result if that result is non-zero.
 */
static int
traverse (PYT *self, visitproc visit, void *arg)
{
    Py_VISIT(self->other);
    return 0;
}

/*
 * tp_clear: drops the reference to the partner object and leaves the
 * pointer NULL.  Always returns 0.
 */
static int
clear (PYT *self)
{
    Py_CLEAR(self->other);
    return 0;
}

static PyMemberDef members[] = {
    {"other", T_OBJECT, offsetof(PYT, other), RO, "other"},
    { NULL }
};

/* Module-level function table: contains nothing but the terminating entry. */
static PyMethodDef methods[] = {
    { NULL, NULL, 0, NULL }
};

PyMODINIT_FUNC initpyt ( void ) {
    PyObject* m;

    Pyt1Type.tp_flags = Pyt2Type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC;
    Pyt1Type.tp_traverse = Pyt2Type.tp_traverse = (traverseproc) traverse;
    Pyt1Type.tp_clear = Pyt2Type.tp_clear = (inquiry) clear;
    Pyt1Type.tp_members = Pyt2Type.tp_members = members;
    Pyt1Type.tp_new = new;

    if (PyType_Ready(&Pyt1Type) < 0) return;
    if (PyType_Ready(&Pyt2Type) < 0) return;

    m = Py_InitModule("pyt", methods);

    Py_INCREF(&Pyt1Type), PyModule_AddObject(m, "Pyt", (PyObject *) &Pyt1Type);
}

Using my test script

# Build-and-run test script (Python 2): compiles pyt.c in place, then
# exercises the resulting module.
from distutils.core import Extension, setup
import sys, gc
# Inject the build arguments so that setup() performs an in-place build_ext
# when this script is run.
sys.argv.extend(["build_ext", "-i"])
setup(ext_modules = [Extension('pyt', ['pyt.c'])])
from pyt import Pyt
# Creating a Pyt object also creates its linked partner object.
pyt = Pyt()
print pyt, sys.getrefcount(pyt)
# Rebind the name to the partner; the first object is now referenced only
# through the partner's `other` pointer.
pyt = pyt.other
print pyt, sys.getrefcount(pyt)
# Drop the last outside reference, then ask the cyclic collector to run.
del pyt
gc.collect()

I get an output like

<pyt.Pyt1 object at 0x7fbc26540138> 3
<pyt.Pyt2 object at 0x7fbc26540150> 3

The objects are not deleted at the end, since each keeps a reference to the other, creating a closed cycle. In other code I used an approach where I simply kept the objects until both had a refcount of 0, which I suspect is bad practice. Now I have tried using the garbage collector here, but the objects are still not collected.

What is going wrong here? What did I miss?


Solution

  • Ok, I finally found my problem. I didn't start tracking with PyObject_GC_Track.

    Python requires several steps when using the garbage collector:

    • adding Py_TPFLAGS_HAVE_GC to tp_flags
    • adding a tp_traverse function and, if needed, a tp_clear function
    • object creation with PyObject_GC_New or a similar function
    • calling PyObject_GC_Track on the fully initialised object
    • object deletion with PyObject_GC_Del or a similar function

    So here modifying the new function will suffice.

    /*
     * tp_new for Pyt1: allocates a Pyt1 object together with a Pyt2 partner,
     * links the two so that each holds a reference to the other, and only
     * then registers both with the cyclic garbage collector.
     *
     * args and kwds are not used.
     * Returns a new reference to the Pyt1 object, or NULL if an allocation fails.
     */
    static PyObject * new (PyTypeObject *type, PyObject *args, PyObject *kwds) {
        PYT *first;
        PYT *partner;

        first = PyObject_GC_New(PYT, type);
        if (first == NULL) {
            return NULL;
        }

        partner = PyObject_GC_New(PYT, &Pyt2Type);
        first->other = partner;
        if (partner == NULL) {
            /* dealloc() copes with a NULL partner pointer. */
            Py_DECREF(first);
            return NULL;
        }

        /* The partner's back-pointer owns its own reference to the result. */
        Py_INCREF(first);
        partner->other = first;

        /* Tracking starts only once every field the traverse hook reads is valid. */
        PyObject_GC_Track((PyObject *) first);
        PyObject_GC_Track((PyObject *) partner);
        return (PyObject *) first;
    }
    

    With an output of

    <pyt.Pyt1 object at 0x7f4904fe1398> 4
    <pyt.Pyt2 object at 0x7f4904fe15c8> 4
    dealloc object at 0x7f4904fe15c8
    dealloc object at 0x7f4904fe1398