Search code examples
pythonnumpypython-c-apirecarray

Access a Numpy Recarray via the C-API


If we have a Numpy recarray: x = np.array([(1.,2.)], dtype=np.dtype([('a','<f8'),('b','<f8')]))
We can access its fields in Python as:
x['a'] or x['b']
But if this array is passed to a C program as a PyArrayObject how do we access its fields?
I realize we can get the dtype in C via: PyArray_Descr *dtype = PyArray_DTYPE(arr)
and its fields dictionary via: PyObject *fields = dtype->fields, but how can this be used to access the data at x['a']?


Solution

  • I'll try to answer my own question.
    It appears that you can use the function PyObject_GetItem() to access fields in your Numpy recarray. To test this I created a simple recarray with three fields:
    np.dtype([('field1', '<f8', (1,2)), ('field2', '<f8', (2,2)), ('field3', '<f8', (3,1))])
    I send this array to my C++ function and execute two loops: an outer loop over the fields (e.g. x['field1'], x['field2'], x['field3']) and a nested loop over the array elements in each field. In the outer loop I use PyObject_GetItem() to access each field. The code is as follows:

    C++ Code

    #include "Python.h"
    #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
    #include "arrayobject.h"
    #include <cmath>
    #include <iostream>
    #include <iomanip>
    using namespace std;
    
    static PyObject *readarray(PyObject *self, PyObject *args) {
        PyArrayObject *arr, *x2;
        PyArray_Descr *dtype;
        PyObject *names, *name, *x1 = NULL;
        Py_ssize_t N, i;
        NpyIter *iter;
        NpyIter_IterNextFunc *iternext;
        double **dataptr;
        npy_intp index;
    
        if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &arr)) {
            return NULL;
        }
        dtype = PyArray_DTYPE(arr);
        names = dtype->names;
        if (names != NULL) {
            names = PySequence_Fast(names, NULL);
            N = PySequence_Fast_GET_SIZE(names);
            for (i=0; i<N; i++) {
                name = PySequence_Fast_GET_ITEM(names, i);
                cout << setw(7) << left << PyString_AsString(name);
                x1 = PyObject_GetItem((PyObject *) arr, name);
                x2 = (PyArrayObject *) x1;
                dtype = PyArray_DTYPE(x2);
                iter = NpyIter_New(x2, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_SAME_KIND_CASTING, dtype);
                if (iter == NULL) {return NULL;}
                dataptr = (double **) NpyIter_GetDataPtrArray(iter);
                iternext = NpyIter_GetIterNext(iter, NULL);
                do {
                    index = NpyIter_GetIterIndex(iter);
                    if (index==0) {
                        cout << setw(6) << right << index << setw(9) << setiosflags(ios::fixed) << setprecision(4) <<**dataptr << endl;
                    } else {
                        cout << "       " << setw(6) << right << index << setw(9) << setiosflags(ios::fixed) << setprecision(4) << **dataptr << endl;
                    }
                } while (iternext(iter));
            }
            NpyIter_Deallocate(iter);
        }
        return Py_BuildValue("i", 0);
    }
    
    // Method table for the extension module. It exposes the C++ function
    // readarray to Python under the name "readarray", called with a tuple of
    // positional arguments (METH_VARARGS) and with "Documentation" as its
    // docstring. The all-NULL entry is the sentinel that ends the table.
    static PyMethodDef pyproj4methods[] = {
        {"readarray", readarray, METH_VARARGS, "Documentation"},
        {NULL, NULL, 0, NULL}
    };
    
    // Python 2 module initialisation entry point, run on `import pyproj4`.
    // Registers the method table under the module name "pyproj4" and then
    // initialises the NumPy C-API, which must happen before any PyArray_* call.
    PyMODINIT_FUNC initpyproj4(void) {
        // Py_InitModule returns NULL with an exception set if the module could
        // not be created; there is nothing useful left to initialise then.
        if (Py_InitModule("pyproj4", pyproj4methods) == NULL) {
            return;
        }
        import_array();
    }
    


    Python Code

    import numpy as np
    import pyproj4 as p4
    np.random.seed(22)

    ## Python Implementation ##
    # Structured dtype with three float64 sub-array fields of different shapes.
    dt = np.dtype([('field1', '<f8', (1,2)), ('field2', '<f8', (2,2)), ('field3', '<f8', (3,1))])
    x = np.zeros(2, dtype=dt)

    # One output row: field label, flat C-order index, value.
    row_fmt = '{0:6s} {1:6d}  {2: 2.4f}'

    for name in x.dtype.names:
        # Each field view has shape (records, rows, cols); fill it with noise.
        x[name] = np.random.randn(*x[name].shape)
        it = np.nditer(x[name], ['c_index'], ['readonly'])
        for num in it:
            # Only the first row of a field carries the field name.
            label = name if it.index == 0 else ' '
            print(row_fmt.format(label, it.index, num.item()))
    print('-----------------------')
    ## C-API Implementation ##
    p4.readarray(x)
    


    The output in both cases looks like:

    field1      0  -0.0919
                1  -1.4634
                2   1.0818
                3  -0.2393
    field2      0  -0.4911
                1  -1.0023
                2   0.9188
                3  -1.1036
                4   0.6265
                5  -0.5615
                6   0.0289
                7  -0.2308
    field3      0   0.5878
                1   0.7523
                2  -1.0585
                3   1.0560
                4   0.7478
                5   1.0647
    

    If you know a better way to accomplish this, please don't hesitate to post your solution.