Search code examples
pythoncnumpypython-c-api

Python's C-extension with numpy randomly crashes after a few calls (5~10) from python code


To speed up custom MFCC calculations, I had to write some code in C, wrap it with Python's C API & numpy's C API, and compile it with disutils.core in order to import it from python code.

I was able to successfully compile, and execute, the code from python using a small sample.

However, once I called the same function with the same input ~10 times in a row using timeit OR a for loop, Python stopped without giving any error messages.

I don't think it's due to a memory leak, as the input data is 150kB, and I had gigabytes of spare RAM at the time.

The most baffling thing? It works when the function is called once or twice. It always seems to crash when called in the double digits.

Here is my code, reproduced:

#define PY_SSIZE_T_CLEAN
#include <stdio.h>
#include <stdlib.h>
#include "CMel.h"


typedef struct{
    int real, imag;
} COMPLEX;

static PyMethodDef CMelMethods[] = {
    {"mel_40", mel_40, METH_VARARGS},
    {NULL, NULL}
};

static PyModuleDef CMelModule = {
    PyModuleDef_HEAD_INIT, "CMel", "Python interface for C-code of integer-based Mel Spectrogram calculation", -1, CMelMethods
};

PyMODINIT_FUNC PyInit_CMel(void) {
    import_array();
    return PyModule_Create(&CMelModule);
}

static PyObject* mel_40(PyObject* self, PyObject* args) {
    /* Input: numpy array (nparrayin)
    *  Output: numpy array (nparrayout)
    *  Function: performs mel-spectrogram transformation of given pcm-style array
    *            Always assumes samplerate=16000, framelength=480, hoplength=240, coefficients=40
    */

    PyArrayObject* nparrayin, * nparrayout;
    int* carrayin, * carrayout;
    int numframes;

    // Check i/o types, throw error if something is wrong
    if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &nparrayin))  {
        PyErr_SetString(PyExc_ValueError, "[mel_40] Input array type mismatch!");
        return NULL;
        }
    
    if (nparrayin == NULL)  {
        PyErr_SetString(PyExc_ValueError, "[mel_40] Input array is NULL");
        return NULL;
        }
    if (!is_intvector(nparrayin)) {
        PyErr_SetString(PyExc_ValueError, "[is_intvector] Input array must be of type int16 or int32");
        return NULL;
        }

    // Copy numpy array to C array
    if (nparrayin->descr->type_num == NPY_INT16) {      // this is usually called
        carrayin = pyarray_int16_to_carrayptr_int32(nparrayin);
    }
    else { // if (nparrayin->descr->type_num == NPY_INT32) 
        carrayin = pyarray_int32_to_carrayptr_int32(nparrayin);
    }
    numframes = calculate_numframes(nparrayin->dimensions[0]);

    if (numframes <= 5) {
        PyErr_SetString(PyExc_ValueError, "[mel_40] Input data is too short");
        return NULL;
    }

    // Operate on arrays here
    carrayout = (int*)malloc((numframes - 5) * 40 * sizeof(int));
    Calculate_Melspectrogram(carrayin, carrayout, numframes - 5);

    // Create nparrayout for outputting to python
    const npy_intp dims[2] = {numframes-5, 40};
    nparrayout = (PyArrayObject*)PyArray_SimpleNewFromData(2, dims, NPY_INT32, carrayout);

    free(carrayin);
    // Ref: https://stackoverflow.com/questions/4657764/py-incref-decref-when
    Py_DECREF(nparrayin);
    return PyArray_Return(nparrayout);
}

int is_intvector(PyArrayObject *inputarray) {
    if (inputarray->descr->type_num != NPY_INT16 && inputarray->descr->type_num != NPY_INT32 || inputarray->nd != 1) return 0;
    return 1;
}

int* pyarray_int16_to_carrayptr_int32(PyArrayObject* inputarray) {
    // (int16) Numpy array -> (int32) C Array
    short* pyarray = (short*)inputarray->data;
    int* carray;
    int i, rows;

    rows = inputarray->dimensions[0];

    carray = (int*)malloc(sizeof(int) * rows);

    for (i = 0; i < rows; i++) {
        carray[i] = (int)pyarray[i];
    }
    return carray;
}

int* pyarray_int32_to_carrayptr_int32(PyArrayObject* inputarray) {
    // (int32) Numpy array -> (int32) C Array
    int* pyarray = (int*)inputarray->data;
    int* carray;
    int i, rows;

    rows = inputarray->dimensions[0];

    carray = (int*)malloc(sizeof(int) * rows);

    for (i = 0; i < rows; i++) {
        carray[i] = pyarray[i];
    }
    return carray;
}

int calculate_numframes(int numdata) {
    return ((numdata - 240) / 240);
}


void Calculate_Melspectrogram(int *inputarray, int *outputarray, int numframes) {
    COMPLEX *fftarray = (COMPLEX*)malloc(257 * sizeof(COMPLEX));
    int* window = (int*)calloc(514, sizeof(int));
    
    inputarray += 480;

    for (int i=0; i<numframes; i++) {
        memcpy(window, inputarray, 480 * sizeof(int));
        inputarray += 240;
        MFCC_Extract(window, fftarray);
        memcpy(outputarray, window, 40 * sizeof(int));
        outputarray += 40;
        
        //Reset fftarray
        memset(fftarray, 0, 257 * sizeof(COMPLEX));
        memset(window, 0, 514 * sizeof(int));
    }

    free(window);
    free(fftarray);
}

The function is called like so:

import numpy as np
import scipy.io.wavfile as wavfile
import timeit
from CMel import mel_40

samplerate, data = wavfile.read("./example.wav")

print(timeit.Timer(lambda: mel_40(data)).timeit(number=100))

The C code is compiled using the following: python3 setup.py install

setup.py:

import os
import numpy as np
from sysconfig import get_paths
from distutils.core import setup, Extension

python_paths = get_paths()

CMelModule = Extension("CMel", define_macros=[('MAJOR_VERSION', '1'), ("MINOR_VERSION", "0")],
                       include_dirs=["/usr/local/include", os.path.join(np.get_include(), "numpy"), python_paths['include']],
                    #    libraries=["python36"],
                    #    library_dirs=[python_paths['stdlib']],
                       sources=["CMel.c", "melcalculations.c"])


def main():
    setup(name="CMel",
          version="1.0.0",
          description="Python interface for integer-based FFT and Mel Spectogram calculation",
          author="FB",
          author_email="[email protected]",
          ext_modules=[CMelModule])


if __name__ == "__main__":
    main()

Note, MFCC_Extract() is where the MFCC extraction happens. It is too long to post here, but I have made sure it is working correctly, with no memory allocations happening inside of it.

Also, strangely enough, I have tried writing the contents of the output of mel_40 function to a file, and this gets it to work past the ~10 crash threshold.


Solution

  • Doing Py_DECREF(nparrayin) at the end of mel_40() is incorrect. In reference to PyArg_ParseTuple(), the Python extension docs say:

    Note that any Python object references which are provided to the caller are borrowed references; do not decrement their reference count!

    Here's some code that prints the refcount of the input array after each call to mel_40(), and the output with and without the decref:

    import sys
    import numpy as np
    from CMel import mel_40
    
    data = np.arange(0, (5 + 5)*240 + 240, dtype=np.int16)
    # add a couple extra references
    data_1 = data_2 = data
    
    print("initial refcount:", sys.getrefcount(data))
    for i in range(5):
        result = mel_40(data)
        print("refcount:", sys.getrefcount(data))
    

    With Py_DECREF(nparrayin):

    initial refcount: 4
    refcount: 3
    refcount: 2
    double free or corruption (top)
    [1]    753324 IOT instruction (core dumped)  python test.py
    

    Without:

    initial refcount: 4
    refcount: 4
    refcount: 4
    refcount: 4
    refcount: 4
    refcount: 4