Search code examples
pythoncsegmentation-faultgarbage-collectionpython-extensions

segmentation fault Segfault python extension in c++


This extension is used to search data in a linked list and return it as a nested tuple of lists.[(ele1,ele2,),...]

However when the program runs for a period of time, segment fault will occur. I know that there might be something I'm missing with python memory management, but it's not clear to me exactly where the problem is.

header file:

#include "Python.h"
#define  PY_EXTENSION_H
#ifndef TESTFORSCORE_MAIN_H
#define TESTFORSCORE_MAIN_H
#define MAX(a,b) a<b?b:a
#define MIN(a,b) a<b?a:b
typedef unsigned char byte;
typedef struct{
    char* content;
    int content_len;
    int features_len;
    byte* features;
    long id;
}NODE;

typedef struct LISTNODE{
    LISTNODE* next;
    NODE* node;
}LISTNODE;

extern "C"{
static PyObject *compare(byte* features1,byte* features2,int min);
static PyObject *init_map(byte* features,int features_len,char* content ,int content_len,long id);
static PyObject *get_list_size(PyObject *self, PyObject *args);
static PyObject *search(PyObject *self, PyObject *args);
}
#endif

cpp

#define PY_SSIZE_T_CLEAN
#include "example.h"


static LISTNODE *head_of_map=(LISTNODE*) PyMem_RawMalloc(sizeof(LISTNODE));
LISTNODE *current_head=head_of_map;
long COUNTER=0;
static PyObject *get_list_size(PyObject *self, PyObject *args)
{
    return Py_BuildValue("l", COUNTER);
}
static PyObject *init_map(PyObject *self, PyObject *args){//
    PyObject  *obj1,*obj2;
    int len1,len2;
    long activity;
    byte *features,*tmp_b;
    char *content,*tmp_c;
    if (!PyArg_ParseTuple(args,"SSiiL", &obj1,&obj2,&len1,&len2,&activity)) {
        return NULL;
    }

    features=(byte *) PyBytes_AsString(obj1);
    content=PyBytes_AsString(obj2);

    LISTNODE *list_node=(LISTNODE*) PyMem_RawMalloc(sizeof(LISTNODE));
    NODE *node=(NODE*) PyMem_RawMalloc(sizeof(NODE));
    tmp_b=(byte *) PyMem_RawMalloc(sizeof(byte)*len1);
    tmp_c=(char *) PyMem_RawMalloc(sizeof(char)*len2);
    memcpy(tmp_b,features,len1);
    memcpy(tmp_c,content,len2);
    node->features=tmp_b;
    node->content=tmp_c;
    node->features_len=len1;
    node->content_len=len2;
    node->id=activity;
    list_node->node=node;
    current_head->next=list_node;
    current_head=current_head->next;
    COUNTER+=1;
    // err maybe here
    //Py_DecRef(obj1);
    //Py_DecRef(obj2);
    PyMem_FREE(tmp_b);
    PyMem_Free(tmp_c);
    return Py_BuildValue("b",0);
}

static double _compare(byte* features1,byte* features2,int min){
    //do something ...
    return   0.3;
}
static PyObject *search(PyObject *self, PyObject *args){//(byte* features,char *content,int features_len,int content_len,double threshold,int _idx){

    PyObject *obj1,*obj2,*tuple,*content,*matched_content,*score1,*_id,*matched_id, *lis= PyList_New(0);
    byte* features;
    char *c_content,*tmp_content,*tmp_matched;
    int content_len,length ,idx;
    double  threshold,score;
    LISTNODE *p = head_of_map;

    if (!PyArg_ParseTuple(args,"SSiidi", &obj1,&obj2,&length,&content_len,&threshold,&idx)) {

        return NULL;
    }
    features = (unsigned  char*)PyBytes_AsString(obj1);
    c_content = PyBytes_AsString(obj2);
//    Py_DecRef(obj1);
//    Py_DecRef(obj2);
    p=p->next;
    int counter=0;
    while (p!=NULL){
        counter+=1;
//      compare here
        score =0.3;
        if(score>threshold){
            //if(counter>1000) break;
            tuple      = PyTuple_New(5);
            score1     = Py_BuildValue("d",score);
            _id        = Py_BuildValue("b",idx);
            matched_id = Py_BuildValue("l",p->node->id);

            tmp_content = (char*)PyMem_Malloc(sizeof(char)*content_len);
            if (tmp_content == NULL)
                return PyErr_NoMemory();
            memcpy(tmp_content,c_content,content_len);
            content = Py_BuildValue("y#",tmp_content,(Py_ssize_t) sizeof(char)*content_len);

            tmp_matched=(char*) PyMem_Malloc(sizeof(char)*p->node->content_len);
            if (tmp_matched == NULL)
                return PyErr_NoMemory();
            memcpy(tmp_matched,p->node->content, p->node->content_len);
            matched_content = Py_BuildValue("y#",tmp_matched,(Py_ssize_t) sizeof(char)*p->node->content_len);

            PyTuple_SetItem(tuple,0,_id);
            PyTuple_SetItem(tuple,1,matched_id);
            PyTuple_SetItem(tuple,2,score1);
            PyTuple_SetItem(tuple,3,content);
            PyTuple_SetItem(tuple,4,matched_content);

            PyList_Append(lis,tuple);
            Py_DecRef(tuple);

            PyMem_Free(tmp_content);
            PyMem_Free(tmp_matched);
        }
        p=p->next;
    }

    if (PyErr_Occurred()){
        Py_DECREF(lis);
        return NULL ;
    }
    return lis;
}
static PyMethodDef exampleMethods[] = {
        {"get_list_size", get_list_size, METH_VARARGS, "example"},
        {"compare",compare,METH_VARARGS,"example"},
        {"init_map",init_map,METH_VARARGS,"example"},
        {"destory",destory,METH_VARARGS,""},
        {"search",search,METH_VARARGS,"example"},
        { NULL, NULL, 0, NULL}
};

static struct PyModuleDef ptexamplemodule = {
        PyModuleDef_HEAD_INIT,
        "example",           /* name of module */
        "A module that imports an API",  /* Doc string (may be NULL) */
        -1,                 /* Size of per-interpreter state or -1 */
        exampleMethods       /* Method table */
};
/* Module initialization function */
PyMODINIT_FUNC
PyInit_example(void) {
    PyObject *m;

    m = PyModule_Create(&ptexamplemodule);
    if (m == NULL)
        return NULL;

    /* Import sample, loading its API functions */
    return m;
}
int main() {
return 0;
}

setup file

from distutils.core import setup, Extension

setup(name='example',
      ext_modules=[
          Extension('example',
                    ['example.cpp'],
                    include_dirs = [],  # May need pysample.h directory
                    )
      ],
      version="0.0.1"
      )

test file

import example
import random
import time
import faulthandler
 
faulthandler.enable()
lis=[(b'\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2', b'\x856\x96\xda\xe4H_\xf9\xb3\x95\xff\xda\xc9\x05\x17\xd5'), (b'!\xdf\xc1\xc20h\xc3#\x8f\x8a\xd3\x92\xc9}\x1b\x08', b'\xa3\x03m\xd3\x96\x01\xb2\xe0\xfcOg\x87\xfa\xadA\x89'), (b'Fu\xe6\x97=\xb9\x88S\xe4Q\xc06\x9f[\x84J', b'`9\xf2- \x89\xc63?\xe3=\xf9o\x9b\xe4\x8e'), (b'\x95b\x1f\xba\x90\xfa\xe9j#\xd4\x12x\xc3\x93#\x1c', b'8f\xc4\xeb\xef\x95\xe8\x81\xec\xed1\x00j\xc2\xd2j'), (b'\xf9\x8bUk"\xa2y\xfbHI\x08\xf6\x03\xd3Ye', b'\xdf\xdb\xb6\x87\xf3\x05-p\x80%8\x8cd[43'), (b'\xee\x92\xcf\xb5\xd7\x05j\x92\xa2\x1c\xf7a.[\x05\x86', b'Y\x00|\x02\xa1\xb1wI\x08\xb3\xfc\xfc\xc7\xf5db'), (b"\x9d\xe0&\\~\xbf\xaf`\xae\xc2?'B\xfa\x95\x86", b'FL\xe3\xbb\x18\xeef\x08%\xe1\xc8,\xe3J=\xd5'), (b'\xe1\xfb\xc9n\x89\x1d\x9a\x9d\xe9\x1a\x9c\xde\xcd\xce\xbd\xcf', b'\xa7\xe0\x9b\x8bl\x88\x85,b\x04`3\xe6\x03\x85\x8e'), (b'\xc5\xbf\xd4\xa7\xe7e\xe5\x8a\xe0\xbc\xa8\xb8Yw\x0c\xd3', b":\xc2a\xb6\xd3\x1ct'z\xfe\xf9\xa7P\x8c\x1c\x7f"), (b'\xfd\xb2\x9aP\xc9\x0c\xcf\xe2\x8c\x82g\x8c{r\x94\x84', b'3B\xca\x8c\xd8\x9a\xb8\x94iyT\xd6\x05N_\x8a'), (b'\xea\x7f\x83\xaa\\\x10y\xd1h\x93l\xd9\xfe\x87\xe8j', b'jj\x8f\x1a-tq\xd9\xf1\xf8\xf7\xa5\xf6\x86\xb9\xdb'), (b'\xa4\x0b\x0f\xf1\x84\x95\x1eK\xddtH\xf0IaY:', b'p\xb5\xd9"\xb7\x0f(\r\xb1\x9bm\xc0hq\x00e'), (b'ga\x8f\xbbo\xff\xedQ\x9aM\xbe\x9cTO\xd35', b'=\xc3\x9d\x8d\xd0\xae8*1\x1d\xbf\x84\x89~76'), (b'\xaf\xe1%\xc6\xedo\xec24\xc0\x9d\xa8\x9fy=N', b'\x03m\x81\xd7B\x04\x1a*`C\xee\xccb\x1fm\xf0'), (b'\x1f\x10\xf6\x9fa\xc5\xbf\xaf\x18\xb1Mw\xba\x92&\xe1', b'\xb7\x90/\r$\t\xce}xk\xe7\xbeL\xe9\x8e\xc6'), (b'\xc8.\xd9)\x08g/\xc6\x0e\xed\xbb\xb9\xab\xad\xc8\xa1', b'\xa1c\xcc\xe9\xaasJ\x98\x1d\xd24\xc0\xf2+\x9d\xac'), (b'\xa1\x91\xa0:D\x11(\xb3\xbc0\x13\xd9\xdeD\xb9\x86', b'\x0fV\xa1\x8fl\x0c\xf4\x94=\xe7\xd0*\xdb\xcb\xa5;'), (b'bx;\x8dA\x83\xb8lca\x9c\xadV\xc2\xba/', b'\xe9\xe0\xe7({\x11\r*\xca\xf1\x08\x9bG\x830\x82'), (b"\xde\xbb\xd9\xce{%MP'Xj\xed8>\xa3a", b'\\5M\x13\x13\xb1YY\xa53y\x99$a\x91\xa5'), (b'\xf1\xda\xc4\xfe\xc6\x83\xe6\xf3\xc6\x88\x86H\xc4\x15\x12\xb4', b"\xf8'^\xe5\xb8`\x0fl\x06$\x1cz:\x93\xa1\\"), (b'\xc7@\x03Ny\x11\x96\xcb\xfe\x19\x0e\x18\xbf\xd59I', b'P\xf0\x0f\xec\xaf\x1bS\xa8\x8dt\x11O\xe8\x0f|2'), (b'\xb4\xda\xff\x8c0\x18/\xf1\xad\xa4F\x94\xb1\xad\xf4\xf1', b'.\x83(\x8f#\xe1\x11\x96\xdeS\x17\x8b\xe3]\xb5\xca'), (b'\x1d\t\x11\xc1\x15\xdc\x8e\xea\xe1\xd2\xba%J\\\xf2\xfd', b'\x070ht\xb4\xdeQ+\xc5\xb8#\xac\xd9z\xc0A'), (b' V\xe3\xc5gEg\xc5\xd5\xe1\xd6\xd7\xab\xd0\xb6n', b'\x88\x9b8\xf6\xe26\x92z\x91\xe4\x92\x1e\xbc\xc5;u'), (b'\x88}\xd7*QFn:\xd7\x12RA\xe6-\xd2)', b'\xeb\x04<\xe6\xfa)u\x9ds\xecj\xa7\x84\xcf\xa7\xa7'), (b"\xec\x04.bP)'\xb6\x08\x05\x92\x8c\x85\x1a\x84\x8f", b'@\x80\x08A\xba\x9c2Sd\xdf\x97\x07\xc4Im\xef'), (b'\x11(\x9ez\x0c\xd7\xca+7\xcc\xb0\x19,:\xcb5', b'\xf9V\x8cm\xe5-\xf9\xf1\xf6g\x86~\x8f\xd0<\x84'), (b'\xbb\xe9I=\xc74\xc9C\x0cT\x81\x9d\x85\xdeWF', b'\xfe\x1b\xb1gi\x94O\x98\xf0\x90cJ\xdb\xbf\xc7\x1e'), (b'2`\x9d\x9c\x02\xe4\xce\xf0\x14\xc0Z\xaaW\xf2\xa5\xff', b'\xe6\x05?\xe3X"\xb2O)~\x96\x8f^\x9c\x077'), (b'\xaf\x87,a\xa9\xcd\n|O\x1e\xa7\xdd\xba\x89;\x9d', b'\x98b\xac?\x13*\xf2m\xfeo=v\x96\xd5\x14\x8d'), (b'\xee\xcb{\xeeRQ\x82}\x05|\xc6S\x82\x85\xa4\xff', b'C\x93\xdes\x00\xe8cY<\xde1.\x19\xd8\xe5R'), (b'\xack\x036\x9b4\xca\xa4\x15\xbd\xd5\xbe\xd3\xd4\xfb{', b'm?\xc44>\xf4qig\x10\x8c\xeb\x11o\x99\xb4'), (b'D!\x0e*\xb3/<\x15F_\xd6;j\x85\xbe,', b'Y\xb01H\xb6:\x04_]OL\x9d\x8e\xbcp\xdc'), (b'\xe6\x9d\x02L[\x1c\xee\x8b0lu\xdd\x00\x9a\xfa!', b'J&\tkf\xa8\xeb\x1b\x90\x81\xce\\\x1b\xe8\xb6\x1d'), (b'\xcdnd\xf6\xf6\x11\x9e\x06\x10\xce\xcfP\x89\xed\xad\x02', b'I:\xe0{8H\xfcw\xb1\xe9v\x11p-/0'), (b'\xa22?\xcd\xfbU+\xbf- \xe3<\xc2\x94\xef\xb6', b'\xc7\x89\xc4K?{\x18\x1ftd\x0f\x19L}\xba\xf9'), (b'8\xf9\xe9\xd0\xa0\x97\x8e\rp\xe9\xd6[\x8e`\x93\x81', b'\xf7\x16"\xaa\x86\\b\x9d\x0c\x17"\x81\xeea\x03\x19'), (b')\xfb\x0c\xc7-D\xd8\xa4\xf7\x8d%\x02\x06\xa1\xbf\x94', b'\xc8s!s\xe2n\xee\xb0\x1eC\x15\xc0\xb4d\x08\xa7'), (b'\x7f\xa6\xdc\xf4\x9e\xe3\xda\xd3\x8a\xcb\xfc\x98,1\\\xe5', b'\x82,\xfbKL_\xe9\xc9L`\xc7"&3#\xb5'), (b'&\x98,\x88\xff\x0e\xca[\xcb\xc6t;\xbc5Y"', b'\x1b\x8bA\xd1\x8a\x9e\xe6\xce\xc4#C\x89\xe9Uon'), (b'\x1c\x1b\xac2\x86\xd6\x9a\xf9\xdd\x8d=\xa3\xff\x9a\xe1\xa5', b'\xed\x10\xed\xc1\xfe\xec\xd3\x0b\x8b\xe9\xd4?H"\xd6\x8f'), (b'9\x8f3\x8b\x02\xb7\xe7\xe5\xa1TO\x1a\xb6N\x0b\xb6', b'\xb4\x0f\xa7 \xc7\x83\x81\x18\x9cU\xfd2\x93\xb8(\xe9'), (b'\x8d\xbe\xcf`\xc3d\xde\xca\xf4\n.\x92Mrw\xa6', b';\t\xfb\x19eKt0\x8foG\xcd\xd5\xe0\xf6\x9d'), (b'd\xbcG)\xec\xc8\x04 \xdd\xd9\xf6\xd6\x91/\xb2\xd3', b'2\xba\xa8\x8a\xc5\xd6h\xab\x9c\x9e\x8b\x1b5\xcc_.'), (b'\x98\xea\xf0\xcbJ-c\xea\x81\xcf?B\x17\xcb\x99\xf9', b'\x82\x05\x14\xf2\x05q\x97\xf7Z-B\xad\xa2\x16\xa0`'), (b'\x1c\x94A\xfd\xe1\xf2\x06\x8b\x19\x80\xfa\x01g\xc3V\xaf', b'\x1c\xde\xec\xdf&\x93u\xab\xf9\x12\xfb\x04\xbc0\x99d'), (b'6\xfa\x993b\t\x12E\xd2t\xde\xb6\xfa\x98Im', b',\xa2\xf7VB\x81\x048\xc8\xbf3\xce\x8d\xecn\x82'), (b'L\xd6\x93\xbe\x02\xbeT\xa1\x8d\x88\x84\x18\xc7\xcf\x9fE', b'\x16i"\xb7\x81\xef7\x7f\xdb\x7fY\xbft\x1b\xac\xd0'), (b'\x90\xd6\xe7>Z\x8b\x83\x14\xab\xefH\xa1\x95\xe6\x8e\xc3', b'\xebJ\xfcXd\xd75I@\x12\xe9\x88\xdcG\x15\xb7'), (b'\xb8\xd6\xd03j\xed\xf9\xa0\xb1L3\x8d\x89P\xb4\xe5', b'\xe3\xdf)CR\x03s\x19\x8f\xb5\xc6\xf5\xee&W\xee'), (b'cx\xd7dQ\xc3)YR\x9c7\xf0\xb4\xe4P\xfd', b'h\xa8\x122\xceV\x8c\xf4\x12\x14J\x08\x91\xcf\x93\xfa'), (b'\x1d\x14\x86\xccTi\xdd\xc3L!\x97\x92\xb3\xb7X$', b't\x84\xa4\xe72\xa5X\xe9Ta\x03\x0c\xb00\x07\x83'), (b'\xa6\xdd\x1f\xfc\xca2\x88\x99\xbeA\x9a\xa6JVJ\x84', b'B\x81\x8e-K\xde\x15\xfd\t"\x08\x04@/-\xc9'), (b']b\x18\xe65C\x17*\x103J?\xe79!\x9b', b'\n\xbb\x1f\x98\nZZ\x97\xe0\x12\xf6W\xc4\x1e_~'), (b'\x9a\x9f-c\t\x14\xa4NL\xc5\xc0\x99R\x15\x94\xca', b'\xa0\xf9\x11\xfa\xd7\xf3\xb5\x11\xb2SK\xe8\xdb\x06\x94\x1f'), (b'!\xf0\x90\xd8t_O\xc1\xb5\xfe}\xca\xa6\x15}\x19', b'\xc9+\x89a9y3\xe0\xffh\xedmMq\x13\xcc'), (b'`\xf1\xab\x07A\xe9\xe5\x805\xf2\xabD8\xce\x85A', b'\x9d\x0f\x91\xc2\x01\x93\x8fm\xd6\x03XZl\t\x15J'), (b')\r\xcf\xc0\xa1\xa9\xf7i\x94\xa9\x8d\xed\xea\xcd\xb0B', b'jq\xe9\x01*H\x14\xa7M\x8c\xd2\xa5\x19\\\x80\x12'), (b'vJ\x98\xbf\xd5D\x15\xdb\xa1\xd7\xdc\xaf\xa0\xaa\xf7\xc1', b'\x06\xd8-\x05\xfe\x172W\xa4\xab\xe4\x97_h\xe6\xac'), (b"\xc6'P\xd2\xfcq\xcb\n\xe2\xc1sN>\xbc\x9a\x08", b'\xacysx4z\x96\x8f\xf1\xa5M\x9e\xe7f\\\x16'), (b'\x00\xc6\x81\xf0\xf1\xcb\xa8_\x1fF\x18\xe9\xb0\x95\xa3\xab', b'N\xd8\xdaz\xe8F4\xb1\x88>\xec\xd6[\x15\xd7\xfd'), (b'E\x1ee\x8f\xd5\x0f\x19\xa8\xb6~-\xe0n\xe7\x89\xe1', b'\x83\x90\xa0\x93\xec\x086+\xcf\x08\x9c\x048\x85\x88^'), (b"\x82\x13\xfa\x9f 6\xd3'\xd7\xd2\xc1\xf9\x03.\x13*", b'J\xe2\xf8\x1e\xec\xeb\xd9"q\xbb:\x04\xc2\xdf<\xa8'), (b'\x01\xdf\xfcPW\xc80&{\xfe\x99\xf8\x10\xc6R\xce', b'=Cnh3\x9drKE\x1aU\x8e\xcc\x84jS'), (b'<\xeeo[J\xb0r0N\x07\xf4\x1e\xfb\x16\xac\x07', b'\xf9KP\xfe\xd2\xed\xc7`\xc7\x0c\x9et\xd3^\xc9g'), (b'P\xa1b#\x07V\xb0s\x86S\x9a\x1d[\xd9\xe7\x92', b'r&\xb1a\x0ff2\xd2\x8ft\x13\xff\xba\x0fy\xf4'), (b'\xc8t\xcc\x96V\x8bS\xde\x98\xf1\xa33\xa9\xdb\x1b\x85', b'13\xf1&\x8c\xfe3\xf9\xca\\\xc6\xdb\x1dT\x04b'), (b'\xb9u\xf2O\xfb\x05\xb3\xc2r*\x17d\\\x8f9M', b'\xeeQ\xf9&\xc5\xc8\xc8\xe6B\x1d\xcf\x184\x19]b'), (b'\x96\xe6\xc9\x1e;\x83e4\xb0\xbe\xf6\x94\x03\x06\x85:', b'\xa2\xb0\nS\xaf\xads.\x94\x16s\x901\xaf-\xe4'), (b'&\x81t\x048(\xd7 \xb6\x1aR\xaa\x98\xdbN\x0b', b'\x9cd\x8a#\xdd \xdcS\x14\x1e\x1b\xea\x87\x1b\xa41'), (b"\x1fC\x0cr,\x1d\xab\xa8\xd9\xfc\xa1\xa2\xbb\xcdw'", b'D\xb6)\x7f\xd7\xe6\xb2@\x18D\x1d\x08c\xce2k'), (b'$\xc1\xabD\r\xe0a\x0co\x12\x17\xbdB\x06\xc9y', b's\xd8\xf8\x05\x81\xfeY\xc4\xc8|\xe5\xe2]U\xfa"'), (b'\x8f\xe3\xd6\xc9d\xc4\xf0\xd0\xe3\x17\x12\x82\xb2\xfd\xe9\xef', b'\xf1\r/9\x97/\xd4\xf6dz\x89\x7fEG/\x15'), (b'\x16\xf2\xd8\x9a\x029\x9b{\x02\xf4u\x08~\x80\xb9Z', b'\xe1+:9\xaa\x87:\xf0\x02\xdd\xe5}G\x0ch\x98'), (b'\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO', b'\x13\xd0\x1c[\xe6\x8aa\xd6\xa0a\xb1X\x84\xb8\xde2'), (b'\xa8\xaa\xa8\xe7\xcal>\xe6\xdb\x18vL\xdak%}', b"\xe5W'\x83\xbd\xf2\x01\x9b\x89\x98l\xfd\xcf\x02\x84\x13"), (b'6\x9b\xae\x07\xe0+\xa0\xcd\xdbT\t\x02@\x81$E', b'\x10Q\x1an\xaa\xe7\x9cy\x04t\x16\x1d\xea\x9d\xd2\xe6'), (b'ne\x83\xb0\xe2\x8aG\xda\xaf%\x93cg\x84\xaf\xea', b'p!}\x8aQ(C\x08\xaf\x8bi\x80\xdaw\x7f\x81'), (b'<\xa6\x90\xb2\x03\x8f\xa4\x17$\xc7\xbe\x8a\x1e!\x01s', b'\x19\x7ftaaLUB\x1aBt\xf5U~\xeeY'), (b'\xe4|z\xaer8\xa0\xa3\xc8\x9c\x8d\x03`\xb8rN', b'b\xda\xf8e\n\x1f\xc1\xb2\x97\x13\xd7\x91\x91\x86\xbe\x13'), (b'\x00\x84\xc1\xd8\xe0ub\x1a\xc0\xa1\xfe\x08K\xfb>,', b'g\xb06$\xbb{s\xb9-\x8e5h|+\xf8\xb1'), (b'(\xac{\xb1^OOy\xc98\xf5\xc5\xc2\xd3\xb3\x1e', b'\x8d\xfc\x0f\xda\xd5\xa6\x89)\x92\x173\x1e)*\xf4&'), (b'G*.\xe2\xd8\xee\x87\xfa\x83~eS\xcc\x1e\xdeQ', b'\xc9y \x1a]Y\x8d\xf8\xa0\xd0^p\xc1\xf1\xa4\xd2'), (b'\x16\x91\xd7[\xbeC\x80T\x08\x0f\xbd\xe1\xf7\xbd\x1b\xb8', b'\xcc\x8c\xfb\x1f"\x16\xa7/\xe2inK\xaa\xdf\xc0\xe4'), (b'\x1d\xbc\xf2\x9b\xe90\x13\xc9\tuuk\r\xe4\xa9\xfa', b'\xf6\x97\xf2B\xaa\x83\xf9k\x15K\xa0\xb9\xdf\xf4,\x02'), (b'hIA(;\x8b\x92G!K\xca\x1a\xfd\x8c9\x95', b'0!V\xfc\xfb\xa1,R\xc6\xdaO\x9c\x16\xa8<\xe2'), (b'?\xd8\xd7e\x88\xad\xcd\xd6\x8d\\\x1e\xc2l$;\xd8', b"\x88\x8e\x86'#f\xcf\xe3,\x96&\r\xb1\xee@g"), (b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", b'\x99\xbf\xf9qo\xde\x92xge\x04\x84\xb8 \xfb\xe3'), (b'\xa7\x8d\x83\xc8\xa3\xc15\xdc`\x82\xed\xae.\xea\xf2\xea', b'[\xe8{\r\xd6g\xcf\xe8L\x16\xde\xcd\x90M\xbd\x9e'), (b'\x1a\x1b;\xa62\x10i\xb2\x19n\xc1\xddKz\xb4\xd8', b';On\xd2N\xef%B\x05_T9)IG\xcf'), (b'@g`\x1e\xc1\xdf\x14A\xc1\x04N\xb6]\x97\xd3\x17', b'\xbc6\xcb\xaah*\xf1\xd0_\xd9B\x94F\xa8\xd8\r'), (b'<\x86\xa4z\xb4\xaeS\x90\xa9\xb5I\xe8\xe6FI\x8c', b'\r2\xd2.H"\xfb\x9d}{\x11^\xdf\xcb\x186'), (b'\xf0\xb8J|\x0b\xfe\xcd\x8b\xc0$&"8\xffob', b'\xa8}\xf31\xdd\x07\xc6\xa0\xbb\x9f0\xca\xe3&^\xf6'), (b'r\x9b\x81\xa0D\x94\x80\xfdu\xb1O\xbc\xf6\x1a\xd8\xe0', b'\xd4)\xb6\xdbQ\xa5#\x16\xed9\xadM\xf82%\xeb'), (b'\xb9V\xb7R\xb9\xcdJ9c\x1a`V\xb9\xb0oX', b'\x9a\xff]\xb3\x83\x96\xf5\r\x98\xe2\xc5\x86\xa21k%'), (b"J\xe9\xaa\x8f\x07\xab\x90\x18'\xa6\x85\x08\x18\xa8\x96?", b'\xa7\x83\xed\xd9\t\xd8\x85\xe4\xf8\xcb0\x95n\xf2`\xa8'), (b'\xc3\xaf\xc8m\xfc\x90}=\xb8\x1fr\xab}\r\xc1!', b'\x04\xa0sq\xf7\xd3Dm\xb9\xb8\x9c\xbap(\xa1L'), (b'\x97bq\x0f\x93\xb5\xa1\xa1\xf58\xdcu5\x9a\xbd\x8b', b"\x07'\xc0\xdb/0\xd9H\x81\x06\xe2^ `F\xa9"), (b'Nn\xd4\xbd\x13dN\x9e\xcd\xd4\x06kk~\xf5\x7f', b"\x98j'}\xc5\xb0\x1d\x19\xe2/\x92'\x9e\xd4\xba("), (b'\x08ty\x84\xde>\x0f5jL6\x05\xe1\x02pH', b'0\xb9\xd5\xd40u\x04\xfdK\xe2D\xbc\xddy\xd4\x11')]
def cost(func):
    def wrapper(*args,**kwargs):
        start=time.time()
        res=func(*args,**kwargs)
        end=time.time()
        spend=end-start
        print(f"cost:{spend}")
        return res
    return wrapper
@cost
def test(repeat):
    tmp=[*lis*repeat]
    if example.get_list_size()<4000000:

        for it in tmp:
            t=b'something here'
            example.init_map(it[0],t ,16,len(t),int(random.random()*10000))
 
    print(f"pool size is {example.get_list_size()}")
    print(f"get args from lis start")
    print(lis)
    arg1=lis[0][0]
    print(f"get args from lis complete")
    t=b'something need search'
    print(f"start search")
    res=example.search(arg1,t,16,len(t),0.1,int(random.random()*10000))
    l=len(gc.get_objects())
    print(f"res is {len(res)},gc:{l}")


if __name__=='__main__':
    for i in range(10000):
        test(10)

error

pool size is 1000
get args from lis start
Fatal Python error: Segmentation fault

Current thread 0x00007f0ca3c90340 (most recent call first):
  File "/tmp/tmp.ztKz0S1yKb/test.py", line 29 in test
  File "/tmp/tmp.ztKz0S1yKb/test.py", line 12 in wrapper
  File "/tmp/tmp.ztKz0S1yKb/test.py", line 41 in <module>
Segmentation fault (core dumped)

I tried using PyMem_RawMalloc and Py_DecRef but still segfault. what should i do?

Sorry for the little misinformation I gave. I may have located where the problem is: problem here

Everything works when I comment out Py_DecRef ,otherwise it throws a seg fault exception:

gdb backtrace,
print info

I don't understand why Py_DecRef can't be used here.


Solution

  • There are some things wrong:

    • C and Python code have errors, so it doesn't compile / run - not an MCVE ([SO]: How to create a Minimal, Reproducible Example (reprex (mcve)))

    • There is a mix of raw (malloc) and pymalloc memory functions

    • Mismatches between types and format specifiers

    • C API misunderstanding. According to [Python.Docs]: Parsing arguments and building values (emphasis is mine):

      Note that any Python object references which are provided to the caller are borrowed references; do not decrement their reference count!

    • Design flaw mixing C and Python types. There is the C list that is supposed to be the backbone, but it's tightly coupled with Python structures. Also attempting to export the functions from the .dll makes me think that things are not very clear for the OP. I'd suggest to keep the core functionality at the C level and provide wrappers for interacting with Python. Otherwise, keep switching from one to another would lose the speed advantage (which I assume is the reason for not writing everything in Python)

    • The question is tagged C++, but there's nothing C++'y about the code, it's rather C (in C++ there is std::list, smart pointers and other features that could save writing a lot of code)

    • Minor ones:

      • LISTNODE - NODE nesting. Although it looks better structured, the overhead for working with nested structures might be a greater loss

      • Passing the size of bytes (which is held inside it)

      • Method arguments specifications

      • Code style

      • Many many others

    Before going further, I suggest a thorough reading of the following (including referenced URLs):

    Now, there are multiple problems, each with multiple solutions.
    Starting from your code, I prepared an example. It's written in C (lots of code which doesn't have anything to do with the goal).
    All the business logic is carried out in C (it's callable from a C application), and the Python wrapping layer is on top of that.

    • dll00.h:

      #include <stdint.h>
      
      #if defined(_WIN32) && !defined(PY_MOD_BUILD)
      #  if defined(DLL00_STATIC)
      #    define DLL00_EXPORT_API
      #  else
      #    if defined DLL00_EXPORTS
      #      define DLL00_EXPORT_API __declspec(dllexport)
      #    else
      #      define DLL00_EXPORT_API __declspec(dllimport)
      #    endif
      #  endif
      #else
      #  define DLL00_EXPORT_API
      #endif
      
      
      typedef uint8_t byte;
      typedef unsigned long ulong;
      
      typedef struct Node_ {
          byte *features;
          ulong features_len;
          char *content;
          ulong content_len;
          long id;
      
          struct Node_ *next;
      } Node, *PNode;
      
      typedef struct {
          size_t len;
          PNode *nodes;
          double *scores;
      } FoundData, *PFoundData;
      
      
      #if defined(__cplusplus)
      extern "C" {
      #endif
      
      DLL00_EXPORT_API int addNode(const byte *features, ulong features_len, const char *content, ulong content_len, long id);
      DLL00_EXPORT_API size_t size();
      DLL00_EXPORT_API PFoundData search(const byte *features, ulong features_len, const char *content, ulong content_len, long id, double threshold);
      DLL00_EXPORT_API void freeFoundData(PFoundData data, int membersOnly);
      DLL00_EXPORT_API size_t cleanup();
      
      #if defined(__cplusplus)
      }
      #endif
      
    • dll00.c:

      #include <math.h>
      #include <stdio.h>
      #include <stdlib.h>
      #include <string.h>
      #define DLL00_EXPORTS
      #include "dll00.h"
      
      
      static Node *gHead = NULL;
      static Node *gCur = NULL;
      static size_t gLen = 0;
      
      
      int addNode(const byte *features, ulong features_len, const char *content, ulong content_len, long id)
      {
          Node *node = malloc(sizeof(Node));
          if (node == NULL) {
              return -1;
          }
          node->features = malloc(sizeof(byte) * features_len);
          if (node->features == NULL) {
              free(node);
              return -2;
          }
          node->content = malloc(sizeof(char) * content_len);
          if (node->content == NULL) {
              free(node->features);
              free(node);
              return -3;
          }
          memcpy(node->features, features, features_len);
          node->features_len = features_len;
          memcpy(node->content, content, content_len);
          node->content_len = content_len;
          node->id = id;
          node->next = NULL;
          if (gCur == NULL) {
              gCur = node;
              gHead = node;
          } else {
              gCur->next = node;
              gCur = node;
          }
          ++gLen;
          return 0;
      }
      
      
      size_t size()
      {
          return gLen;
      }
      
      
      static double compare(const byte *features0, ulong features0_len, const byte *features1, ulong features1_len, int min)
      {
          // @TODO - cfati: Dumb comparison
          if ((features0_len > 1) && (features1_len > 1) && ((features0[0] == features1[0]) || (features0[0] == features1[1]) || (features0[1] == features1[0]) || (features0[1] == features1[1]))) {
              return 0.3;
          }
          return 0;
      }
      
      
      PFoundData search(const byte *features, ulong features_len, const char *content, ulong content_len, long id, double threshold)
      {
          if (gCur == NULL) {
              return NULL;
          }
          Node *node = gHead;
          FoundData tmp = {0, NULL, NULL};
          tmp.nodes = malloc(sizeof(PNode) * gLen);
          if (tmp.nodes == NULL) {
              perror("malloc failed 1.");
              freeFoundData(&tmp, 1);
              return NULL;
          }
          tmp.scores = malloc(sizeof(double) * gLen);
          if (tmp.scores == NULL) {
              perror("malloc failed 2.");
              freeFoundData(&tmp, 1);
              return NULL;
          }
          while (node != NULL) {
              double score = compare(features, features_len, node->features, node->features_len, 0);
              if (score > threshold) {
                  tmp.nodes[tmp.len] = node;
                  tmp.scores[tmp.len] = score;
                  ++tmp.len;
              }
              node = node->next;
          }
          if (tmp.len == 0) {
              return NULL;
          } else {
              PFoundData ret = calloc(1, sizeof(FoundData));
              if (ret == NULL) {
                  perror("malloc failed 3.");
                  freeFoundData(&tmp, 1);
                  return NULL;
              }
              ret->nodes = malloc(sizeof(PNode) * tmp.len);
              if (ret->nodes == NULL) {
                  perror("malloc failed 4.");
                  freeFoundData(ret, 0);
                  freeFoundData(&tmp, 1);
                  return NULL;
              }
              ret->scores = malloc(sizeof(double) * tmp.len);
              if (ret->scores == NULL) {
                  perror("malloc failed 5.");
                  freeFoundData(ret, 0);
                  freeFoundData(&tmp, 1);
                  return NULL;
              }
              ret->len = tmp.len;
              memcpy(ret->nodes, tmp.nodes, sizeof(PNode) * tmp.len);
              memcpy(ret->scores, tmp.scores, sizeof(double) * tmp.len);
              freeFoundData(&tmp, 1);
              return ret;
          }
      }
      
      
      void freeFoundData(PFoundData data, int membersOnly)
      {
          if (data == NULL)
              return;
          free(data->nodes);
          free(data->scores);
          if (!membersOnly)
              free(data);
      }
      
      
      size_t cleanup()
      {
          size_t ret = 0;
          Node *node = gHead;
          while (node != NULL) {
              free(node->features);
              free(node->content);
              Node *next = node->next;
              free(node);
              node = next;
              ++ret;
          }
          gHead = NULL;
          gCur = NULL;
          gLen = 0;
          return ret;
      }
      
    • main00.c:

      #include <stdio.h>
      #include <string.h>
      #include "dll00.h"
      
      
      void handleFoundData(PFoundData data)
      {
          printf("Found data (%d items):\n", data == NULL ? 0 : data->len);
          if (data == NULL)
              return;
          for (size_t i = 0; i < data->len; ++i) {
              printf(" Index: %d\n  Score: %.03f\n", i, data->scores[i]);
              printf("  Id: %d, Features length: %d, Content length: %d\n", data->nodes[i]->id, data->nodes[i]->features_len, data->nodes[i]->content_len);
              printf("  Feats: %s\n", data->nodes[i]->features);
              printf("  Cnt: %s\n", data->nodes[i]->content);
          }
          freeFoundData(data, 0);
      }
      
      
      int main()
      {
          Node dummies[] = {
              {"1\02", 3, "567\0890", 7, 2},
              {"ab\0c", 4, "d4567\0890", 9, 3},
          };
          printf("Element count: %zu\n", size());
          for (int i = 0; i < sizeof(dummies) / sizeof(dummies[0]); ++i)
              printf("Add node (%d) returned: %d\n", i, addNode(dummies[i].features, dummies[i].features_len, dummies[i].content, dummies[i].content_len, dummies[i].id));
          printf("Element count: %zu\n", size());
          // @TODO - cfati: Values based on .dll's dumb comparison
          byte *src[] = {
              "xxx",  // None
              "111",  // 1st
              "xaa",  // 2nd
              "1bd",  // Both
          };
          for (int i = 0; i < sizeof(src) / sizeof(src[0]); ++i) {
              printf("Search for '%s' ...\n", src[i]);
              PFoundData found = search(src[i], strlen(src[i]), "", 0, 0, 0.1);
              handleFoundData(found);
          }
          printf("Freed %zu nodes\n", cleanup());
          printf("Element count: %zu\n", size());
          printf("\nDone.\n\n");
          return 0;
      }
      
    • example.c:

      #define PY_SSIZE_T_CLEAN
      #include <Python.h>
      
      //#define PY_MOD_BUILD
      #include "dll00.h"
      
      #define MOD_NAME "_example"
      
      
      static PyObject* example_size(PyObject *self, PyObject *Py_UNUSED(args))
      {
          return PyLong_FromSize_t(size());
      }
      
      
      static PyObject* example_add_node(PyObject *self, PyObject *args)
      {
          PyObject *feat = NULL, *cnt = NULL;
          long id;
          if (!PyArg_ParseTuple(args, "SSl", &feat, &cnt, &id)) {
              return NULL;
          }
          int ret = addNode((const byte*)PyBytes_AsString(feat), (ulong)PyBytes_Size(feat), PyBytes_AsString(cnt), (ulong)PyBytes_Size(cnt), id);
          return PyLong_FromLong(ret);
      }
      
      
      static PyObject* example_search(PyObject *self, PyObject *args)
      {
          PyObject *feat = NULL, *cnt = NULL;
          long id;
          double thres;
          if (!PyArg_ParseTuple(args, "SSld", &feat, &cnt, &id, &thres)) {
              return NULL;
          }
          PFoundData tmp = search((const byte*)PyBytes_AsString(feat), PyBytes_Size(feat), PyBytes_AsString(cnt), PyBytes_Size(cnt), id, thres);
          if (tmp == NULL) {
              return PyList_New(0);
          } else {
              PyObject *ret = PyList_New(tmp->len);
              if (ret == NULL) {
                  freeFoundData(tmp, 0);
                  return NULL;
              }
              for (size_t i = 0; i < tmp->len; ++i) {
                  PyObject *tup = Py_BuildValue(
                      "(ly#y#d)", 
                      tmp->nodes[i]->id,
                      tmp->nodes[i]->content,
                      tmp->nodes[i]->content_len,
                      tmp->nodes[i]->features,
                      tmp->nodes[i]->features_len,
                      tmp->scores[i]);
                      // @TODO - cfati: No need to include data passed in arguments in each of the output tuples !!!
                  if (tup == NULL) {
                      Py_XDECREF(ret);
                      freeFoundData(tmp, 0);
                      return NULL;
                  } else {
                      PyList_SET_ITEM(ret, i, tup);
                  }
              }
              freeFoundData(tmp, 0);
              return ret;
          }
      }
      
      
      static PyObject* example_cleanup(PyObject *self, PyObject *Py_UNUSED(args))
      {
          size_t ret = 0;
          //Py_BEGIN_ALLOW_THREADS
          ret = cleanup();
          //Py_END_ALLOW_THREADS
          return PyLong_FromSize_t(ret);
      }
      
      
      static PyMethodDef methDef[] = {
          {"size", example_size, METH_NOARGS, "Get List size"},
          {"add_node", example_add_node, METH_VARARGS, "Add node"},
          {"search", example_search, METH_VARARGS, "Search elements"},
          {"cleanup", example_cleanup, METH_NOARGS, "Clean up"},
          {NULL, NULL, 0, NULL},
      };
      
      
      static struct PyModuleDef modDef = {
              PyModuleDef_HEAD_INIT,
              MOD_NAME,  // name
              "A module that imports an API",  // Doc string (may be NULL)
              -1,  // Size of per-interpreter state or -1
              methDef,  // Method table
      };
      
      
      PyMODINIT_FUNC PyInit__example() {
          PyObject *m;
      
          m = PyModule_Create(&modDef);
          if (m == NULL)
              return NULL;
      
          /* Import sample, loading its API functions */
          return m;
      }
      
    • setup.py:

      #!/usr/bin/env python
      
      from distutils.core import setup, Extension
      
      
      setup(
          name="example",
          ext_modules=[
              Extension("_example",
                        ["example.c", "dll00.c"],
                        include_dirs=(),
                        define_macros=[
                          ("PY_MOD_BUILD", None),
                        ],
              ),
          ],
          version="0.0.1",
      )
      
    • data.py (I just extracted input data to avoid polluting the other source file):

      data = [
          (b"\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2", b"\x856\x96\xda\xe4H_\xf9\xb3\x95\xff\xda\xc9\x05\x17\xd5"),
          # @TODO - cfati: !!! DELETED NEXT 20 TUPLES so the answer fits the 30K characters limit !!!
          (b"\xb4\xda\xff\x8c0\x18/\xf1\xad\xa4F\x94\xb1\xad\xf4\xf1", b".\x83(\x8f#\xe1\x11\x96\xdeS\x17\x8b\xe3]\xb5\xca"),
          (b"\x1d\t\x11\xc1\x15\xdc\x8e\xea\xe1\xd2\xba%J\\\xf2\xfd", b"\x070ht\xb4\xdeQ+\xc5\xb8#\xac\xd9z\xc0A"),
          (b" V\xe3\xc5gEg\xc5\xd5\xe1\xd6\xd7\xab\xd0\xb6n", b"\x88\x9b8\xf6\xe26\x92z\x91\xe4\x92\x1e\xbc\xc5;u"),
          (b"\x88}\xd7*QFn:\xd7\x12RA\xe6-\xd2)", b"\xeb\x04<\xe6\xfa)u\x9ds\xecj\xa7\x84\xcf\xa7\xa7"),
          (b"\xec\x04.bP)'\xb6\x08\x05\x92\x8c\x85\x1a\x84\x8f", b"@\x80\x08A\xba\x9c2Sd\xdf\x97\x07\xc4Im\xef"),
          (b"\x11(\x9ez\x0c\xd7\xca+7\xcc\xb0\x19,:\xcb5", b"\xf9V\x8cm\xe5-\xf9\xf1\xf6g\x86~\x8f\xd0<\x84"),
          (b"\xbb\xe9I=\xc74\xc9C\x0cT\x81\x9d\x85\xdeWF", b"\xfe\x1b\xb1gi\x94O\x98\xf0\x90cJ\xdb\xbf\xc7\x1e"),
          (b"2`\x9d\x9c\x02\xe4\xce\xf0\x14\xc0Z\xaaW\xf2\xa5\xff", b"\xe6\x05?\xe3X\"\xb2O)~\x96\x8f^\x9c\x077"),
          (b"\xaf\x87,a\xa9\xcd\n|O\x1e\xa7\xdd\xba\x89;\x9d", b"\x98b\xac?\x13*\xf2m\xfeo=v\x96\xd5\x14\x8d"),
          (b"\xee\xcb{\xeeRQ\x82}\x05|\xc6S\x82\x85\xa4\xff", b"C\x93\xdes\x00\xe8cY<\xde1.\x19\xd8\xe5R"),
          (b"\xack\x036\x9b4\xca\xa4\x15\xbd\xd5\xbe\xd3\xd4\xfb{", b"m?\xc44>\xf4qig\x10\x8c\xeb\x11o\x99\xb4"),
          (b"D!\x0e*\xb3/<\x15F_\xd6;j\x85\xbe,", b"Y\xb01H\xb6:\x04_]OL\x9d\x8e\xbcp\xdc"),
          (b"\xe6\x9d\x02L[\x1c\xee\x8b0lu\xdd\x00\x9a\xfa!", b"J&\tkf\xa8\xeb\x1b\x90\x81\xce\\\x1b\xe8\xb6\x1d"),
          (b"\xcdnd\xf6\xf6\x11\x9e\x06\x10\xce\xcfP\x89\xed\xad\x02", b"I:\xe0{8H\xfcw\xb1\xe9v\x11p-/0"),
          (b"\xa22?\xcd\xfbU+\xbf- \xe3<\xc2\x94\xef\xb6", b"\xc7\x89\xc4K?{\x18\x1ftd\x0f\x19L}\xba\xf9"),
          (b"8\xf9\xe9\xd0\xa0\x97\x8e\rp\xe9\xd6[\x8e`\x93\x81", b"\xf7\x16\"\xaa\x86\\b\x9d\x0c\x17\"\x81\xeea\x03\x19"),
          (b")\xfb\x0c\xc7-D\xd8\xa4\xf7\x8d%\x02\x06\xa1\xbf\x94", b"\xc8s!s\xe2n\xee\xb0\x1eC\x15\xc0\xb4d\x08\xa7"),
          (b"\x7f\xa6\xdc\xf4\x9e\xe3\xda\xd3\x8a\xcb\xfc\x98,1\\\xe5", b"\x82,\xfbKL_\xe9\xc9L`\xc7\"&3#\xb5"),
          (b"&\x98,\x88\xff\x0e\xca[\xcb\xc6t;\xbc5Y\"", b"\x1b\x8bA\xd1\x8a\x9e\xe6\xce\xc4#C\x89\xe9Uon"),
          (b"\x1c\x1b\xac2\x86\xd6\x9a\xf9\xdd\x8d=\xa3\xff\x9a\xe1\xa5", b"\xed\x10\xed\xc1\xfe\xec\xd3\x0b\x8b\xe9\xd4?H\"\xd6\x8f"),
          (b"9\x8f3\x8b\x02\xb7\xe7\xe5\xa1TO\x1a\xb6N\x0b\xb6", b"\xb4\x0f\xa7 \xc7\x83\x81\x18\x9cU\xfd2\x93\xb8(\xe9"),
          (b"\x8d\xbe\xcf`\xc3d\xde\xca\xf4\n.\x92Mrw\xa6", b";\t\xfb\x19eKt0\x8foG\xcd\xd5\xe0\xf6\x9d"),
          (b"d\xbcG)\xec\xc8\x04 \xdd\xd9\xf6\xd6\x91/\xb2\xd3", b"2\xba\xa8\x8a\xc5\xd6h\xab\x9c\x9e\x8b\x1b5\xcc_."),
          (b"\x98\xea\xf0\xcbJ-c\xea\x81\xcf?B\x17\xcb\x99\xf9", b"\x82\x05\x14\xf2\x05q\x97\xf7Z-B\xad\xa2\x16\xa0`"),
          (b"\x1c\x94A\xfd\xe1\xf2\x06\x8b\x19\x80\xfa\x01g\xc3V\xaf", b"\x1c\xde\xec\xdf&\x93u\xab\xf9\x12\xfb\x04\xbc0\x99d"),
          (b"6\xfa\x993b\t\x12E\xd2t\xde\xb6\xfa\x98Im", b",\xa2\xf7VB\x81\x048\xc8\xbf3\xce\x8d\xecn\x82"),
          (b"L\xd6\x93\xbe\x02\xbeT\xa1\x8d\x88\x84\x18\xc7\xcf\x9fE", b"\x16i\"\xb7\x81\xef7\x7f\xdb\x7fY\xbft\x1b\xac\xd0"),
          (b"\x90\xd6\xe7>Z\x8b\x83\x14\xab\xefH\xa1\x95\xe6\x8e\xc3", b"\xebJ\xfcXd\xd75I@\x12\xe9\x88\xdcG\x15\xb7"),
          (b"\xb8\xd6\xd03j\xed\xf9\xa0\xb1L3\x8d\x89P\xb4\xe5", b"\xe3\xdf)CR\x03s\x19\x8f\xb5\xc6\xf5\xee&W\xee"),
          (b"cx\xd7dQ\xc3)YR\x9c7\xf0\xb4\xe4P\xfd", b"h\xa8\x122\xceV\x8c\xf4\x12\x14J\x08\x91\xcf\x93\xfa"),
          (b"\x1d\x14\x86\xccTi\xdd\xc3L!\x97\x92\xb3\xb7X$", b"t\x84\xa4\xe72\xa5X\xe9Ta\x03\x0c\xb00\x07\x83"),
          (b"\xa6\xdd\x1f\xfc\xca2\x88\x99\xbeA\x9a\xa6JVJ\x84", b"B\x81\x8e-K\xde\x15\xfd\t\"\x08\x04@/-\xc9"),
          (b"]b\x18\xe65C\x17*\x103J?\xe79!\x9b", b"\n\xbb\x1f\x98\nZZ\x97\xe0\x12\xf6W\xc4\x1e_~"),
          (b"\x9a\x9f-c\t\x14\xa4NL\xc5\xc0\x99R\x15\x94\xca", b"\xa0\xf9\x11\xfa\xd7\xf3\xb5\x11\xb2SK\xe8\xdb\x06\x94\x1f"),
          (b"!\xf0\x90\xd8t_O\xc1\xb5\xfe}\xca\xa6\x15}\x19", b"\xc9+\x89a9y3\xe0\xffh\xedmMq\x13\xcc"),
          (b"`\xf1\xab\x07A\xe9\xe5\x805\xf2\xabD8\xce\x85A", b"\x9d\x0f\x91\xc2\x01\x93\x8fm\xd6\x03XZl\t\x15J"),
          (b")\r\xcf\xc0\xa1\xa9\xf7i\x94\xa9\x8d\xed\xea\xcd\xb0B", b"jq\xe9\x01*H\x14\xa7M\x8c\xd2\xa5\x19\\\x80\x12"),
          (b"vJ\x98\xbf\xd5D\x15\xdb\xa1\xd7\xdc\xaf\xa0\xaa\xf7\xc1", b"\x06\xd8-\x05\xfe\x172W\xa4\xab\xe4\x97_h\xe6\xac"),
          (b"\xc6'P\xd2\xfcq\xcb\n\xe2\xc1sN>\xbc\x9a\x08", b"\xacysx4z\x96\x8f\xf1\xa5M\x9e\xe7f\\\x16"),
          (b"\x00\xc6\x81\xf0\xf1\xcb\xa8_\x1fF\x18\xe9\xb0\x95\xa3\xab", b"N\xd8\xdaz\xe8F4\xb1\x88>\xec\xd6[\x15\xd7\xfd"),
          (b"E\x1ee\x8f\xd5\x0f\x19\xa8\xb6~-\xe0n\xe7\x89\xe1", b"\x83\x90\xa0\x93\xec\x086+\xcf\x08\x9c\x048\x85\x88^"),
          (b"\x82\x13\xfa\x9f 6\xd3'\xd7\xd2\xc1\xf9\x03.\x13*", b"J\xe2\xf8\x1e\xec\xeb\xd9\"q\xbb:\x04\xc2\xdf<\xa8"),
          (b"\x01\xdf\xfcPW\xc80&{\xfe\x99\xf8\x10\xc6R\xce", b"=Cnh3\x9drKE\x1aU\x8e\xcc\x84jS"),
          (b"<\xeeo[J\xb0r0N\x07\xf4\x1e\xfb\x16\xac\x07", b"\xf9KP\xfe\xd2\xed\xc7`\xc7\x0c\x9et\xd3^\xc9g"),
          (b"P\xa1b#\x07V\xb0s\x86S\x9a\x1d[\xd9\xe7\x92", b"r&\xb1a\x0ff2\xd2\x8ft\x13\xff\xba\x0fy\xf4"),
          (b"\xc8t\xcc\x96V\x8bS\xde\x98\xf1\xa33\xa9\xdb\x1b\x85", b"13\xf1&\x8c\xfe3\xf9\xca\\\xc6\xdb\x1dT\x04b"),
          (b"\xb9u\xf2O\xfb\x05\xb3\xc2r*\x17d\\\x8f9M", b"\xeeQ\xf9&\xc5\xc8\xc8\xe6B\x1d\xcf\x184\x19]b"),
          (b"\x96\xe6\xc9\x1e;\x83e4\xb0\xbe\xf6\x94\x03\x06\x85:", b"\xa2\xb0\nS\xaf\xads.\x94\x16s\x901\xaf-\xe4"),
          (b"&\x81t\x048(\xd7 \xb6\x1aR\xaa\x98\xdbN\x0b", b"\x9cd\x8a#\xdd \xdcS\x14\x1e\x1b\xea\x87\x1b\xa41"),
          (b"\x1fC\x0cr,\x1d\xab\xa8\xd9\xfc\xa1\xa2\xbb\xcdw'", b"D\xb6)\x7f\xd7\xe6\xb2@\x18D\x1d\x08c\xce2k"),
          (b"$\xc1\xabD\r\xe0a\x0co\x12\x17\xbdB\x06\xc9y", b"s\xd8\xf8\x05\x81\xfeY\xc4\xc8|\xe5\xe2]U\xfa"),
          (b"\x8f\xe3\xd6\xc9d\xc4\xf0\xd0\xe3\x17\x12\x82\xb2\xfd\xe9\xef", b"\xf1\r/9\x97/\xd4\xf6dz\x89\x7fEG/\x15"),
          (b"\x16\xf2\xd8\x9a\x029\x9b{\x02\xf4u\x08~\x80\xb9Z", b"\xe1+:9\xaa\x87:\xf0\x02\xdd\xe5}G\x0ch\x98"),
          (b"\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO", b"\x13\xd0\x1c[\xe6\x8aa\xd6\xa0a\xb1X\x84\xb8\xde2"),
          (b"\xa8\xaa\xa8\xe7\xcal>\xe6\xdb\x18vL\xdak%}", b"\xe5W'\x83\xbd\xf2\x01\x9b\x89\x98l\xfd\xcf\x02\x84\x13"),
          (b"6\x9b\xae\x07\xe0+\xa0\xcd\xdbT\t\x02@\x81$E", b"\x10Q\x1an\xaa\xe7\x9cy\x04t\x16\x1d\xea\x9d\xd2\xe6"),
          (b"ne\x83\xb0\xe2\x8aG\xda\xaf%\x93cg\x84\xaf\xea", b"p!}\x8aQ(C\x08\xaf\x8bi\x80\xdaw\x7f\x81"),
          (b"<\xa6\x90\xb2\x03\x8f\xa4\x17$\xc7\xbe\x8a\x1e!\x01s", b"\x19\x7ftaaLUB\x1aBt\xf5U~\xeeY"),
          (b"\xe4|z\xaer8\xa0\xa3\xc8\x9c\x8d\x03`\xb8rN", b"b\xda\xf8e\n\x1f\xc1\xb2\x97\x13\xd7\x91\x91\x86\xbe\x13"),
          (b"\x00\x84\xc1\xd8\xe0ub\x1a\xc0\xa1\xfe\x08K\xfb>,", b"g\xb06$\xbb{s\xb9-\x8e5h|+\xf8\xb1"),
          (b"(\xac{\xb1^OOy\xc98\xf5\xc5\xc2\xd3\xb3\x1e", b"\x8d\xfc\x0f\xda\xd5\xa6\x89)\x92\x173\x1e)*\xf4&"),
          (b"G*.\xe2\xd8\xee\x87\xfa\x83~eS\xcc\x1e\xdeQ", b"\xc9y \x1a]Y\x8d\xf8\xa0\xd0^p\xc1\xf1\xa4\xd2"),
          (b"\x16\x91\xd7[\xbeC\x80T\x08\x0f\xbd\xe1\xf7\xbd\x1b\xb8", b"\xcc\x8c\xfb\x1f\"\x16\xa7/\xe2inK\xaa\xdf\xc0\xe4"),
          (b"\x1d\xbc\xf2\x9b\xe90\x13\xc9\tuuk\r\xe4\xa9\xfa", b"\xf6\x97\xf2B\xaa\x83\xf9k\x15K\xa0\xb9\xdf\xf4,\x02"),
          (b"hIA(;\x8b\x92G!K\xca\x1a\xfd\x8c9\x95", b"0!V\xfc\xfb\xa1,R\xc6\xdaO\x9c\x16\xa8<\xe2"),
          (b"?\xd8\xd7e\x88\xad\xcd\xd6\x8d\\\x1e\xc2l$;\xd8", b"\x88\x8e\x86'#f\xcf\xe3,\x96&\r\xb1\xee@g"),
          (b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", b"\x99\xbf\xf9qo\xde\x92xge\x04\x84\xb8 \xfb\xe3"),
          (b"\xa7\x8d\x83\xc8\xa3\xc15\xdc`\x82\xed\xae.\xea\xf2\xea", b"[\xe8{\r\xd6g\xcf\xe8L\x16\xde\xcd\x90M\xbd\x9e"),
          (b"\x1a\x1b;\xa62\x10i\xb2\x19n\xc1\xddKz\xb4\xd8", b";On\xd2N\xef%B\x05_T9)IG\xcf"),
          (b"@g`\x1e\xc1\xdf\x14A\xc1\x04N\xb6]\x97\xd3\x17", b"\xbc6\xcb\xaah*\xf1\xd0_\xd9B\x94F\xa8\xd8\r"),
          (b"<\x86\xa4z\xb4\xaeS\x90\xa9\xb5I\xe8\xe6FI\x8c", b"\r2\xd2.H\"\xfb\x9d}{\x11^\xdf\xcb\x186"),
          (b"\xf0\xb8J|\x0b\xfe\xcd\x8b\xc0$&\"8\xffob", b"\xa8}\xf31\xdd\x07\xc6\xa0\xbb\x9f0\xca\xe3&^\xf6"),
          (b"r\x9b\x81\xa0D\x94\x80\xfdu\xb1O\xbc\xf6\x1a\xd8\xe0", b"\xd4)\xb6\xdbQ\xa5#\x16\xed9\xadM\xf82%\xeb"),
          (b"\xb9V\xb7R\xb9\xcdJ9c\x1a`V\xb9\xb0oX", b"\x9a\xff]\xb3\x83\x96\xf5\r\x98\xe2\xc5\x86\xa21k%"),
          (b"J\xe9\xaa\x8f\x07\xab\x90\x18'\xa6\x85\x08\x18\xa8\x96?", b"\xa7\x83\xed\xd9\t\xd8\x85\xe4\xf8\xcb0\x95n\xf2`\xa8"),
          (b"\xc3\xaf\xc8m\xfc\x90}=\xb8\x1fr\xab}\r\xc1!", b"\x04\xa0sq\xf7\xd3Dm\xb9\xb8\x9c\xbap(\xa1L"),
          (b"\x97bq\x0f\x93\xb5\xa1\xa1\xf58\xdcu5\x9a\xbd\x8b", b"\x07'\xc0\xdb/0\xd9H\x81\x06\xe2^ `F\xa9"),
          (b"Nn\xd4\xbd\x13dN\x9e\xcd\xd4\x06kk~\xf5\x7f", b"\x98j'}\xc5\xb0\x1d\x19\xe2/\x92'\x9e\xd4\xba("),
          (b"\x08ty\x84\xde>\x0f5jL6\x05\xe1\x02pH", b"0\xb9\xd5\xd40u\x04\xfdK\xe2D\xbc\xddy\xd4\x11"),
      ]
      
    • code00.py:

      #!/usr/bin/env python
      
      import gc
      import random
      import sys
      from pprint import pprint as pp
      
      import _example as ex
      from data import data
      
      
      def test(repeat):
          tmp = data * repeat
          if ex.size() < 4000000:
              for idx, it in enumerate(tmp):
                  t = f"dummy content {idx}"
                  ex.add_node(it[0], t.encode(), random.randint(0, 10000))
      
          print(f"List (internal) size: {ex.size()}")
          print(f"Input data len: {len(data)}")
          search_feat = data[0][0]
          t = b"\xd3 "
          print("Start search:")
          res = ex.search(search_feat, t, random.randint(0, 10000), 0.1)
          pp(res)
          print(f"GC len: {len(gc.get_objects())}")
      
      
      def main(*argv):
          test(1)
          print(f"List (internal) size: {ex.size()}")
          print(f"Freed {ex.cleanup()} elements")
          print(f"List (internal) size: {ex.size()}")
          print(f"GC len: {len(gc.get_objects())}")
      
      
      if __name__ == "__main__":
          print("Python {:s} {:03d}bit on {:s}\n".format(" ".join(elem.strip() for elem in sys.version.split("\n")),
                                                         64 if sys.maxsize > 0x100000000 else 32, sys.platform))
          rc = main(*sys.argv[1:])
          print("\nDone.\n")
          sys.exit(rc)
      

    Outputs:

    • Win (C .dll called from a C .exe):

      [cfati@CFATI-5510-0:e:\Work\Dev\StackExchange\StackOverflow\q076229579]> sopr.bat
      ### Set shorter prompt to better fit when pasted in StackOverflow (or other) pages ###
      
      [prompt]> "c:\Install\pc032\Microsoft\VisualStudioCommunity\2019\VC\Auxiliary\Build\vcvarsall.bat" x64 > nul
      
      [prompt]>
      [prompt]> dir /b
      code00.py
      data.py
      dll00.c
      dll00.h
      example.c
      main00.c
      orig
      setup.py
      
      [prompt]>
      [prompt]> cl /nologo /MD /DDLL dll00.c  /link /NOLOGO /DLL /OUT:dll00.dll
      dll00.c
         Creating library dll00.lib and object dll00.exp
      
      [prompt]> cl /nologo /MD /W0 main00.c  /link /NOLOGO /OUT:win_main00.exe dll00.lib
      main00.c
      
      [prompt]> del *.obj *.exp
      
      [prompt]> dir /b
      code00.py
      data.py
      dll00.c
      dll00.dll
      dll00.h
      dll00.lib
      example.c
      main00.c
      orig
      setup.py
      win_main00.exe
      
      [prompt]>
      [prompt]> win_main00.exe
      Element count: 0
      Add node (0) returned: 0
      Add node (1) returned: 0
      Element count: 2
      Search for 'xxx' ...
      Found data (0 items):
      Search for '111' ...
      Found data (1 items):
       Index: 0
        Score: 0.300
        Id: 2, Features length: 3, Content length: 7
        Feats: 1?
        Cnt: 567
      Search for 'xaa' ...
      Found data (1 items):
       Index: 0
        Score: 0.300
        Id: 3, Features length: 4, Content length: 9
        Feats: ab
        Cnt: d4567
      Search for '1bd' ...
      Found data (2 items):
       Index: 0
        Score: 0.300
        Id: 2, Features length: 3, Content length: 7
        Feats: 1?
        Cnt: 567
       Index: 1
        Score: 0.300
        Id: 3, Features length: 4, Content length: 9
        Feats: ab
        Cnt: d4567
      Freed 2 nodes
      Element count: 0
      
      Done.
      
    • Nix (Python module):

      (py_pc064_03.10_test0) [cfati@cfati-5510-0:/mnt/e/Work/Dev/StackExchange/StackOverflow/q076229579]> ~/sopr.sh
      ### Set shorter prompt to better fit when pasted in StackOverflow (or other) pages ###
      
      [064bit prompt]>
      [064bit prompt]> ls
      code00.py  data.py  dll00.c  dll00.dll  dll00.h  dll00.lib  example.c  main00.c  orig  setup.py  win_main00.exe
      [064bit prompt]>
      [064bit prompt]> python setup.py build
      [064bit prompt]>
      [064bit prompt]> ls
      build  code00.py  data.py  dll00.c  dll00.dll  dll00.h  dll00.lib  example.c  main00.c  orig  setup.py  win_main00.exe
      [064bit prompt]> ls build/lib.linux-x86_64-cpython-310/
      _example.cpython-310-x86_64-linux-gnu.so
      [064bit prompt]>
      [064bit prompt]> PYTHONPATH=${PYTHONPATH}:build/lib.linux-x86_64-cpython-310 python code00.py
      Python 3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0] 064bit on linux
      
      List (internal) size: 100
      Input data len: 100
      Start search:
      [(4555,
        b'dummy content 0',
        b'\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2',
        0.3),
       (273, b'dummy content 74', b'\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO', 0.3),
       (9201,
        b'dummy content 87',
        b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0",
        0.3)]
      GC len: 8942
      List (internal) size: 100
      Freed 100 elements
      List (internal) size: 0
      GC len: 8937
      
      Done.