Search code examples
Tags: python, ctypes, cpython

Doing ctypes.memset as of Python 3.11?


I'm implementing a memset function that's supposed to set a bytes object buffer to zero.

As of Python 3.11, the buffer API functions PyObject_GetBuffer() and PyBuffer_Release() are part of the Stable ABI.

The below code works, but:

  • It feels strange that I have to define my own Py_buffer class. Isn't there one predefined somewhere?
  • Am I using PyBuffer_Release correctly?
def memset(bytes_object):
    """Overwrite every byte of a buffer-exporting object with zero, in place.

    The object's memory is located through the C-level buffer protocol
    (``PyObject_GetBuffer``) and cleared with ``ctypes.memset``. The view is
    released again even when the fill itself raises.

    Args:
        bytes_object: Object to wipe, e.g. a ``bytes`` instance. The view is
            requested with flags ``0`` (no writability requested), so the
            memory of objects that are normally immutable is overwritten too.

    Returns:
        None.
    """
    import ctypes

    ssize_ptr = ctypes.POINTER(ctypes.c_ssize_t)

    class Py_buffer(ctypes.Structure):
        # Hand-written ctypes mirror of the C ``Py_buffer`` struct; the field
        # order and types have to follow the C declaration exactly.
        _fields_ = (
            ('buf', ctypes.c_void_p),
            ('obj', ctypes.py_object),
            ('len', ctypes.c_ssize_t),
            ('itemsize', ctypes.c_ssize_t),
            ('readonly', ctypes.c_int),
            ('ndim', ctypes.c_int),
            ('format', ctypes.c_char_p),
            ('shape', ssize_ptr),
            ('strides', ssize_ptr),
            ('suboffsets', ssize_ptr),
            ('internal', ctypes.c_void_p),
        )

    api = ctypes.pythonapi
    view = Py_buffer()
    view_ref = ctypes.byref(view)
    request_flags = ctypes.c_int(0)

    # Acquire first; only a successfully filled view needs releasing.
    api.PyObject_GetBuffer(ctypes.py_object(bytes_object), view_ref, request_flags)
    try:
        ctypes.memset(view.buf, 0, view.len)
    finally:
        api.PyBuffer_Release(view_ref)


# Demo: create the bytes object at runtime, then wipe it in place.
obj = "hello world".encode("ascii")
print("before:", repr(obj))
memset(obj)  # returns nothing; obj itself is modified
print("after:", repr(obj))

Gives this output:

before: b'hello world'
after: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'

For reference, here's an older way of doing memset on a buffer that uses the deprecated PyObject_AsCharBuffer function. It still works as of Python 3.11 though:

def memset_old(bytes_object: bytes) -> None:
    """Zero a ``bytes`` object in place via the deprecated ``PyObject_AsCharBuffer``.

    Args:
        bytes_object: The ``bytes`` instance whose memory is overwritten with
            zero bytes. The object is modified in place even though ``bytes``
            is nominally immutable.

    Raises:
        TypeError: If *bytes_object* is not a ``bytes`` instance.
        AttributeError: If the running interpreter does not export
            ``PyObject_AsCharBuffer`` (raised by the ``ctypes.pythonapi``
            symbol lookup).
    """
    import ctypes

    if not isinstance(bytes_object, bytes):
        raise TypeError(f"expected bytes, not {type(bytes_object)}")

    data = ctypes.POINTER(ctypes.c_char)()
    # The C function writes the length through a ``Py_ssize_t *``. A c_int is
    # narrower than that on 64-bit builds, so the out-parameter must be a
    # c_ssize_t to receive the full value without an out-of-bounds store.
    size = ctypes.c_ssize_t()
    ctypes.pythonapi.PyObject_AsCharBuffer(
        ctypes.py_object(bytes_object),
        ctypes.byref(data),
        ctypes.byref(size),
    )
    ctypes.memset(data, 0, size.value)


# Demo of the legacy variant: create the bytes object at runtime, then wipe it.
obj = "hello world".encode("ascii")
print("old before:", repr(obj))
memset_old(obj)  # returns nothing; obj itself is modified
print("old after:", repr(obj))

Solution

  • It feels strange that I have to define my own Py_buffer class. Isn't there one predefined somewhere?

    Py_buffer isn't defined anywhere but in the pybuffer.h header in the Python include directory.

  • Am I using PyBuffer_Release correctly?

    Yes, but it is good practice to define .argtypes and .restype so ctypes can type-check the arguments. Also, as noted, the buffers of bytes objects are meant to be read-only, so altering them is undefined behavior.

    Full example with typechecking:

    import ctypes as ct
    
    # Request flags for the ``flags`` argument of PyObject_GetBuffer.
    PyBUF_SIMPLE = 0x0000    # plain request; writability is not asked for
    PyBUF_WRITABLE = 0x0001  # defined for reference; not used by the example
    
    class Py_buffer(ct.Structure):
        '''ctypes mirror of the C-level ``Py_buffer`` struct.

        The field order and types must follow the C declaration, because the
        C API fills the struct in through a raw pointer.
        '''
        _fields_ = (('buf', ct.c_void_p),
                    ('obj', ct.py_object),
                    ('len', ct.c_ssize_t),
                    ('itemsize', ct.c_ssize_t),
                    ('readonly', ct.c_int),
                    ('ndim', ct.c_int),
                    ('format', ct.c_char_p),
                    ('shape', ct.POINTER(ct.c_ssize_t)),
                    ('strides', ct.POINTER(ct.c_ssize_t)),
                    ('suboffsets', ct.POINTER(ct.c_ssize_t)),
                    ('internal', ct.c_void_p))

        def __repr__(self):
            '''Display representation of a buffer.

            A struct whose ``obj`` pointer is NULL (freshly created, or after
            the view was released) is shown with ``obj=<NULL>`` instead of
            raising.
            '''
            try:
                # Reading a py_object field that holds NULL raises ValueError.
                owner = self.obj
            except ValueError:
                return f'Py_buffer(obj=<NULL>, readonly={self.readonly})'
            return f'Py_buffer(obj={owner!r}, readonly={self.readonly})'
    
    # Declare each C signature up front so ctypes validates and converts the
    # arguments instead of guessing from the Python values passed in.

    # int PyObject_CheckBuffer(PyObject *obj)
    PyObject_CheckBuffer = ct.pythonapi.PyObject_CheckBuffer
    PyObject_CheckBuffer.restype = ct.c_int
    PyObject_CheckBuffer.argtypes = (ct.py_object,)

    # int PyObject_GetBuffer(PyObject *exporter, Py_buffer *view, int flags)
    PyObject_GetBuffer = ct.pythonapi.PyObject_GetBuffer
    PyObject_GetBuffer.restype = ct.c_int
    PyObject_GetBuffer.argtypes = (ct.py_object, ct.POINTER(Py_buffer), ct.c_int)

    # void PyBuffer_Release(Py_buffer *view)
    PyBuffer_Release = ct.pythonapi.PyBuffer_Release
    PyBuffer_Release.restype = None
    PyBuffer_Release.argtypes = (ct.POINTER(Py_buffer),)
    
    def memset(obj):
        '''Zero the memory exposed by obj's buffer, in place.

        Objects that do not support the buffer interface are ignored. The
        acquired view is printed before it is cleared, and it is always
        released afterwards.
        '''
        if not PyObject_CheckBuffer(obj):
            return  # no buffer interface: nothing to do

        view = Py_buffer()
        view_ref = ct.byref(view)
        try:
            # PyObject_GetBuffer can throw an exception if it fails.
            PyObject_GetBuffer(obj, view_ref, PyBUF_SIMPLE)
            print(view)  # show the acquired view
            ct.memset(view.buf, 0, view.len)  # clear the exported memory
        finally:
            PyBuffer_Release(view_ref)
    
    # Build the bytes object at runtime instead of using a literal: a literal
    # is a constant shared with the compiled code, so zeroing it in place
    # would also change every later use of that same constant.
    obj = 'hello, world!'.encode('ascii')
    print("before:", repr(obj))
    memset(obj)  # wipes obj in place
    print("after:", repr(obj))
    

    Output:

    before: b'hello, world!'
    Py_buffer(obj=b'hello, world!', readonly=1)
    after: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'