I'm implementing a memset function that's supposed to set a bytes object buffer to zero.
As of Python 3.11, the buffer API functions PyObject_GetBuffer() and PyBuffer_Release() are part of the Stable ABI.
The code below works, but I have a couple of questions about it:
def memset(bytes_object):
    """Overwrite every byte of *bytes_object*'s buffer with zero, in place.

    The buffer is obtained through the C-level buffer protocol
    (``PyObject_GetBuffer``) with flags ``0``, i.e. a writable buffer is not
    requested, so buffers that report themselves read-only are overwritten
    as well.

    NOTE(review): zeroing an immutable object such as ``bytes`` changes it
    behind the interpreter's back; confirm that this is really intended
    before reusing the function outside of experiments.

    Raises:
        Whatever ``PyObject_GetBuffer`` raises when the object cannot export
        a buffer (the call goes through ``ctypes.pythonapi``, which turns a
        pending C-level error into a Python exception).
    """
    import ctypes

    class Py_buffer(ctypes.Structure):
        # Field order and types mirror the C ``Py_buffer`` struct; they must
        # match the layout used by the running interpreter.
        _fields_ = [
            ('buf', ctypes.c_void_p),
            ('obj', ctypes.py_object),
            ('len', ctypes.c_ssize_t),
            ('itemsize', ctypes.c_ssize_t),
            ('readonly', ctypes.c_int),
            ('ndim', ctypes.c_int),
            ('format', ctypes.c_char_p),
            ('shape', ctypes.POINTER(ctypes.c_ssize_t)),
            ('strides', ctypes.POINTER(ctypes.c_ssize_t)),
            ('suboffsets', ctypes.POINTER(ctypes.c_ssize_t)),
            ('internal', ctypes.c_void_p),
        ]

    # Private, explicitly typed prototypes.  Building them with PYFUNCTYPE
    # (instead of assigning .argtypes/.restype on ctypes.pythonapi.<name>)
    # keeps the shared function objects untouched for other users.  The
    # view pointer is declared as void* so that no pointer type has to be
    # created for the per-call structure class.
    get_buffer = ctypes.PYFUNCTYPE(
        ctypes.c_int, ctypes.py_object, ctypes.c_void_p, ctypes.c_int
    )(("PyObject_GetBuffer", ctypes.pythonapi))
    # PyBuffer_Release returns void, hence restype None.
    release_buffer = ctypes.PYFUNCTYPE(None, ctypes.c_void_p)(
        ("PyBuffer_Release", ctypes.pythonapi)
    )

    view = Py_buffer()
    # Acquire outside the try block: the view must only be released after a
    # successful PyObject_GetBuffer call.
    get_buffer(bytes_object, ctypes.byref(view), 0)
    try:
        # Nothing to do for an empty (or NULL) buffer.
        if view.buf and view.len > 0:
            ctypes.memset(view.buf, 0, view.len)
    finally:
        release_buffer(ctypes.byref(view))
# Demo: build a fresh bytes object, zero it in place and show it before and
# after the call.
obj = "hello world".encode("ascii")
print("before:", repr(obj))
memset(obj)
print("after:", repr(obj))
Gives this output:
before: b'hello world'
after: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
For reference, here's an older way of doing memset on a buffer, using the deprecated PyObject_AsCharBuffer function. It still works as of Python 3.11, though:
def memset_old(bytes_object: bytes):
    """Zero the contents of a ``bytes`` object in place (legacy approach).

    Uses the old-style buffer function ``PyObject_AsCharBuffer`` to obtain a
    pointer to the object's data and its length, then overwrites that many
    bytes with zero.

    NOTE(review): ``PyObject_AsCharBuffer`` is deprecated; verify that the
    interpreter in use still exports it before relying on this function.

    Args:
        bytes_object: The ``bytes`` instance whose data is overwritten.

    Raises:
        TypeError: If *bytes_object* is not a ``bytes`` instance.
    """
    import ctypes

    if not isinstance(bytes_object, bytes):
        raise TypeError(f"expected bytes, not {type(bytes_object)}")

    data = ctypes.POINTER(ctypes.c_char)()
    # The length out-parameter of PyObject_AsCharBuffer is a Py_ssize_t*,
    # not an int*; a c_int is too small on 64-bit builds.
    size = ctypes.c_ssize_t()
    ctypes.pythonapi.PyObject_AsCharBuffer(
        ctypes.py_object(bytes_object),
        ctypes.byref(data),
        ctypes.byref(size),
    )
    ctypes.memset(data, 0, size.value)
# Demo of the legacy variant: build a fresh bytes object, zero it in place
# and show it before and after the call.
obj = "hello world".encode("ascii")
print("old before:", repr(obj))
memset_old(obj)
print("old after:", repr(obj))
It feels strange that I have to define my own `Py_buffer` class. Isn't there one predefined somewhere?
No: `Py_buffer` isn't defined anywhere except in the `pybuffer.h` header in the Python `include` directory.
Am I using `PyBuffer_Release` correctly?
Yes, but it is good practice to define `.argtypes` and `.restype` so that `ctypes` can type-check the arguments. Also, as noted, the buffers of `bytes` objects are meant to be read-only, so altering them is undefined behavior.
Full example with typechecking:
import ctypes as ct
# Request flags for the third argument of PyObject_GetBuffer.
PyBUF_SIMPLE = 0x0000    # plain request, no extra guarantees asked for
PyBUF_WRITABLE = 0x0001  # NOTE(review): presumably "buffer must be writable" - confirm in the C-API docs
class Py_buffer(ct.Structure):
    '''ctypes mirror of the C-level ``Py_buffer`` struct.

    Field order and types must match the layout used by the running
    interpreter, because instances are passed by reference to C functions
    that fill them in.
    '''
    _fields_ = (('buf', ct.c_void_p),
                ('obj', ct.py_object),
                ('len', ct.c_ssize_t),
                ('itemsize', ct.c_ssize_t),
                ('readonly', ct.c_int),
                ('ndim', ct.c_int),
                ('format', ct.c_char_p),
                ('shape', ct.POINTER(ct.c_ssize_t)),
                ('strides', ct.POINTER(ct.c_ssize_t)),
                ('suboffsets', ct.POINTER(ct.c_ssize_t)),
                ('internal', ct.c_void_p))

    def __repr__(self):
        '''Display representation of a buffer.

        Shows the exporting object and the read-only flag.  A structure
        whose ``obj`` pointer is NULL (freshly created or already released)
        is shown with ``obj=<NULL>`` instead of raising.
        '''
        try:
            # Reading a py_object field that holds a NULL pointer raises
            # ValueError; only that access is guarded, not the repr() of
            # the exporter itself.
            exporter = self.obj
        except ValueError:
            return f'Py_buffer(obj=<NULL>, readonly={self.readonly})'
        return f'Py_buffer(obj={exporter!r}, readonly={self.readonly})'
# Bind the C-API entry points to module-level names and declare their
# parameter and return types, so that ctypes validates and converts the
# arguments on every call.
PyObject_CheckBuffer = ct.pythonapi.PyObject_CheckBuffer
PyObject_CheckBuffer.restype = ct.c_int
PyObject_CheckBuffer.argtypes = (ct.py_object,)

PyObject_GetBuffer = ct.pythonapi.PyObject_GetBuffer
PyObject_GetBuffer.restype = ct.c_int
PyObject_GetBuffer.argtypes = (ct.py_object, ct.POINTER(Py_buffer), ct.c_int)

PyBuffer_Release = ct.pythonapi.PyBuffer_Release
PyBuffer_Release.restype = None  # no return value
PyBuffer_Release.argtypes = (ct.POINTER(Py_buffer),)
def memset(obj):
    '''Zero the buffer exported by *obj* in place.

    Objects that do not support the buffer interface are silently ignored.
    The buffer is requested with ``PyBUF_SIMPLE``; the resulting view is
    printed before its ``len`` bytes are overwritten with zero.
    '''
    # Guard clause: nothing to do for objects without buffer support.
    if not PyObject_CheckBuffer(obj):
        return

    view = Py_buffer()
    try:
        # PyObject_GetBuffer can throw an exception if it fails.
        PyObject_GetBuffer(obj, ct.byref(view), PyBUF_SIMPLE)
        print(view)  # show the view that was obtained
        ct.memset(view.buf, 0, view.len)  # overwrite the data with zeros
    finally:
        PyBuffer_Release(ct.byref(view))
# Demo: zero a bytes object in place and show it before and after the call.
# NOTE(review): `obj` is an immutable bytes object; the accompanying text
# describes altering such a buffer as undefined behavior.
obj = b'hello, world!'
print("before:", repr(obj))
memset(obj)
print("after:", repr(obj))
Output:
before: b'hello, world!'
Py_buffer(obj=b'hello, world!', readonly=1)
after: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'