Search code examples
Tags: python, python-3.x, com, 7zip, python-cffi

Python CFFI + 7z.dll (COM) == access violations (sometimes)


I have shortened my code into a minimal use case.

The script always completes, but approximately 50% of the time it gives a memory access violation/crashes.

This is using 64-bit Python 3 on Windows 7.

This code is supposed to loop through all the handlers for 7-Zip and print the extensions each one supports (a space-separated list that may be empty: not VT_EMPTY, just bstrVal = L"").

#!/usr/bin/python
from cffi import FFI
# Reproduction script: load 7z.dll through CFFI (in-line ABI mode), ask it how
# many format handlers it has, then print the kExtension property of each one.
ffi = FFI()
# Declare the C types and the two 7z.dll exports used below. The text inside
# the string is parsed by CFFI at runtime.
ffi.cdef("""

typedef struct PROPVARIANT {
    unsigned short    vt;
    unsigned short    wReserved1;
    unsigned short    wReserved2;
    unsigned short    wReserved3;
    union {
        int          intVal;
        wchar_t*      bstrVal;
        /* shortened for clarity */
    };
} PROPVARIANT;

typedef unsigned long long HRESULT;
typedef uint32_t PROPID;

HRESULT GetNumberOfFormats(uint32_t * numFormats);
HRESULT GetHandlerProperty2(uint32_t index, PROPID propID, PROPVARIANT * value);
""")
# NOTE(review): the union above only lists an int and a pointer, so this
# PROPVARIANT is presumably smaller than the Windows SDK definition on 64-bit.
# If 7z.dll stores a full-size PROPVARIANT through the pointer it is given,
# it would write past the buffer allocated below - confirm against the SDK
# headers; this may explain the intermittent access violations.
# NOTE(review): HRESULT is declared 64 bits wide here; the Windows SDK
# presumably defines it as a 32-bit signed integer - confirm.

S_OK = 0x00000000  # Operation successful
VT_BSTR = 8  # value the loop below expects in PROPVARIANT.vt
kExtension = 2  # property id passed to GetHandlerProperty2

dll7z = ffi.dlopen('7z.dll')
# One uint32_t owned by CFFI; the DLL writes the handler count into it.
num_formats = ffi.new('uint32_t*')
# NOTE: the DLL call is the assert's operand, so it is not executed at all
# when Python runs with -O (assert statements are stripped).
assert dll7z.GetNumberOfFormats(num_formats) == S_OK
print(num_formats[0])

# A single PROPVARIANT, sized from the cdef above, reused for every handler.
tmp_pvar = ffi.new('PROPVARIANT*')
for i in range(num_formats[0]):
    # Same caveat as above: this call disappears under -O.
    assert dll7z.GetHandlerProperty2(i, kExtension, tmp_pvar) == S_OK
    # tmp_pvar is the pointer created by ffi.new() above and is never
    # reassigned, so this check cannot detect a failed call.
    assert tmp_pvar != ffi.NULL
    assert tmp_pvar[0].vt == VT_BSTR

    # Read the wide-character string that bstrVal points to and print it.
    print(ffi.string(tmp_pvar[0].bstrVal))

Vexingly, the equivalent ctypes code seems to work just fine.

from ctypes import *

class PROPVARIANT(Structure):
    """ctypes mirror of the part of the Windows PROPVARIANT this script reads.

    Only the type tag (``vt``) and the string pointer (``bstrVal``) are
    named. The real union also holds members made of a 32-bit count plus a
    pointer, so the native structure is two pointers wide after the 8-byte
    header (16 bytes on 32-bit, 24 bytes on 64-bit). The trailing padding
    field reserves that space, so a callee that stores a complete
    PROPVARIANT through a pointer to this structure stays inside the buffer.
    """
    _fields_ = [
        ('vt', c_ushort),
        # Reserved header words; the original (misspelled) names are kept so
        # existing attribute access keeps working.
        ('resrved1',  c_ushort),
        ('resrved2',  c_ushort),
        ('resrved3',  c_ushort),
        ('bstrVal', c_wchar_p),
        # Second pointer-sized slot of the native union; never read here.
        ('_padding', c_void_p),
    ]

# List the extensions supported by every 7-Zip format handler, using ctypes.
dll7z = windll.LoadLibrary('7z.dll')

S_OK = 0x00000000  # Operation successful
VT_BSTR = 8
kExtension = 2

# Declare the prototypes so ctypes converts arguments and the 32-bit HRESULT
# return value explicitly instead of guessing.
dll7z.GetNumberOfFormats.argtypes = [POINTER(c_uint)]
dll7z.GetNumberOfFormats.restype = c_long
dll7z.GetHandlerProperty2.argtypes = [c_uint, c_uint, POINTER(PROPVARIANT)]
dll7z.GetHandlerProperty2.restype = c_long


def _check_hresult(hr, what):
    """Raise RuntimeError when *hr* is not S_OK; *what* names the call."""
    if hr != S_OK:
        raise RuntimeError(
            '%s failed with HRESULT 0x%08X' % (what, hr & 0xFFFFFFFF))


# The calls are made outside of assert statements: an assert (and the call
# inside it) is removed entirely when Python runs with -O.
num_formats = c_uint()
_check_hresult(dll7z.GetNumberOfFormats(byref(num_formats)),
               'GetNumberOfFormats')
print(num_formats.value)

tmp_pvar = PROPVARIANT()
for i in range(num_formats.value):
    # The C prototype takes a PROPVARIANT*; pass the structure by reference.
    # Passing the instance itself hands the callee a by-value copy, so the
    # result would never reach tmp_pvar.
    _check_hresult(dll7z.GetHandlerProperty2(i, kExtension, byref(tmp_pvar)),
                   'GetHandlerProperty2(%d)' % i)
    if tmp_pvar.vt != VT_BSTR:
        raise RuntimeError(
            'handler %d: expected VT_BSTR (%d), got vt=%d'
            % (i, VT_BSTR, tmp_pvar.vt))
    print(tmp_pvar.bstrVal)

Solution

  • OK, this looks like a CFFI bug.

    If I use malloc/free/memset manually it works fine.

    Code:

    from cffi import FFI
    # List the extensions supported by every 7-Zip format handler, using CFFI
    # (in-line ABI mode) with a manually malloc'ed PROPVARIANT buffer.
    ffi = FFI()
    ffi.cdef("""
    
    typedef struct PROPVARIANT {
        unsigned short    vt;
        unsigned short    wReserved1;
        unsigned short    wReserved2;
        unsigned short    wReserved3;
        union {
            int          intVal;
            wchar_t*      bstrVal;
            /* shortened for clarity; the padding keeps the union as wide as
               the SDK's largest members (a 32-bit count plus a pointer) */
            void*         padding[2];
        };
    } PROPVARIANT;
    
    typedef int32_t HRESULT;
    typedef uint32_t PROPID;
    
    HRESULT GetNumberOfFormats(uint32_t * numFormats);
    HRESULT GetHandlerProperty2(uint32_t index, PROPID propID, PROPVARIANT * value);
    
    void * malloc(size_t);
    void * memset(void*, int, size_t);
    void free(void*);
    """)
    # Why the declarations differ from a naive transcription:
    #  * HRESULT is a 32-bit signed value; reading it as a 64-bit integer
    #    would compare bits the callee never set.
    #  * memset returns void* and takes a size_t length; declaring the length
    #    as int passes only 32 defined bits on 64-bit Windows.
    #  * The PROPVARIANT union is padded to two pointers so the buffer is as
    #    large as the native structure (16 bytes on 32-bit, 24 on 64-bit).
    # NOTE(review): an undersized PROPVARIANT being overrun by the DLL may be
    # the real cause of the intermittent crashes, rather than a CFFI bug -
    # confirm by retrying ffi.new('PROPVARIANT*') with this declaration.
    
    S_OK = 0x00000000  # Operation successful
    VT_BSTR = 8
    kExtension = 2
    
    
    def _check_hresult(hr, what):
        """Raise RuntimeError when *hr* is not S_OK; *what* names the call."""
        if hr != S_OK:
            raise RuntimeError(
                '%s failed with HRESULT 0x%08X' % (what, hr & 0xFFFFFFFF))
    
    
    dll7z = ffi.dlopen('7z.dll')
    # Presumably resolves malloc/memset/free from the process's C runtime;
    # NOTE(review): verify that dlopen(None) works on the target
    # Windows/Python combination.
    C = ffi.dlopen(None)
    
    # The DLL calls are made outside of assert statements: an assert (and the
    # call inside it) is removed entirely when Python runs with -O.
    num_formats = ffi.new('uint32_t*')
    _check_hresult(dll7z.GetNumberOfFormats(num_formats), 'GetNumberOfFormats')
    print(num_formats[0])
    
    pvar_size = ffi.sizeof('PROPVARIANT')
    raw_buffer = C.malloc(pvar_size)
    # Check the allocation before the buffer is touched, not after.
    if raw_buffer == ffi.NULL:
        raise MemoryError('malloc(%d) failed' % pvar_size)
    tmp_pvar = ffi.cast("PROPVARIANT*", raw_buffer)
    
    try:
        C.memset(tmp_pvar, 0, pvar_size)
    
        for i in range(num_formats[0]):
            _check_hresult(dll7z.GetHandlerProperty2(i, kExtension, tmp_pvar),
                           'GetHandlerProperty2(%d)' % i)
            if tmp_pvar[0].vt != VT_BSTR:
                raise RuntimeError(
                    'handler %d: expected VT_BSTR (%d), got vt=%d'
                    % (i, VT_BSTR, tmp_pvar[0].vt))
    
            print(ffi.string(tmp_pvar[0].bstrVal))
    finally:
        # Release the buffer even when a call above fails.
        # NOTE(review): the string last stored in bstrVal is not released
        # here; confirm whether it should be cleared before the free.
        C.free(tmp_pvar)