Search code examples
pythonperformancenumpynumbapython-cffi

How to wrap a CFFI function in Numba taking Pointers


It should be an easy task, but I can't find a way to pass a pointer to a scalar value to a CFFI function within a Numba function. Passing a pointer to an array works without problems using ffi.from_buffer.

Example function

import cffi

# Build the extension module "foo", which exposes foo_f(int, double*).
ffi = cffi.FFI()
# Declaration that CFFI parses to learn the exported function's signature.
defs="void foo_f(int a,double *b);"
ffi.cdef(defs, override=True)
# C implementation: prints a, three spaces, then the first element of b.
# The #include line must not end with a semicolon (it is a preprocessor
# directive, not a statement).
source="""
#include <stdio.h>
void foo_f(int a,double *b){
  printf("%i",a);
  printf("   ");
  printf("%f",b[0]);
  }

"""
ffi.set_source(module_name="foo",source=source)
# Compile the module so it can be imported as `foo` afterwards.
ffi.compile()

Passing a pointer to an array

import numpy as np
import numba as nb
import cffi
ffi = cffi.FFI()
import numpy as np
import ctypes
import foo
# Register the compiled CFFI module with Numba before its functions are used
# inside jitted code.
nb.cffi_support.register_module(foo)
# Module-level alias for the C function; Test calls it by this name.
foo_f = foo.lib.foo_f

@nb.njit()
def Test(a, b):
    """Call the C function ``foo_f`` with ``a`` as an int32 and ``b`` as a pointer.

    ``a`` is converted with ``np.int32``; ``b`` is converted to a float64
    array whose buffer is passed to ``foo_f``. Nothing is returned.
    """
    count = np.int32(a)
    # This route works for an array: from_buffer yields a pointer to its data.
    as_doubles = b.astype(np.float64)
    data_ptr = ffi.from_buffer(as_doubles)
    foo_f(count, data_ptr)


# a is given as a float here; Test converts it with np.int32 before the call.
a=64.
# Array of five ones; foo_f only prints its first element.
b=np.ones(5)
Test(a,b)

This works without problems, but how can I modify the Test function to take a scalar value (e.g. b = 5.0) without modifying the CFFI function itself?


Solution

  • Pass scalar values by reference using Numba

    To get useful timings I have modified the wrapped function a bit. The function simply adds a scalar a (passed by value) to a scalar b (passed by reference).

    Pros and cons of the approach using intrinsics

    • Only working in nopython mode
    • Faster for C or Fortran functions with short runtime (real-world example)

    Example function

    import cffi
    
    # Build the extension module "foo" used for the timing comparison.
    ffi = cffi.FFI()
    # Signature as seen by CFFI: a is passed by value, b through a pointer.
    defs="void foo_f(double a,double *b);"
    ffi.cdef(defs, override=True)
    # The C body adds a, in place, to the double that b points at.
    source="""
    void foo_f(double a,double *b){
      b[0]+=a;
      }
    """
    ffi.set_source(module_name="foo",source=source)
    ffi.compile()
    

    Wrapper using a temporary array

    This is quite straightforward, but requires allocating an array of size one, which is quite slow.

    import numpy as np
    import numba as nb
    from numba import cffi_support
    import cffi
    ffi = cffi.FFI()
    import foo
    
    # Register the compiled CFFI module with Numba before its functions are
    # used inside jitted code.
    nb.cffi_support.register_module(foo)
    # Module-level alias for the C function; the jitted wrappers call it by
    # this name.
    foo_f = foo.lib.foo_f
    
    @nb.njit("float64(float64,float64)")
    def method_using_arrays(a,b):
        """Pass the scalar ``b`` to ``foo_f`` by reference via a one-element array.

        Args:
            a: float64 passed to ``foo_f`` by value.
            b: float64 whose value is copied into temporary storage and
                passed to ``foo_f`` through a pointer.

        Returns:
            The float64 held in the temporary storage after ``foo_f`` ran.
        """
        # A one-element float64 array provides addressable memory for b.
        b_arr=np.empty(1,dtype=np.float64)
        b_arr[0]=b
        b_arr_ptr=ffi.from_buffer(b_arr)
        foo_f(a,b_arr_ptr)
        # foo_f writes through the pointer, so the result is read back
        # from the array.
        return b_arr[0]
    

    Wrapper using intrinsics

    from numba import types
    from numba.extending import intrinsic
    from numba import cgutils
    
    @intrinsic
    def ptr_from_val(typingctx, data):
        """Numba intrinsic: put a value into newly allocated storage and return a pointer to it.

        The result type is ``types.CPointer`` of the argument's type.
        """
        def codegen(context, builder, signature, args):
            # The signature has a single argument; alloca_once_value creates
            # the storage and places that value in it.
            value = args[0]
            return cgutils.alloca_once_value(builder, value)

        return types.CPointer(data)(data), codegen
    
    @intrinsic
    def val_from_ptr(typingctx, data):
        """Numba intrinsic: load and return the value that a pointer refers to.

        The result type is ``data.dtype``, taken from the pointer argument's type.
        """
        def codegen(context, builder, signature, args):
            # The single argument is the pointer to read from.
            pointer = args[0]
            return builder.load(pointer)

        return data.dtype(data), codegen
    
    @nb.njit("float64(float64,float64)")
    def method_using_intrinsics(a, b):
        """Pass the scalar ``b`` to ``foo_f`` by reference without a temporary array.

        ``ptr_from_val`` supplies a pointer to a copy of ``b``, ``foo_f`` is
        called with ``a`` and that pointer, and the value found behind the
        pointer afterwards is returned.
        """
        slot = ptr_from_val(b)
        foo_f(a, slot)
        updated = val_from_ptr(slot)
        return updated
    

    Timings

    # Chain 1000 calls of the wrapper so the per-call cost dominates the timing
    @nb.njit()
    def timing_method_using_intrinsics(a, b):
        """Call ``method_using_intrinsics`` 1000 times, feeding each result into the next call."""
        acc = b
        for _ in range(1000):
            acc = method_using_intrinsics(a, acc)
        return acc
    
    # Chain 1000 calls of the wrapper so the per-call cost dominates the timing
    @nb.njit()
    def timing_method_using_arrays(a, b):
        """Call ``method_using_arrays`` 1000 times, feeding each result into the next call."""
        acc = b
        for _ in range(1000):
            acc = method_using_arrays(a, acc)
        return acc
    
    # Both inputs are Python floats.
    a=1.
    b=1.
    
    # %timeit is an IPython magic, so the lines below need an IPython/Jupyter
    # session; the comment after each one is the measurement reported by the
    # author.
    %timeit timing_method_using_intrinsics(a,b)
    #5.15 µs ± 33.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
    %timeit timing_method_using_arrays(a,b)
    #121 µs ± 601 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)