Search code examples
pythonmultithreadingfftenthoughtintel-mkl

Threaded FFT in Enthought Python


Fast Fourier Transforms (FFTs) in Numpy/SciPy are not threaded. Enthought Python is shipped with the Intel MKL numerical library, which is capable of threaded FFTs. How does one get access to these routines?


Solution

  • New and improved version which handles arbitrary strides in the input and output arrays. By default this one is now not-in-place and creates a new array. It mimics the Numpy FFT routines except that it has a different normalisation.

    ''' Wrapper to MKL FFT routines '''
    
    import numpy as _np
    import ctypes as _ctypes
    
    mkl = _ctypes.cdll.LoadLibrary("mk2_rt.dll")
    _DFTI_COMPLEX = _ctypes.c_int(32)
    _DFTI_DOUBLE = _ctypes.c_int(36)
    _DFTI_PLACEMENT = _ctypes.c_int(11)
    _DFTI_NOT_INPLACE = _ctypes.c_int(44)
    _DFTI_INPUT_STRIDES = _ctypes.c_int(12)
    _DFTI_OUTPUT_STRIDES = _ctypes.c_int(13)
    
    def fft2(a, out=None):
        ''' 
    Forward two-dimensional double-precision complex-complex FFT.
    Uses the Intel MKL libraries distributed with Enthought Python.
    Normalisation is different from Numpy!
    By default, allocates new memory like 'a' for output data.
    Returns the array containing output data.
    '''
    
        assert a.dtype == _np.complex128
        assert len(a.shape) == 2
    
        inplace = False
    
        if out is a:
            inplace = True
    
        elif out is not None:
            assert out.dtype == _np.complex128
            assert a.shape == out.shape
            assert not _np.may_share_memory(a, out)
    
        else:
            out = _np.empty_like(a)
    
        Desc_Handle = _ctypes.c_void_p(0)
        dims = (_ctypes.c_int*2)(*a.shape)
    
        mkl.DftiCreateDescriptor(_ctypes.byref(Desc_Handle), _DFTI_DOUBLE, _DFTI_COMPLEX, _ctypes.c_int(2), dims )
    
        #Set input strides if necessary
        if not a.flags['C_CONTIGUOUS']:
            in_strides = (_ctypes.c_int*3)(0, a.strides[0]/16, a.strides[1]/16)
            mkl.DftiSetValue(Desc_Handle, _DFTI_INPUT_STRIDES, _ctypes.byref(in_strides))
    
        if inplace:
            #Inplace FFT
            mkl.DftiCommitDescriptor(Desc_Handle)
            mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p) )
    
        else:
            #Not-inplace FFT
            mkl.DftiSetValue(Desc_Handle, _DFTI_PLACEMENT, _DFTI_NOT_INPLACE)
    
            #Set output strides if necessary
            if not out.flags['C_CONTIGUOUS']:
                out_strides = (_ctypes.c_int*3)(0, out.strides[0]/16, out.strides[1]/16)
                mkl.DftiSetValue(Desc_Handle, _DFTI_OUTPUT_STRIDES, _ctypes.byref(out_strides))
    
            mkl.DftiCommitDescriptor(Desc_Handle)
            mkl.DftiComputeForward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p), out.ctypes.data_as(_ctypes.c_void_p) )
    
        mkl.DftiFreeDescriptor(_ctypes.byref(Desc_Handle))
    
        return out
    
    def ifft2(a, out=None):
        ''' 
    Backward two-dimensional double-precision complex-complex FFT.
    Uses the Intel MKL libraries distributed with Enthought Python.
    Normalisation is different from Numpy!
    By default, allocates new memory like 'a' for output data.
    Returns the array containing output data.
    '''
    
        assert a.dtype == _np.complex128
        assert len(a.shape) == 2
    
        inplace = False
    
        if out is a:
            inplace = True
    
        elif out is not None:
            assert out.dtype == _np.complex128
            assert a.shape == out.shape
            assert not _np.may_share_memory(a, out)
    
        else:
            out = _np.empty_like(a)
    
        Desc_Handle = _ctypes.c_void_p(0)
        dims = (_ctypes.c_int*2)(*a.shape)
    
        mkl.DftiCreateDescriptor(_ctypes.byref(Desc_Handle), _DFTI_DOUBLE, _DFTI_COMPLEX, _ctypes.c_int(2), dims )
    
        #Set input strides if necessary
        if not a.flags['C_CONTIGUOUS']:
            in_strides = (_ctypes.c_int*3)(0, a.strides[0]/16, a.strides[1]/16)
            mkl.DftiSetValue(Desc_Handle, _DFTI_INPUT_STRIDES, _ctypes.byref(in_strides))
    
        if inplace:
            #Inplace FFT
            mkl.DftiCommitDescriptor(Desc_Handle)
            mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p) )
    
        else:
            #Not-inplace FFT
            mkl.DftiSetValue(Desc_Handle, _DFTI_PLACEMENT, _DFTI_NOT_INPLACE)
    
            #Set output strides if necessary
            if not out.flags['C_CONTIGUOUS']:
                out_strides = (_ctypes.c_int*3)(0, out.strides[0]/16, out.strides[1]/16)
                mkl.DftiSetValue(Desc_Handle, _DFTI_OUTPUT_STRIDES, _ctypes.byref(out_strides))
    
            mkl.DftiCommitDescriptor(Desc_Handle)
            mkl.DftiComputeBackward(Desc_Handle, a.ctypes.data_as(_ctypes.c_void_p), out.ctypes.data_as(_ctypes.c_void_p) )
    
        mkl.DftiFreeDescriptor(_ctypes.byref(Desc_Handle))
    
        return out