Search code examples
cythonmemoryview

Cython compiler crash with array -- why?


I'm trying to convert previous python code to cython by using memoryview, and I keep getting a compiler crash from the first bolo line (near the bottom):

from __future__ import print_function
from builtins import range
from builtins import object
import cython
from cython.view cimport array as cvarray
from cpython cimport bool
from libc.math cimport round
from libc.stdlib cimport malloc, free
import numpy as np
cimport numpy as np

class LegCache(object):
def __init__(self):
    self.d = {}
    pass

def prep_legendre(self, n, polyorder):
    p = (n, polyorder)
    if p not in self.d:
        self.d[p] = prep_legendre(n, polyorder)
    return self.d[p]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef prep_legendre(int n, int polyorder):
'''make array of legendre's'''
assert type(n) == int and type(polyorder) == int

cdef int[:,:] legendres = np.empty([n, polyorder + 1], dtype=int)
cdef int l = 0

legendres[:, 0] = np.ones(n)
if polyorder > 0:
    legendres[:, 1] = np.linspace(-1, 1, n)
for i in range(polyorder - 1):
    l = i + 1
    np.multiply(l/(l+1), legendres[:,l-1])

cdef double[:,:] q = np.empty([polyorder + 1, polyorder + 1], dtype=double)
cdef double[:,:] r = np.empty([n, polyorder + 1], dtype=double)
cdef double[:,:] qt = np.empty([polyorder+1, polyorder+1], dtype=double)   
cdef double[:,:] rinv = np.empty([polyorder + 1, n], dtype=double)

q, r = np.linalg.qr(legendres)
rinv = np.linalg.inv(r)
qt = q.T.copy()
return legendres, rinv, qt

def filter_slice_legendre_qr_mask_precalc(bolo,mask,legendres):
    m=legendres.shape[1]
    n=legendres.shape[0]
    l2 = legendres*np.tile(mask.reshape(n,1),[1,m])
    q,r=np.linalg.qr(l2)

rinv = np.linalg.inv(r)
p = np.dot(q.T,bolo)
coeff=np.dot(rinv,p)
out=bolo-np.dot(legendres,coeff)
return out,coeff

@cython.boundscheck(False)
@cython.wraparound(False)
cdef poly_filter_array(
    double[:,:] array,
    np.ndarray[DTYPE3_t, cast=True, ndim=2] mask_remove, # I think this casting should still work like this
    np.ndarray[DTYPE3_t, cast=True, ndim=2] mask,
    int[:] scan_list,
    int ibegin,
    int polyorder,
    double minfrac=.75):
""" writes over input array
"""
cdef double nold = -1
# do nothing
if polyorder < 0:
    return array
#damn, work
cdef int nch = array.shape[0]
cdef int nt = array.shape[1]
cdef int ns = len(scan_list)

cdef double[:,:,:] coeff_out = np.empty([nch, ns, nt], dtype = double)

legcache = LegCache()

cdef int istart = 0
cdef int n = 0
cdef int start = 0
cdef double mean = 0.0

cdef int[:,:] legendres = np.empty([n, polyorder + 1], dtype=int)
cdef double[:,:] qt = np.empty([polyorder+1, polyorder+1], dtype=double)   
cdef double[:,:] rinv = np.empty([polyorder + 1, n], dtype=double)

#cdef double[:,:] bolo # I think you can get away without giving it a value. bolo changes in size throughout the loop
cdef int[:] goodhits = np.empty(np.shape(mask)[1], dtype = int)
# I'm not sure about the size of these
cdef double[:,:] coeff = np.empty([]) # I can't remember how dot product work right now but this should be easy to sort out

# remove mean
if polyorder == 0:
    for s in range(len(scan_list)):
        istart, n = scan_list[s]
        start = istart - ibegin
        for i in range(nch):
            if np.any(mask[i, start:start + n]):
                mean = np.average(
                    array[i, start:start + n], weights=mask[i, start:start + n])
                array[i, start:start + n] -= mean
                coeff_out[i, s, 0] = mean

# other cases
if polyorder > 0:
    for s in range(len(scan_list)):
        istart, n = scan_list[s]
        start = istart - ibegin
        if n <= polyorder:  # otherwise cannot compute legendre polynomials
            for i in range(nch):
                mask[i, start:start + n] = 0  # flag it
                # remove this region from actual data as well
                mask_remove[i, start:start + n] = 0
                print('Not enough points (%d) to build legendre of order (%d)' % (n, polyorder))
            continue
        goodhits = np.sum(mask[:, start:start + n], axis=1)
        if n != nold:
            legendres, rinv, qt = legcache.prep_legendre(n, polyorder)
            rinvqt = np.dot(rinv, qt)
            nold = n
        # handle no masked ones

        for i in range(nch):
            if goodhits[i] != n:
                continue  # skip for now

            bolo[i, :] = array[i, start:start + n] #where problem starts
            coeff = np.dot(rinvqt, bolo)
            coeff_out[i, s, :] = coeff
            bolo -= np.dot(legendres, coeff)


        for i in range(nch):
            if goodhits[i] == n:
                continue  # skip since dealt with above
            if goodhits[i] < minfrac * n:  # not enough points
                mask[i, start:start + n] = 0  # flag it
                # remove this region from actual data as well
                mask_remove[i, start:start + n] = 0
                continue
            bolo, coeff = filter_slice_legendre_qr_mask_precalc(
                array[i, start:start + n], mask[i, start:start + n], legendres)
            array[i, start:start + n] = bolo
            coeff_out[i, s, :] = coeff
return coeff_out

And it raises the non-specific error "Compiler crash in ExpandInplaceOperators" when I try to compile the code. I'm totally lost.


Solution

  • The minimal code to generate the error doesn't need to run. It just needs to be enough to generate the error message. I kept deleting code until I was left with the smallest block that would generate the error message and that was this

    cdef poly_filter_array(
        double[:,:] array):
    
        cdef double mean = 0.0
    
        cdef int i =0
    
        array[i, :] -= mean
    

    Compiling this code gives

    Compiler crash in ExpandInplaceOperators
    
    ModuleNode.body = StatListNode(cycrashdelme.pyx:7:5)
    StatListNode.stats[0] = CFuncDefNode(cycrashdelme.pyx:7:5,
        args = [...]/1,
        modifiers = [...]/0,
        visibility = 'private')
    CFuncDefNode.body = StatListNode(cycrashdelme.pyx:10:4)
    StatListNode.stats[2] = InPlaceAssignmentNode(cycrashdelme.pyx:14:9,
        operator = '-')
    File 'UtilNodes.py', line 146, in __init__: ResultRefNode(may_hold_none = True,
        result_is_used = True,
        use_managed_ref = True)
    
    Compiler crash traceback from this point on:
      File "<some path on my computer>\lib\site-packages\Cython\Compiler\UtilNodes.py", line 146, in __init__
        assert self.pos is not None
    AssertionError:
    

    Something like this would have been a much better example to ask the question with...


    This pretty much tells you that the problem is with the line array[i,:] -= mean (in your original version I think it was array[:,start:start+n]).

    It's worth trying a few simplified versions to see what happens:

    array -= mean
    array[i,:] = array[i,:] - mean
    

    gives

    Invalid operand types for '-' (double[:, :]; double)
    Invalid operand types for '-' (double[:]; double)
    

    for the two lines respectively. So we know that the issue is that Cython memoryviews don't support arithmetic over the whole view (you can do it to individual elements though).

    You can do arithmetic by temporarily converting it to a numpy array

    tmp_as_array = np.asarray(array[i,:])
    tmp_as_array -= mean
    

    (there's no speed advantage to typing tmp_as_array). This modifies the existing data in place, rather than copying it, which you can verify by printing tmp_as_array.owndata which should be False.


    In conclusion -

    1. you can't do arithmetic on Cython memoryviews (they just provide array storage, but aren't designed to provide maths).
    2. The error message you got is hugely unhelpful and probably a bug and should be reported to https://github.com/cython/cython/issues.
    3. Make a copy of your code and then start deleting bits until you identify what's actually causing the error