Tags: python, word2vec, gradient-descent, numba

Implementing numba for word2vec gradient descent but getting LoweringError


I am running gradient descent for word2vec and would like to implement numba to speed up the training.

EDIT: It seems the real error is this

NotImplementedError: unsupported nested memory-managed object

This is a subsequent error:

raise NotImplementedError("%s: %s" % (root_type, str(e)))

numba.errors.LoweringError: Failed at nopython (nopython mode backend)
reflected list(reflected list(int64)): unsupported nested memory-managed object
File "test.py", line 36
[1] During: lowering "negative_indices = arg(6, name=negative_indices)" at test.py (36)

I've searched through the numba documentation and googled this error with no luck.

Here is a replicable code snippet:

import numpy as np
import random
from numba import jit

random.seed(10)
np.random.seed(10)

@jit(nopython=True)
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

@jit(nopython=True)
def listsum(list1):
    ret=0
    for i in list1:
        ret += i
    return ret


num_samples = 2
learning_rate = 0.05
center_token = 50
hidden_size = 100
sequence_chars = [2000, 1500, 400, 600]
W1 = np.random.uniform(-.5, .5, size=(11000, hidden_size))
W2 = np.random.uniform(-.5, .5, size=(11000, hidden_size))
negative_indices = [[800,1000], [777,950], [650,300], [10000,9999]]

@jit(nopython=True)
def performDescent(num_samples, learning_rate, center_token, sequence_chars, W1, W2, negative_indices):
    nll_new = 0
    neg_idx = 0
    for k in range(0, len(sequence_chars)):
        w_c = sequence_chars[k]
        W_neg = negative_indices[k]
        w_j = [w_c] + W_neg
        t_j = [1] + [0]*len(W_neg)
        h = W1[center_token]

        update_i = np.zeros((hidden_size,len(w_j)))
        for i in range(0,len(w_j)):
            v_j = W2[w_j[i]]
            update_i[:,i] = (sigmoid(np.dot(v_j.T,h))-t_j[i])*v_j
            W2[w_j[i]] = v_j - learning_rate*(sigmoid(np.dot(v_j.T,h))-t_j[i])*h #creates v_j_new
        W1[center_token] = h - learning_rate*np.sum(update_i, axis=1)

        update_nll = []
        for i in range(1,len(w_j)):
            update_nll.append(np.log(sigmoid(-np.dot(W2[w_j[i]].T,h))))  #h is updated in memory
        nll = -np.log(sigmoid(np.dot(W2[w_j[0]].T,h))) - listsum(update_nll)
        print("nll:",nll)
        nll_new += nll
    return [nll_new]

performDescent(num_samples, learning_rate, center_token, sequence_chars, W1, W2, negative_indices)
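# Raises: numba.errors.LoweringError: Failed at nopython (nopython mode backend)
# reflected list(reflected list(int64)): unsupported nested memory-managed object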

I don't understand why negative_indices is causing an issue.


Solution

  • As the error message hints at, lists have only partial support in numba's nopython mode: they can't contain "memory-managed" objects, which means they can only hold scalar, primitive types. For example:

    from numba import njit

    @njit
    def list_first(l):
        return l[0]

    list_first([1, 2, 3])
    # 1

    list_first([[1], [2]])
    # LoweringError: Failed at nopython (nopython mode backend)
    # reflected list(reflected list(int64)): unsupported nested memory-managed object
    
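    As an aside, newer numba versions ship numba.typed.List, which does support nesting. A minimal sketch, assuming a numba release that provides typed lists:

    from numba.typed import List

    # Build a typed list of typed lists element by element;
    # unlike reflected Python lists, typed lists can be nested.
    outer = List()
    for row in [[1], [2]]:
        inner = List()
        for v in row:
            inner.append(v)
        outer.append(inner)

    list_first(outer)
    # returns the inner typed list holding 1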

    Assuming your example is representative, it seems the places where you use a list don't actually need one, and a list would hurt performance even if it were supported, because you know the allocation sizes in advance.

    Here's a potential refactoring that numba can handle.

    sequence_chars = np.array([2000, 1500, 400, 600], dtype=np.int64)
    negative_indices = np.array([[800,1000], [777,950], [650,300], [10000,9999]], dtype=np.int64)
    
    @jit(nopython=True)
    def performDescent2(num_samples, learning_rate, center_token, sequence_chars, W1, W2, negative_indices):
        nll_new = 0
    
        neg_ind_length = len(negative_indices[0])
        w_j = np.empty(neg_ind_length + 1, dtype=np.int64)
        t_j = np.zeros(neg_ind_length + 1, dtype=np.int64)
        t_j[0] = 1
    
        for k in range(0, len(sequence_chars)):
            w_j[0] = sequence_chars[k]
            w_j[1:] = negative_indices[k]
    
            h = W1[center_token]
    
            update_i = np.zeros((hidden_size,len(w_j)))
            for i in range(0,len(w_j)):
                v_j = W2[w_j[i]]
                update_i[:,i] = (sigmoid(np.dot(v_j.T, h)) - t_j[i]) * v_j
                W2[w_j[i]] = v_j - learning_rate * (sigmoid(np.dot(v_j.T, h)) - t_j[i]) * h #creates v_j_new
            W1[center_token] = h - learning_rate * np.sum(update_i, axis=1)
    
            update_nll = np.zeros(len(w_j) - 1)  # one slot per negative sample
    
            for i in range(1, len(w_j)):
                update_nll[i-1] = np.log(sigmoid(-np.dot(W2[w_j[i]].T, h)))  #h is updated in memory
            nll = -np.log(sigmoid(np.dot(W2[w_j[0]].T,h))) - update_nll.sum()
            print("nll:",nll)
            nll_new += nll
        return nll_new
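
    With the list arguments replaced by the arrays defined above, the same call now compiles and runs:

    performDescent2(num_samples, learning_rate, center_token, sequence_chars, W1, W2, negative_indices)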