I'm trying to do link prediction with StellarGraph, following the tutorial in the documentation.
When I reach this part :
def node2vec_embedding(graph, name):
    """Train node2vec embeddings for ``graph`` and return a lookup function.

    Random walks are generated with ``BiasedRandomWalk`` starting from the
    nodes returned by ``graph.nodes()`` and are passed, unchanged, to gensim's
    ``Word2Vec`` as its sentences. The walk and training settings
    (``num_walks``, ``walk_length``, ``p``, ``q``, ``dimensions``,
    ``window_size``, ``workers``, ``num_iter``) are read from the enclosing
    scope rather than passed in.

    Args:
        graph: Graph to embed; handed to ``BiasedRandomWalk`` and queried via
            ``graph.nodes()``.
        name: Label used only in the printed progress message.

    Returns:
        A function that maps a node ID ``u`` to ``model.wv[u]``.
    """
    random_walks = BiasedRandomWalk(graph).run(
        graph.nodes(), n=num_walks, length=walk_length, p=p, q=q
    )
    print(f"Number of random walks for '{name}': {len(random_walks)}")

    # NOTE(review): node IDs become Word2Vec vocabulary tokens as-is. gensim
    # evaluates `wv.index2word[i] + str(self.seed)` while initialising the
    # weights, so the IDs presumably have to be `str` for this call to work.
    word2vec = Word2Vec(
        random_walks,
        size=dimensions,
        window=window_size,
        min_count=0,
        sg=1,
        workers=workers,
        iter=num_iter,
    )

    def get_embedding(u):
        return word2vec.wv[u]

    return get_embedding


embedding_train = node2vec_embedding(graph_train, "Train Graph")
I get a UFuncTypeError:
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
I have no idea what caused it...
I thought about doing something like this instead :
from node2vec import Node2Vec
# Convert the StellarGraph into a NetworkX graph so that the standalone
# `node2vec` package can consume it.
# NOTE(review): whether edge features survive this conversion is not shown
# here — confirm before relying on it.
graph_train = StellarGraph.to_networkx(graph_train)
# Build the Node2Vec object on the converted graph; no other arguments are
# passed, so the package's own defaults apply.
node2vec = Node2Vec(graph_train)
# fit() presumably trains and returns the embedding model — verify against
# the node2vec package documentation.
model = node2vec.fit()
But I'm afraid I'll lose the edge features if I convert my graph to NetworkX...
Any help would be greatly appreciated :)
EDIT: I tried the tutorial method on another dataset, simpler, with no edge features, and got the same error.
EDIT 2: Just in case, here is the full error traceback:
```
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-188-673a72292ea4> in <module>
----> 1 embedding_train = node2vec_embedding(graph_train, "Train Graph")
<ipython-input-187-aecb7f480f86> in node2vec_embedding(graph, name)
15 sg=1,
16 workers=workers,
---> 17 iter=num_iter,
18 )
19
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in __init__(self, sentences, corpus_file, size, alpha, window, min_count, max_vocab_size, sample, seed, workers, min_alpha, sg, hs, negative, ns_exponent, cbow_mean, hashfxn, iter, null_word, trim_rule, sorted_vocab, batch_words, compute_loss, callbacks, max_final_vocab)
598 sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter,
599 callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window,
--> 600 seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss)
601
602 def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch,
D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in __init__(self, sentences, corpus_file, workers, vector_size, epochs, callbacks, batch_words, trim_rule, sg, alpha, window, seed, hs, negative, ns_exponent, cbow_mean, min_alpha, compute_loss, **kwargs)
743 raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.")
744
--> 745 self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule)
746 self.train(
747 sentences=sentences, corpus_file=corpus_file, total_examples=self.corpus_count,
D:\environnements\stel2\lib\site-packages\gensim\models\base_any2vec.py in build_vocab(self, sentences, corpus_file, update, progress_per, keep_raw_vocab, trim_rule, **kwargs)
927 trim_rule=trim_rule, **kwargs)
928 report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
--> 929 self.trainables.prepare_weights(self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary)
930
931 def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in prepare_weights(self, hs, negative, wv, update, vocabulary)
1685 # set initial input/projection and hidden weights
1686 if not update:
-> 1687 self.reset_weights(hs, negative, wv)
1688 else:
1689 self.update_weights(hs, negative, wv)
D:\environnements\stel2\lib\site-packages\gensim\models\word2vec.py in reset_weights(self, hs, negative, wv)
1702 for i in range(len(wv.vocab)):
1703 # construct deterministic seed from word AND seed argument
-> 1704 wv.vectors[i] = self.seeded_vector(wv.index2word[i] + str(self.seed), wv.vector_size)
1705 if hs:
1706 self.syn1 = zeros((len(wv.vocab), self.layer1_size), dtype=REAL)
UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U21'), dtype('<U21')) -> dtype('<U21')
```
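I finally found the solution. It was quite unclear (at least to me) from the documentation, but your node labels must be strings, not integers. The traceback shows why: gensim computes `wv.index2word[i] + str(self.seed)`, which fails when the node ID is a (NumPy) integer instead of a string.
So a simple `.astype(str)`
on the node IDs in my dataframe fixed it.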
I hope this will help others in the future !