I'm trying to implement a Graph/Node data structure to store some connections between synsets in WordNet.
import nltk
from nltk.corpus import wordnet as wn
# a dict of [wn.synset, Node]
syns_node_dict = []
# gets a node from a wn.synset
def nodeFromSyn(syn):
for row in syns_node_dict:
if row[0] == syn:
return row[1]
return False
def display_dict():
for row in syns_node_dict:
curr_nde = row[1]
display([curr_nde._value, curr_nde._parents, curr_nde._children])
# Graph Node data struct
class Node:
# value is a wn.synset
# parents, children are both lists of wn.synset
def __init__(self, value, parents=[], children=[]):
curr = nodeFromSyn(value)
# if a Node for value already exists, merge attributes
if curr:
for p in parents:
if p not in curr._parents : curr._parents.append(p)
for c in children:
if c not in curr._children : curr._children.append(c)
self = curr
# if a Node for value does not exist, create new Node for value
else:
syns_node_dict.append([value, self])
self._value = value
self._parents = parents
self._children = children
# Create a Node for each of self's parents if it does not already exist
# and add self as a child
for parent in self._parents:
parent_node = nodeFromSyn(parent)
if parent_node:
if value not in parent_node._children:
parent_node._children.append(value)
else:
parent_node = Node(parent, children=[value])
# Create a Node for each of self's children if it does not already exist
# and add self as a parent
for child in self._children:
child_node = nodeFromSyn(child)
if child_node:
if value not in child_node._parents:
child_node._parents.append(value)
else:
child_node = Node(child, parents=[value])
However, I'm seeing some strange behavior when I attempt to run my code:
Node(wn.synset('condition.n.01'), [], children=[wn.synset('difficulty.n.03')])
display_dict()
>[Synset('condition.n.01'), [], [Synset('difficulty.n.03')]]
>[Synset('difficulty.n.03'), [Synset('condition.n.01')], []]
Node(wn.synset('state.n.02'), children=[wn.synset('condition.n.01')])
display_dict()
>[Synset('condition.n.01'), [Synset('state.n.02')], [Synset('difficulty.n.03')]]
>[Synset('difficulty.n.03'), [Synset('condition.n.01')], []]
>[Synset('state.n.02'), [], [Synset('condition.n.01')]]
When I run the following lines of code I expect the last line to be
[Synset('attribute.n.02'), [], [Synset('state.n.02')]].
Why does the Node for wn.synset('attribute.n.02')
contain itself among its parents?
Node(wn.synset('attribute.n.02'), children=[wn.synset('state.n.02')])
display_dict()
>[Synset('condition.n.01'), [Synset('state.n.02')], [Synset('difficulty.n.03')]]
>[Synset('difficulty.n.03'), [Synset('condition.n.01')], []]
>[Synset('state.n.02'), [Synset('attribute.n.02')], [Synset('condition.n.01')]]
>[Synset('attribute.n.02'), [Synset('attribute.n.02')], [Synset('state.n.02')]]
I isolated the issue to be within the block below but I can't figure out what's causing this behavior.
# Create a Node for each of self's children if it does not already exist
# and add self as a parent
for child in self._children:
child_node = nodeFromSyn(child)
if child_node:
if value not in child_node._parents:
child_node._parents.append(value)
else:
child_node = Node(child, parents=[value])
The reason this is happening is because default parameters are evaluated once when the function is defined, not each time it is called. Because of this, all the nodes you create with the parents
(or children
) parameter blank are actually sharing the same list for their parent list. For the first two nodes this isn't a problem because you specify an empty list for the first node, and the first node creates a list for the second, but all the nodes after that are sharing the same list (which becomes more obvious if you keep creating more nodes).
A way to solve this problem outlined here is to make the default parameters None, then check if the parameter is none and create the empty list if so:
# modify the init function so that the default parameters are None
def __init__(self, value, parents=None, children=None):
# if they are left as None, set them to an empty list
if parents is None:
parents = []
if children is None:
children = []
# The rest of your code here...