I have got parent and child rows with levels. So I am using the code below to identify the parents ,child and their level as follows:
instanceID parentInstanceID dependencyInstanceID isCycle level
0 44909 103565 103565 False 0
1 45461 103565 103565 False 0
2 45465 103565 103565 False 0
def traverse_graph(graph):
n_stack = Stack([n for n in graph.root_parents])
p_stack = Stack()
track_of_node = {}
cycles = {}
print("Beginning of whileloop", datetime.now())
while n_stack.stack:
if n_stack.last not in graph.visited_nodes:
if n_stack.last != p_stack.last:
if graph.nodes_dict[n_stack.last]:
p_stack.add(n_stack.last)
if p_stack.last not in track_of_node.keys():
track_of_node[p_stack.last] = 1
else:
track_of_node[p_stack.last] += 1
if track_of_node[p_stack.last] >= 3 and check_cycle(p_stack):
if p_stack.stack[-2] not in cycles.keys():
cycles[p_stack.stack[-2]] = [p_stack.last]
else:
cycles[p_stack.stack[-2]].append(p_stack.last)
n_stack.pop()
e = p_stack.pop()
track_of_node[e] -= 1
else:
discard = []
if n_stack.last in cycles.keys():
discard += cycles[n_stack.last]
n_stack.add_multiple(graph.nodes_dict[n_stack.last], discard)
else:
graph.visited_nodes[n_stack.last] = set([])
n_stack.pop()
else:
cycle_nodes = []
if n_stack.last in cycles.keys():
cycle_nodes += cycles[n_stack.last]
descendants = []
for c in graph.nodes_dict[n_stack.last]:
if c in graph.visited_nodes.keys():
for d in graph.visited_nodes[c]:
c, ip, ic, lv = list(d)
if lv != -1:
descendants.append((c, ip, ic, lv+1))
graph.visited_nodes[n_stack.last] = set([(c, n_stack.last, False, 0)
for c in graph.nodes_dict[n_stack.last]] +
descendants +
[(n, n_stack.last, True, -1) for n in cycle_nodes])
n_stack.pop()
e = p_stack.pop()
if e in track_of_node.keys():
#print("within e",e)
track_of_node[e] -= 1
else:
if n_stack.last == p_stack.last:
e = p_stack.pop()
if e in track_of_node.keys():
track_of_node[e] -= 1
n_stack.pop()
The for loop is taking the longest time.
for c in graph.nodes_dict[n_stack.last]:
if c in graph.visited_nodes.keys():
for d in graph.visited_nodes[c]:
c, ip, ic, lv = list(d)
if lv != -1:
descendants.append((c, ip, ic, lv+1))
The graph passed as parameter is like this
[42345] []
[42345, 42547] []
[42345, 42547, 44025] []
[42345, 42547, 44025, 44032] []
[42345, 42547, 44025, 44032, 44085] []
[42345, 42547, 44025, 44032, 44085, 44097] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099, 44327] []
I tried LRU cache and multi thread execution using ThreadPoolExecutor as well. But it did not work. Please let me know if you have any ideas for faster execution of the for loop. Thanks.
I called the traverse_graph(graph) in chunks and that solved the problem.