Search code examples
python-3.xperformancedepth-first-search

optimizing depth first search python


I have got parent and child rows with levels. So I am using the code below to identify the parents ,child and their level as follows:

       instanceID     parentInstanceID dependencyInstanceID isCycle  level  
0         44909            103565                103565    False      0 
1         45461            103565                103565    False      0 
2         45465            103565                103565    False      0 
    def traverse_graph(graph):
        n_stack = Stack([n for n in graph.root_parents])
        p_stack = Stack()
    
        track_of_node = {}
        cycles = {}
        print("Beginning of whileloop", datetime.now())
        while n_stack.stack:
            if n_stack.last not in graph.visited_nodes:
                if n_stack.last != p_stack.last:
                    if graph.nodes_dict[n_stack.last]:
                        p_stack.add(n_stack.last)
                        if p_stack.last not in track_of_node.keys():
                            track_of_node[p_stack.last] = 1
                        else:
                            track_of_node[p_stack.last] += 1
                        if track_of_node[p_stack.last] >= 3 and check_cycle(p_stack):
                            if p_stack.stack[-2] not in cycles.keys():
                                cycles[p_stack.stack[-2]] = [p_stack.last]
                            else:
                                cycles[p_stack.stack[-2]].append(p_stack.last)
                            n_stack.pop()
                            e = p_stack.pop()
                            track_of_node[e] -= 1
                        else:
                            discard = []
                            if n_stack.last in cycles.keys():
                                discard += cycles[n_stack.last]
    
                            n_stack.add_multiple(graph.nodes_dict[n_stack.last], discard)
                    else:
                        graph.visited_nodes[n_stack.last] = set([])
                        n_stack.pop()
    
                else:
                    cycle_nodes = []
                    if n_stack.last in cycles.keys():
                        cycle_nodes += cycles[n_stack.last]
                    descendants = []
                    for c in graph.nodes_dict[n_stack.last]:
                        if c in graph.visited_nodes.keys():
                            for d in graph.visited_nodes[c]:
                                c, ip, ic, lv = list(d)
                                if lv != -1:
                                    descendants.append((c, ip, ic, lv+1))
                    graph.visited_nodes[n_stack.last] = set([(c, n_stack.last, False, 0)
                                                             for c in graph.nodes_dict[n_stack.last]] +
                                                            descendants +
                                                            [(n, n_stack.last, True, -1) for n in cycle_nodes])
                    n_stack.pop()
                    e = p_stack.pop()
                    if e in track_of_node.keys():
                        #print("within e",e)
                        track_of_node[e] -= 1
    
            else:
                if n_stack.last == p_stack.last:
                    e = p_stack.pop()
                    if e in track_of_node.keys():
                        track_of_node[e] -= 1
    
                n_stack.pop()

The for loop is taking the longest time.

for c in graph.nodes_dict[n_stack.last]:
    if c in graph.visited_nodes.keys():
        for d in graph.visited_nodes[c]:
            c, ip, ic, lv = list(d)
            if lv != -1:
               descendants.append((c, ip, ic, lv+1))

The graph passed as parameter is like this

[42345] []  
[42345, 42547] []   
[42345, 42547, 44025] []    
[42345, 42547, 44025, 44032] [] 
[42345, 42547, 44025, 44032, 44085] []  
[42345, 42547, 44025, 44032, 44085, 44097] []   
[42345, 42547, 44025, 44032, 44085, 44097, 44098] []    
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099] [] 
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099, 44327] []  

I tried LRU cache and multi thread execution using ThreadPoolExecutor as well. But it did not work. Please let me know if you have any ideas for faster execution of the for loop. Thanks.


Solution

  • I called the traverse_graph(graph) in chunks and that solved the problem.