Search code examples
pythonnetworkxabstract-syntax-treeisomorphism

Subgraph isomorphism in NetworkX graphs from python ASTs


I have two python files. I generated two NetworkX graphs just traversing the ASTs of the files. I want to check - is there a sub-graph of one graph isomorphic to another one?

Python files are presented below -

whiletest.py

a= 10
while(a <= 0):
    if a == 5:
        print(a)
        a += 1
print("exited")

whiletestchanged.py

a= 10
# ANBD
'''
Test Someting
'''
while(b <= 0): #   print("a is:", a)    
    if a == 5:   # if a is 5, then break  
        print(a) 
    a += 1
    # a += 1 
print("exited")

Class for making the NXgraphs from the ASTs

class GetNXgraphFromAST(ast.NodeVisitor):
    def __init__(self):
        self.stack = []
        self.graph = nx.Graph()

    def generic_visit(self, stmt):
        node_name = stmt
        parent_name = None

        if self.stack:
            parent_name = self.stack[-1]

        self.stack.append(node_name)
        self.graph.add_node(node_name)

        if parent_name:
            self.graph.add_edge(node_name, parent_name)

        super(self.__class__, self).generic_visit(stmt)
        self.stack.pop()

Their graph representations are presented below -

whiletest.png

enter image description here

whiletestchanged.png

enter image description here

The code to check isomorphism -

nodesoriginal = ast.parse(srcOriginal)
nodesbackport = ast.parse(srcBackport)
OriginalNXGraphIni = GetNXgraphFromAST()
BackportNXGraphIni = GetNXgraphFromAST()
OriginalNXGraphIni.visit(nodesoriginal) 
BackportNXGraphIni.visit(nodesbackport)
OriginalNXGraph = OriginalNXGraphIni.graph
BackportNXGraph = BackportNXGraphIni.graph

isomorphicSUbGraphs = isomorphism.GraphMatcher(OriginalNXGraph, BackportNXGraph)
for subGraph in isomorphicSUbGraphs.subgraph_isomorphisms_iter():
    subGraph = subGraph
    break 

But it does not detect any sub-isomorphic graph. Am I making any mistakes? Thanks in advance for any helps regarding this.


Solution

  • The code below accepts two Python source strings, checks if any isomorphic subgraphs exist, and then prints the results:

    import ast
    import collections
    
    def tree_attrs(tree):
       #extract AST node's attributes for matching later
       return (t:=type(tree)).__name__, \
        [a for a in t._fields if not isinstance(getattr(tree, a), 
                                                            (ast.AST, list))], \
        [i for i in t._fields if isinstance(getattr(tree, i), (ast.AST, list))]
        
    def build_graph(tree, d1, d2, p = []):
       #recursively build subgraph from a starting AST node
       #potential child nodes can be discovered by checking d1 and d2
       if str(attrs:=tree_attrs(tree)) in d2 and \
          any(p == p1[-1*len(p):] or not p for _, p1 in d2[str(attrs)]):
          ast_attrs = {}
          for i in attrs[2]:
             if isinstance(v:=getattr(tree, i), list):
                ast_attrs[i] = [result for j in v if (result:=build_graph(j, d1, d2, p+[type(tree).__name__])) is not None]
             elif (result:=build_graph(v, d1, d2, p+[type(tree).__name__])) is not None:
                ast_attrs[i] = result
          return type(tree)(**{i:getattr(tree, i) for i in attrs[1]}, **ast_attrs, **{i:getattr(tree, i, 0) 
                 for i in type(tree)._attributes})   
    
    def walk(tree, p = []):
       #get all AST nodes from the .py source, along with its ancestor hierarchy
       yield tree, p
       for i in tree._fields:
         if isinstance(v:=getattr(tree, i), list):
           for j in v:
              yield from walk(j, p + [type(tree).__name__])
         elif isinstance(v, ast.AST):
            yield from walk(v, p + [type(tree).__name__])
    
    def subgraphs(s1, s2):
       #build AST node lookup from both .py sources
       #these lookups are later used to find valid children for any given AST node
       d1, d2 = collections.defaultdict(list), collections.defaultdict(list)
       for i, p in walk(ast.parse(s1)):
          d1[str(tree_attrs(i))].append((i, p))
       for i, p in walk(ast.parse(s2)):
          d2[str(tree_attrs(i))].append((i, p))
       return [build_graph(i, d1, d2) for i in ast.walk(ast.parse(s1))]
       
    def valid_py_subgraphs(sub_g):
       result = []
       for i in sub_g:
          try:
             if (r:=ast.unparse(i)):
                result.append(r)
          except: pass
       return result
    

    s1 = """
    a= 10
    while(b <= 0):
        if a == 5:
            print(a)
            a += 1
    print("exited")
    """
    s2 = """a= 10
    # ANBD
    '''
    Test Someting
    '''
    while(a <= 0): #   print("a is:", a)    
        if a == 5:   # if a is 5, then break  
            print(a) 
        a += 1
        # a += 1 
    print("exited")"""
    sub_g = valid_py_subgraphs(subgraphs(s1, s2))
    print(bool(sub_g))
    print('='*20)
    for i in sub_g:
      print(i)
      print('-'*20)
    

    Sample output:

    True
    ====================
    a = 10
    while b <= 0:
        if a == 5:
            print(a)
    print('exited')
    --------------------
    a = 10
    --------------------
    while b <= 0:
        if a == 5:
            print(a)
    --------------------
    print('exited')
    --------------------
    a
    --------------------
    10
    --------------------
    b <= 0
    --------------------
    if a == 5:
        print(a)
    --------------------
    print('exited')
    --------------------
    a += 1
    --------------------
    print(a)
    --------------------
    ...
    

    s3 = """
    def main(a, b, c):
        return [i for i in range([3, 4])]
    """
    s4 = """
    def main(a, b, n = 10):
       return [i for i in range([1, 2, 3])]
    """
    sub_g = valid_py_subgraphs(subgraphs(s3, s4))
    print(bool(sub_g))
    for i in sub_g:
      print(i)
      print('-'*20)
    

    Sample Output:

    True
    ====================
    def main(a, b, c):
        return [i for i in range([3, 4])]
    --------------------
    a, b, c
    --------------------
    return [i for i in range([3, 4])]
    --------------------
    a
    --------------------
    b
    --------------------
    c
    --------------------
    [i for i in range([3, 4])]
    --------------------
     for i in range([3, 4])
    --------------------
    i
    --------------------
    range([3, 4])
    --------------------
    range
    --------------------
    [3, 4]
    --------------------
    3
    --------------------
    4
    --------------------