Following up on an earlier question of mine, my goal now is to parse a Python file and to list the classes it defines,
without loading (running) the file.
Currently, I have this working code:
parser.py
import ast
def get_classes(path):
    """Return the names of the classes defined at the top level of a file.

    The file is only parsed with ``ast``; it is never imported or executed,
    so names that would be undefined at run time do not matter.

    Args:
        path: Path of the Python source file to inspect.

    Returns:
        A list of class names in definition order. Classes nested inside
        functions or other classes are not included.

    Raises:
        OSError: If the file cannot be read.
        SyntaxError: If the file is not syntactically valid Python.
    """
    # Read raw bytes so that ast.parse() decodes the source itself and
    # honours a PEP 263 coding declaration, instead of relying on the
    # platform's default text encoding.
    with open(path, 'rb') as fh:
        root = ast.parse(fh.read(), path)
    return [
        node.name
        for node in ast.iter_child_nodes(root)
        if isinstance(node, ast.ClassDef)
    ]
# Print the name of every top-level class found in a.py, one per line.
for c in get_classes('a.py'):
    print(c)
File to be parsed:
from c import CClass
# UndefinedClass is neither defined nor imported in this file, so the class
# can be parsed statically but would raise NameError if the file were run.
class MyClass(UndefinedClass):
    name = 'Edgar'

    def foo(self, x):
        print(x)
def func():
    """Print a fixed greeting."""
    print('Hello')
The good part of this solution is that I get the list of class names even though the file a.py contains invalid Python code. It looks like I have to dig deeper into the ast module. Is there any way I can also extract the list of each class's attributes and its base classes?
You can use recursion to traverse the AST produced by `ast.parse`. The solution below performs this search not only in the main input file but also in any subsequently imported files:
import ast, importlib
class Parse:
    """Collect class names, base classes and class-level attributes from an AST.

    Nothing is imported or executed: the source of every module referenced
    by an ``import`` statement is located on ``sys.path``, parsed with
    ``ast`` and walked in the same way.

    Attributes:
        c: Maps each class name found to a dict with two keys: ``'bases'``
           (the base-class expressions as strings) and ``'attrs'`` (names
           assigned directly in the class body, in order of appearance).
    """

    def __init__(self):
        self.c = {}
        # Source files already parsed because of an import statement; this
        # stops circular imports from recursing forever.
        self._seen = set()

    def walk(self, tree, f=None):
        """Recursively visit ``tree`` and record what it defines in ``self.c``.

        Args:
            tree: An ``ast`` node. Any other value (a string, a number,
                ``None``) that appears as a field of a node is ignored.
            f: Name of the class whose body is being visited, or ``None``
                when ``tree`` is not a direct child of a class body.
        """
        if isinstance(tree, ast.ClassDef):
            self.c[tree.name] = {
                'bases': [self._describe(base) for base in tree.bases],
                'attrs': [],
            }
            for child in tree.body:
                self.walk(child, tree.name)
        elif isinstance(tree, (ast.Import, ast.ImportFrom)):
            for module_name in self._imported_modules(tree):
                source_path = self._find_source(module_name)
                if source_path is None or source_path in self._seen:
                    continue
                self._seen.add(source_path)
                # Bytes let ast.parse() apply the file's own declared encoding.
                with open(source_path, 'rb') as handle:
                    imported = ast.parse(handle.read(), source_path)
                self.walk(imported)
        elif isinstance(tree, (ast.Assign, ast.AnnAssign)) and f is not None:
            # ``a = b = 1`` has several targets; ``x: int = 1`` has exactly one.
            if isinstance(tree, ast.Assign):
                targets = tree.targets
            else:
                targets = [tree.target]
            for target in targets:
                self.c[f]['attrs'].extend(self._target_names(target))
        else:
            # Generic descent. Anything below this point is no longer a
            # direct child of a class body, hence the ``None``.
            for field in getattr(tree, '_fields', ()):
                value = getattr(tree, field, None)
                for child in (value if isinstance(value, list) else [value]):
                    self.walk(child, None)

    def _describe(self, node):
        """Return a readable string for a base-class expression."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return f"{self._describe(node.value)}.{node.attr}"
        # Subscripts, calls, etc.: ast.unparse() only exists on Python 3.9+.
        unparse = getattr(ast, 'unparse', None)
        return unparse(node) if unparse is not None else ast.dump(node)

    def _target_names(self, target):
        """Return the plain names bound by one assignment target."""
        if isinstance(target, ast.Name):
            return [target.id]
        if isinstance(target, (ast.Tuple, ast.List)):
            names = []
            for element in target.elts:
                names.extend(self._target_names(element))
            return names
        if isinstance(target, ast.Starred):
            return self._target_names(target.value)
        # Attribute and subscript targets do not introduce a class attribute.
        return []

    @staticmethod
    def _imported_modules(node):
        """Return the absolute module names referenced by an import node."""
        if isinstance(node, ast.Import):
            return [alias.name for alias in node.names]
        # Relative imports cannot be resolved without knowing the package
        # that the parsed file belongs to, so they are skipped.
        if node.level or not node.module:
            return []
        return [node.module]

    @staticmethod
    def _find_source(module_name):
        """Return the path of the ``.py`` file for a module, or ``None``.

        Dotted names are resolved one component at a time through each
        parent's search locations, so no package is imported on the way.
        ``None`` is returned for modules that cannot be found and for
        modules without Python source (built-in, extension, namespace).
        """
        from importlib.machinery import PathFinder

        parts = module_name.split('.')
        search_path = None
        spec = None
        for end in range(1, len(parts) + 1):
            spec = PathFinder.find_spec('.'.join(parts[:end]), search_path)
            if spec is None:
                return None
            search_path = spec.submodule_search_locations
            if search_path is None and end < len(parts):
                # A plain module cannot contain submodules.
                return None
        origin = spec.origin
        if not spec.has_location or not origin or not origin.endswith('.py'):
            return None
        return origin
Putting it all together with your two original files:
File c.py:
# Module-level variable; it is not a class attribute.
c_var = 2


class CClass:
    name = 'Anna'
File a.py:
from c import CClass
# UndefinedClass is neither defined nor imported in this file, so the class
# can be parsed statically but would raise NameError if the file were run.
class MyClass(UndefinedClass):
    name = 'Edgar'

    def foo(self, x):
        print(x)
def func():
    """Print a fixed greeting."""
    print('Hello')
# Parse a.py (the file shown above) without importing it, then print the
# class information that was collected.
p = Parse()
with open('a.py') as f:
    p.walk(ast.parse(f.read()))
print(p.c)
Output:
{'CClass': {'bases': [], 'attrs': ['name']}, 'MyClass': {'bases': ['UndefinedClass'], 'attrs': ['name']}}