The ability for Python to parse itself is a capability that most developers never utilize. You can read Python source code as a structured tree of nodes rather than as text by using the ast (Abstract Syntax Tree) module. You can then examine, evaluate, or modify the code programmatically.
This is how code analysis tools, formatters, linters, and type checkers are constructed. Let's see how it functions.
What Is an Abstract Syntax Tree?
When Python runs your code, the first thing it does is parse it into an AST, which is a tree structure that represents each statement, expression, operator and value. The representation that lies in between raw source text and bytecode is called the AST.
import ast
# Parse a single assignment statement into a Module tree.
source = "x = 1 + 2"
tree = ast.parse(source)
# Passing indent=2 asks dump() for a multi-line, indented rendering of the tree.
print(ast.dump(tree, indent=2))
Module(
  body=[
    Assign(
      targets=[
        Name(id='x', ctx=Store())],
      value=BinOp(
        left=Constant(value=1),
        op=Add(),
        right=Constant(value=2)))],
  type_ignores=[])
Every component of that statement, including the assignment, the variable name, the operator, and the constants, is a separate node in the tree. With `ast`, you can walk the tree and inspect or transform any node you find.
Parsing Code
import ast
# Parse a string of source code
tree = ast.parse("x = 42")

# Parse an expression (not a full statement)
expr_tree = ast.parse("1 + 2 * 3", mode="eval")

# Parse from a file. Python source is UTF-8 by default, so decode it
# explicitly instead of relying on the platform's locale encoding.
# Passing filename= makes syntax errors report the real file name.
with open("mymodule.py", encoding="utf-8") as f:
    tree = ast.parse(f.read(), filename="mymodule.py")
mode options:
- "exec" (default) — a full module / multiple statements
- "eval" — a single expression
- "single" — a single interactive statement
Inspecting Nodes
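Every node has a _fields attribute naming its child fields, and ast.walk() yields every node in the tree (in no guaranteed order), which makes it easy to see what a piece of code is made of: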
import ast
tree = ast.parse("result = foo(x, y=10)")

# Walk every node and print its class name, all on one line
for node in ast.walk(tree):
    node_type = type(node).__name__
    print(node_type, end=" ")
Module Assign Name Call Store Name Name keyword Load Load Constant
Getting Line Numbers
import ast
# The sample program must keep its own indentation inside the string,
# otherwise ast.parse() rejects it with an IndentationError.
source = """
def greet(name):
    return f"Hello, {name}"
greet("Alice")
"""

tree = ast.parse(source)

# Only nodes that carry a lineno attribute are reported, so check for it
# before reading it.
for node in ast.walk(tree):
    if hasattr(node, "lineno"):
        print(f"Line {node.lineno}: {type(node).__name__}")
NodeVisitor
ast.NodeVisitor gives you a clean visitor pattern for traversing the AST. You define visit_<NodeType> methods and they're called automatically.
Example: Find All Function Definitions
import ast
class FunctionFinder(ast.NodeVisitor):
    """Collect a summary record for every sync and async function in a tree.

    After ``visit(tree)``, ``self.functions`` holds one dict per definition
    with the keys ``name``, ``line``, ``args`` and ``is_async``.
    """

    def __init__(self):
        self.functions = []

    def _record(self, node):
        """Store the summary for *node*, then keep descending into its body."""
        summary = {
            "name": node.name,
            "line": node.lineno,
            "args": [param.arg for param in node.args.args],
            "is_async": isinstance(node, ast.AsyncFunctionDef),
        }
        self.functions.append(summary)
        self.generic_visit(node)  # Continue walking into nested functions

    def visit_FunctionDef(self, node):
        self._record(node)

    def visit_AsyncFunctionDef(self, node):
        self._record(node)
# Sample program to analyse. Its indentation and blank lines are part of
# the string: without them ast.parse() fails, and the reported line
# numbers (2, 5, 9) would not match.
source = """
def login(username, password):
    pass

async def fetch_data(url, timeout=30):
    pass

class UserService:
    def create_user(self, name, email):
        pass
"""

tree = ast.parse(source)
finder = FunctionFinder()
finder.visit(tree)

# Print one signature-like line per discovered function.
for fn in finder.functions:
    prefix = "async " if fn["is_async"] else ""
    print(f"Line {fn['line']}: {prefix}def {fn['name']}({', '.join(fn['args'])})")
Line 2: def login(username, password)
Line 5: async def fetch_data(url, timeout)
Line 9: def create_user(self, name, email)
Real-World Analysis Examples
Find All Imports
import ast
class ImportFinder(ast.NodeVisitor):
    """Collect the dotted name of everything a module imports.

    After ``visit(tree)``, ``self.imports`` lists ``pkg.mod`` for
    ``import pkg.mod`` and ``pkg.name`` for ``from pkg import name``.
    Relative imports keep their leading dots (``from ..pkg import b``
    is recorded as ``..pkg.b``).
    """

    def __init__(self):
        self.imports = []

    def visit_Import(self, node):
        """Record each module named in an ``import a, b.c`` statement."""
        self.imports.extend(alias.name for alias in node.names)

    def visit_ImportFrom(self, node):
        """Record ``module.name`` for each name in a ``from ... import``."""
        # node.level is the number of leading dots of a relative import
        # (0 for absolute); node.module is None for ``from . import x``.
        base = "." * (node.level or 0) + (node.module or "")
        if node.module:
            base += "."
        elif not base:
            # Hand-built node with neither module nor level: keep the
            # historical ".name" form.
            base = "."
        for alias in node.names:
            self.imports.append(f"{base}{alias.name}")
# Python source is UTF-8 by default, so decode it explicitly rather than
# relying on the platform's locale encoding.
with open("mymodule.py", encoding="utf-8") as f:
    tree = ast.parse(f.read())

finder = ImportFinder()
finder.visit(tree)

# De-duplicate and sort so the report is stable between runs.
print(sorted(set(finder.imports)))
Find All print() Calls
Useful for catching debug prints before committing:
import ast
class PrintFinder(ast.NodeVisitor):
    """Report every call whose callee is the bare name ``print``."""

    def visit_Call(self, node):
        callee = node.func
        # Only a plain name matches; attribute calls such as obj.print()
        # have an ast.Attribute callee and are ignored.
        is_bare_print = isinstance(callee, ast.Name) and callee.id == "print"
        if is_bare_print:
            print(f"Line {node.lineno}: print() call found")
        # Keep descending so calls nested in arguments are seen too.
        self.generic_visit(node)
# Python source is UTF-8 by default, so decode it explicitly rather than
# relying on the platform's locale encoding.
with open("main.py", encoding="utf-8") as f:
    tree = ast.parse(f.read())

PrintFinder().visit(tree)
Detect Mutable Default Arguments
A classic Python bug: def foo(items=[]). Let's detect it automatically:
import ast
class MutableDefaultFinder(ast.NodeVisitor):
    """Report functions whose parameter defaults are list/dict/set literals.

    Covers positional and keyword-only parameters of both ``def`` and
    ``async def`` functions, including nested ones.
    """

    MUTABLE_TYPES = (ast.List, ast.Dict, ast.Set)

    def visit_FunctionDef(self, node):
        # kw_defaults holds None for keyword-only parameters that have no
        # default; isinstance() simply skips those entries.
        every_default = (*node.args.defaults, *node.args.kw_defaults)
        for default in every_default:
            if isinstance(default, self.MUTABLE_TYPES):
                print(
                    f"Line {node.lineno}: '{node.name}' has a mutable default argument: "
                    f"{ast.unparse(default)}"
                )
        self.generic_visit(node)

    # async functions expose the same .args structure, so reuse the check.
    visit_AsyncFunctionDef = visit_FunctionDef
# Sample program to analyse. Its indentation and the blank line between
# the functions are part of the string: without them ast.parse() fails,
# and the reported line numbers (2 and 6) would not match.
source = """
def append_item(item, items=[]):
    items.append(item)
    return items

def update_config(key, config={}):
    config[key] = True
    return config
"""

tree = ast.parse(source)
MutableDefaultFinder().visit(tree)
Line 2: 'append_item' has a mutable default argument: []
Line 6: 'update_config' has a mutable default argument: {}
Count Complexity
import ast
class ComplexityAnalyzer(ast.NodeVisitor):
    """Approximates cyclomatic complexity per function.

    The score is 1 plus the number of BRANCH_NODES found anywhere inside
    the function, so branches of a nested function count towards the
    enclosing function as well as towards the nested one. One line is
    printed per ``def`` / ``async def``.
    """

    # Async loop/context forms are counted like their sync counterparts.
    BRANCH_NODES = (ast.If, ast.For, ast.While, ast.ExceptHandler,
                    ast.With, ast.Assert, ast.comprehension,
                    ast.AsyncFor, ast.AsyncWith)

    def visit_FunctionDef(self, node):
        complexity = 1 + sum(
            1 for child in ast.walk(node)
            if isinstance(child, self.BRANCH_NODES)
        )
        # Thresholds: up to 5 is fine, up to 10 is a warning, above is flagged.
        status = "✅" if complexity <= 5 else "⚠️" if complexity <= 10 else "❌"
        print(f"{status} Line {node.lineno}: {node.name}() — complexity: {complexity}")
        self.generic_visit(node)

    # async functions were previously skipped entirely; score them the same way.
    visit_AsyncFunctionDef = visit_FunctionDef
# Python source is UTF-8 by default, so decode it explicitly rather than
# relying on the platform's locale encoding.
with open("mymodule.py", encoding="utf-8") as f:
    tree = ast.parse(f.read())

ComplexityAnalyzer().visit(tree)
ast.unparse()
ast.unparse(), which transforms an AST node back into source code, is included in Python 3.9+. When showing what a node represents, this is quite helpful.
import ast
source = "result = [x**2 for x in range(10) if x % 2 == 0]"

# The sample is an assignment *statement*, so it must be parsed in the
# default "exec" mode; mode="eval" accepts only a single expression and
# would raise SyntaxError here.
tree = ast.parse(source)

# Round-trip: source → AST → source
print(ast.unparse(tree))
# result = [x ** 2 for x in range(10) if x % 2 == 0]
NodeTransformer: Modifying the AST
ast.NodeTransformer is like NodeVisitor but lets you return modified nodes — effectively rewriting code.
Replace All print() with logger.info()
import ast
class PrintToLogger(ast.NodeTransformer):
    """Rewrite every bare ``print(...)`` call into ``logger.info(...)``.

    Only the callee is swapped; positional and keyword arguments are
    passed through untouched.
    """

    def visit_Call(self, node):
        self.generic_visit(node)  # Transform children first

        callee = node.func
        if not (isinstance(callee, ast.Name) and callee.id == "print"):
            return node

        # Replace print(x) with logger.info(x)
        logger_name = ast.Name(id="logger", ctx=ast.Load())
        node.func = ast.Attribute(value=logger_name, attr="info", ctx=ast.Load())
        return node
# Sample program containing two print() calls to rewrite.
source = """
print("Starting process")
result = compute()
print(f"Done: {result}")
"""
tree = ast.parse(source)
# visit() returns the transformed tree; keep the returned value.
new_tree = PrintToLogger().visit(tree)
ast.fix_missing_locations(new_tree) # Fill in missing line numbers
# Turn the rewritten tree back into source text.
print(ast.unparse(new_tree))
logger.info('Starting process')
result = compute()
logger.info(f'Done: {result}')
Safe Expression Evaluation: ast.literal_eval()
ast.literal_eval() safely evaluates Python literals without executing arbitrary code. It's the safe alternative to eval():
import ast
# Safely parse Python literals from strings
print(ast.literal_eval("42"))            # 42
print(ast.literal_eval("[1, 2, 3]"))     # [1, 2, 3]
print(ast.literal_eval("{'a': 1}"))      # {'a': 1}
print(ast.literal_eval("(True, None)"))  # (True, None)

# Raises ValueError for anything that isn't a literal. Catch it here so
# the demonstration does not abort the rest of the script.
for rejected in (
    "os.system('rm -rf /')",  # ❌ ValueError — safe!
    "1 + 2",                  # ❌ ValueError — expressions not allowed
):
    try:
        ast.literal_eval(rejected)
    except ValueError as exc:
        print(f"Rejected {rejected!r}: {exc}")

# Common use case: parse config values from env vars or config files
import os
raw = os.getenv("ALLOWED_HOSTS", '["localhost", "127.0.0.1"]')
allowed_hosts = ast.literal_eval(raw)
print(allowed_hosts)  # ['localhost', '127.0.0.1']
Building a Simple Linter
Let's put it all together — a basic linter that checks for common issues:
import ast
import sys
from pathlib import Path
class SimpleLinter(ast.NodeVisitor):
    """Collect ``file:line: message`` warnings for a few common issues.

    Checks: mutable default arguments, more than 7 positional parameters,
    missing docstrings, ``print()`` calls and wildcard imports. Results
    accumulate in ``self.issues`` in visiting order.
    """

    def __init__(self, filename: str):
        self.filename = filename
        self.issues = []

    def warn(self, node, message: str):
        """Record *message*, prefixed with the file name and *node*'s line."""
        self.issues.append(f"{self.filename}:{node.lineno}: {message}")

    def visit_FunctionDef(self, node):
        """Run the per-function checks, then descend into the body."""
        # Check for mutable defaults. kw_defaults holds None for
        # keyword-only parameters without a default; isinstance skips those.
        for default in (*node.args.defaults, *node.args.kw_defaults):
            if isinstance(default, (ast.List, ast.Dict, ast.Set)):
                self.warn(node, f"Mutable default argument in '{node.name}': {ast.unparse(default)}")

        # Check for functions with too many arguments
        num_args = len(node.args.args)
        if num_args > 7:
            self.warn(node, f"'{node.name}' has {num_args} arguments (max recommended: 7)")

        # Check for missing docstring. get_docstring() only accepts a
        # leading *string* expression, so a bare constant such as `42`
        # is not mistaken for documentation.
        if ast.get_docstring(node, clean=False) is None:
            self.warn(node, f"'{node.name}' is missing a docstring")

        self.generic_visit(node)

    # `async def` nodes carry the same fields, so lint them identically.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_Call(self, node):
        # Flag print() calls
        if isinstance(node.func, ast.Name) and node.func.id == "print":
            self.warn(node, "print() call found — use logging instead")
        self.generic_visit(node)

    def visit_Import(self, node):
        # Flag wildcard imports. NOTE: `import *` is not valid syntax, so
        # this can only trigger on hand-built trees; kept for symmetry.
        for alias in node.names:
            if alias.name == "*":
                self.warn(node, "Wildcard import found — be explicit")
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        # Flag `from module import *`
        for alias in node.names:
            if alias.name == "*":
                self.warn(node, f"Wildcard import from '{node.module}' — be explicit")
        self.generic_visit(node)
def lint_file(filepath: str) -> list[str]:
    """Lint one Python file and return its issues.

    Args:
        filepath: Path of the source file to analyse.

    Returns:
        The ``file:line: message`` strings collected by SimpleLinter, in
        the order they were found (empty when the file is clean).

    Raises:
        OSError: If the file cannot be read.
        SyntaxError: If the file is not valid Python.
    """
    # Python source is UTF-8 by default; decode explicitly instead of
    # relying on the platform's locale encoding.
    source = Path(filepath).read_text(encoding="utf-8")
    tree = ast.parse(source, filename=filepath)
    linter = SimpleLinter(filepath)
    linter.visit(tree)
    return linter.issues
if __name__ == "__main__":
    # Lint every path given on the command line, reporting per file.
    for path in sys.argv[1:]:
        issues = lint_file(path)
        if issues:
            # One issue per line, exactly as collected.
            print(*issues, sep="\n")
        else:
            print("No issues found ")
Run it:
python linter.py mymodule.py
mymodule.py:3: 'process' is missing a docstring
mymodule.py:12: Mutable default argument in 'append_items': []
mymodule.py:20: print() call found — use logging instead
Summary
The ast module unlocks a class of tools that most developers assume require in-depth compiler knowledge. In practice, Python's AST is powerful, approachable, and well documented. It is a good fit for:
- Static analysis: measuring complexity, enforcing style, and detecting problems.
- Code generation: producing Python code programmatically.
- Refactoring: rewriting code at the AST level.
- Safe evaluation: parsing literals without using eval().

ast is your starting point if you're creating developer tools, a code review bot, a custom linter, or anything else that requires a structural understanding of Python code.
Additional reading:
- Docs for the ast module: docs.python.org/3/library/ast.html
- astpretty: pretty-prints AST nodes
- libcst: a concrete syntax tree library for lossless code transformation
Top comments (0)