869 lines
35 KiB
Python
869 lines
35 KiB
Python
"""
|
||
ast_introduction.py – A Guided Tour of Python's ast Module
|
||
===========================================================
|
||
AISE501 · AST Exercises · Spring Semester 2026
|
||
|
||
PURPOSE
|
||
-------
|
||
This file is a **reference and tutorial** that introduces every class and
|
||
method from Python's ``ast`` module that you will need in the exercises.
|
||
|
||
It is organised into seven sections:
|
||
1. Parsing: How source code becomes a tree
|
||
2. Node hierarchy: The class hierarchy of AST nodes
|
||
3. Statement nodes: Import, FunctionDef, ClassDef, Assign, ...
|
||
4. Expression nodes: Call, Attribute, Name, Constant, BinOp, ...
|
||
5. Traversal methods: ast.walk(), ast.NodeVisitor, ast.iter_child_nodes()
|
||
6. Utility functions: ast.dump(), ast.unparse(), ast.get_docstring()
|
||
7. Putting it all together: A mini-analysis pipeline
|
||
|
||
Each section contains:
|
||
- A short explanation of the concept
|
||
- Live code that runs and prints output
|
||
- Inline comments explaining every line
|
||
|
||
HOW TO USE
|
||
----------
|
||
Run this file from the ast_exercises/ folder:
|
||
|
||
python ast_introduction.py
|
||
|
||
Read the output alongside the source code. This is a *learning* file --
|
||
you are encouraged to modify it, add print statements, and experiment.
|
||
|
||
PREREQUISITES
|
||
-------------
|
||
- Python 3.9+ (for ast.unparse and the indent= argument of ast.dump)
|
||
- No external packages required
|
||
"""
|
||
|
||
import ast
|
||
import textwrap
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 1: PARSING – How Source Code Becomes a Tree ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# The entry point to the ast module is ast.parse(). It takes a string of
|
||
# valid Python source code and returns the root node of the AST.
|
||
#
|
||
# Key function:
|
||
# ast.parse(source, filename='<unknown>', mode='exec')
|
||
# source : str – the Python source code to parse
|
||
# filename: str – used in error messages only (optional)
|
||
# mode : str – 'exec' (module), 'eval' (expression), 'single' (statement)
|
||
#
|
||
# The return value is always an ast.Module object (when mode='exec').
|
||
|
||
# Section banner: rule, title, rule.
print("=" * 72, "SECTION 1: PARSING", "=" * 72, sep="\n")

# Sample input: a two-statement Python program held as a plain string.
example_source = textwrap.dedent("""\
import math
x = math.sqrt(16)
""")

# Parsing only analyses the structure of the text; the sample is never executed.
tree = ast.parse(example_source)

# With the default mode='exec' the root of the tree is an ast.Module.
root_kind = type(tree).__name__
print("\nReturn type of ast.parse(): " + root_kind)
print(" -> This is always 'Module' when mode='exec'")
print(" -> The Module node represents the entire file/script")

# Module.body holds the top-level statements in source order.
top_level = tree.body
print(f"\nNumber of top-level statements: {len(top_level)}")
for position, statement in enumerate(top_level):
    statement_kind = type(statement).__name__
    print(f" [{position}] {statement_kind} (line {statement.lineno})")

# mode="eval" parses exactly one expression and wraps it in ast.Expression.
expr_tree = ast.parse("3 + 4 * 2", mode="eval")
print(f"\nParsing in 'eval' mode returns: {type(expr_tree).__name__}")
print(" -> The Expression node wraps a single expression")
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 2: NODE HIERARCHY – The Class Hierarchy of AST Nodes ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# Every node in the AST is an instance of a class defined in the ast module.
|
||
# The hierarchy (simplified):
|
||
#
|
||
# ast.AST <-- Base class for ALL nodes
|
||
# ├── ast.mod <-- Module-level nodes
|
||
# │ ├── ast.Module <-- A file/script (mode='exec')
|
||
# │ ├── ast.Expression <-- A single expression (mode='eval')
|
||
# │ └── ast.Interactive <-- A single statement (mode='single')
|
||
# │
|
||
# ├── ast.stmt <-- Statement nodes (things that DO something)
|
||
# │ ├── ast.FunctionDef <-- def foo(): ...
|
||
# │ ├── ast.AsyncFunctionDef <-- async def foo(): ...
|
||
# │ ├── ast.ClassDef <-- class Foo: ...
|
||
# │ ├── ast.Return <-- return x
|
||
# │ ├── ast.Assign <-- x = 42
|
||
# │ ├── ast.AnnAssign <-- x: int = 42
|
||
# │ ├── ast.AugAssign <-- x += 1
|
||
# │ ├── ast.For <-- for x in y: ...
|
||
# │ ├── ast.While <-- while cond: ...
|
||
# │ ├── ast.If <-- if cond: ...
|
||
# │ ├── ast.With <-- with ctx as x: ...
|
||
# │ ├── ast.Raise <-- raise ValueError(...)
|
||
# │ ├── ast.Try <-- try: ... except: ...
|
||
# │ ├── ast.Import <-- import os
|
||
# │ ├── ast.ImportFrom <-- from os import path
|
||
# │ ├── ast.Expr <-- a bare expression used as a statement
|
||
# │ │ (e.g. a docstring, or a function call
|
||
# │ │ whose return value is discarded)
|
||
# │ └── ast.Pass / Break / Continue
|
||
# │
|
||
# ├── ast.expr <-- Expression nodes (things that PRODUCE a value)
|
||
# │ ├── ast.Name <-- a variable name: x, self, np
|
||
# │ ├── ast.Constant <-- a literal: 42, "hello", None, True
|
||
# │ ├── ast.Attribute <-- dotted access: self.data, np.array
|
||
# │ ├── ast.Call <-- a function call: foo(), np.mean(x)
|
||
# │ ├── ast.BinOp <-- binary operation: x + y, a * b
|
||
# │ ├── ast.UnaryOp <-- unary operation: -x, not x
|
||
# │ ├── ast.Compare <-- comparison: x > 0, a == b
|
||
# │ ├── ast.BoolOp <-- boolean: x and y, a or b
|
||
# │ ├── ast.Subscript <-- indexing: x[0], data["key"]
|
||
# │ ├── ast.List / Tuple / Set / Dict <-- container literals
|
||
# │ ├── ast.ListComp <-- list comprehension: [x for x in y]
|
||
# │ ├── ast.Lambda <-- lambda x: x + 1
|
||
# │ ├── ast.IfExp <-- ternary: a if cond else b
|
||
# │ └── ast.JoinedStr <-- f-string: f"Hello {name}"
|
||
# │
|
||
# └── other <-- Helper / context nodes
|
||
# ├── ast.arguments <-- the argument list of a function
|
||
# ├── ast.arg <-- a single parameter (name + annotation)
|
||
# ├── ast.keyword <-- a keyword argument in a call
|
||
# ├── ast.alias <-- an import alias (import X as Y)
|
||
# └── ast.Load / Store / Del <-- name context (reading / writing / deleting)
|
||
#
|
||
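# Statement and expression nodes (subclasses of ast.stmt / ast.expr) carry
# source-position attributes. Nodes such as ast.Module, the operators
# (ast.Add, ...) and the contexts (ast.Load, ...) do NOT have them:
# - lineno : int – the source line number (1-based)
# - col_offset : int – the column offset (0-based, counted in UTF-8 bytes)
# - end_lineno : int – where the node ends (line)
# - end_col_offset: int – where the node ends (column)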
|
||
#
|
||
# Node-specific attributes are documented below in each section.
|
||
|
||
# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 72, "SECTION 2: NODE HIERARCHY", "=" * 72, sep="\n")

# Check the claim directly: every node reachable from the root is an ast.AST.
code = "x = 1 + 2"
small_tree = ast.parse(code)
print("\nAll nodes are subclasses of ast.AST:")
for visited in ast.walk(small_tree):
    label = type(visited).__name__
    is_ast_node = isinstance(visited, ast.AST)
    print(f" {label:20s} isinstance(node, ast.AST) = {is_ast_node}")
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 3: STATEMENT NODES – The Building Blocks of a Program ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# Statements are things that DO something: define a function, import a
|
||
# module, assign a variable, loop, branch, etc. They form the top-level
|
||
# .body list of a Module, and also the .body lists inside classes,
|
||
# functions, if-blocks, loops, etc.
|
||
#
|
||
# Below we explore the most important statement nodes.
|
||
|
||
# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 72, "SECTION 3: STATEMENT NODES", "=" * 72, sep="\n")
|
||
|
||
# ── 3a. ast.Import and ast.ImportFrom ──────────────────────────────────────
|
||
#
|
||
# ast.Import represents: import os, sys
|
||
# ast.ImportFrom represents: from os.path import join, exists
|
||
#
|
||
# Attributes:
|
||
# Import:
|
||
# .names : list[ast.alias] – each alias has .name and .asname
|
||
#
|
||
# ImportFrom:
# .module : str or None – the module being imported from (e.g. "os.path");
#           None for a bare relative import such as "from . import utils"
# .names : list[ast.alias] – the imported names
# .level : int – number of leading dots for relative imports (0 = absolute)
|
||
#
|
||
# ast.alias:
|
||
# .name : str – the real name (e.g. "numpy")
|
||
# .asname : str or None – the alias (e.g. "np" in "import numpy as np")
|
||
|
||
print("\n── 3a. Import nodes ──")

# Sample covering a plain import, an aliased import, a from-import and a
# relative import (module is None, level is 1).
import_code = textwrap.dedent("""\
import os
import numpy as np
from scipy.stats import ttest_ind, norm
from . import utils
""")
import_tree = ast.parse(import_code)


def _alias_text(alias):
    """Render an ast.alias as written in source: "name" or "name as alias"."""
    return f"{alias.name} as {alias.asname}" if alias.asname else alias.name


for node in import_tree.body:
    if isinstance(node, ast.Import):
        # One ast.Import can bind several names ("import os, sys").
        for alias in node.names:
            print(f" import {_alias_text(alias)}")
            print(f" -> ast.Import, alias.name='{alias.name}', alias.asname={alias.asname!r}")

    elif isinstance(node, ast.ImportFrom):
        imported = ", ".join(_alias_text(a) for a in node.names)
        # level counts the leading dots; module is None for "from . import x".
        origin = "." * node.level + (node.module or "")
        print(f" from {origin} import {imported}")
        # !r keeps a real None distinguishable from a module literally named
        # "None", matching how alias.asname is shown for ast.Import above.
        print(f" -> ast.ImportFrom, module={node.module!r}, level={node.level}")
|
||
|
||
|
||
# ── 3b. ast.FunctionDef ───────────────────────────────────────────────────
|
||
#
|
||
# Represents a function or method definition.
|
||
#
|
||
# Attributes:
|
||
# .name : str – function name
|
||
# .args : ast.arguments – the parameter specification
|
||
# .body : list[ast.stmt] – the function body (list of statements)
|
||
# .decorator_list : list[ast.expr] – decorators (@staticmethod, etc.)
|
||
# .returns : ast.expr or None – return type annotation
|
||
# .lineno : int – line number of 'def'
|
||
#
|
||
# The .args attribute is an ast.arguments object with:
# .posonlyargs : list[ast.arg] – positional-only parameters (those before "/")
# .args : list[ast.arg] – regular positional-or-keyword parameters
# .vararg : ast.arg or None – *args
# .kwonlyargs : list[ast.arg] – keyword-only parameters
# .kwarg : ast.arg or None – **kwargs
# .defaults : list[ast.expr] – default values for the LAST N positional
#             parameters (posonlyargs + args), i.e. right-aligned
# .kw_defaults : list – one entry per kwonlyarg; None means "no default"
|
||
#
|
||
# Each ast.arg has:
|
||
# .arg : str – parameter name
|
||
# .annotation : ast.expr or None – type annotation
|
||
|
||
print("\n── 3b. FunctionDef ──")

# The sample's body must be indented under its "def": without that
# indentation ast.parse() rejects the text with an IndentationError.
func_code = textwrap.dedent("""\
@staticmethod
def calculate(data: list[float], threshold: float = 0.05) -> dict:
    \"\"\"Perform a calculation.\"\"\"
    result = sum(data)
    return {"total": result}
""")
func_tree = ast.parse(func_code)
func_node = func_tree.body[0]  # the only top-level statement: the FunctionDef

print(f"\n Function name : {func_node.name}")
print(f" Line number : {func_node.lineno}")
print(f" Decorators : {[ast.unparse(d) for d in func_node.decorator_list]}")
print(f" Return type : {ast.unparse(func_node.returns) if func_node.returns else 'None'}")
print(f" Body length : {len(func_node.body)} statements")

# Inspect the positional parameters and their annotations.
args = func_node.args
print(f"\n Parameters ({len(args.args)} total):")
for arg in args.args:
    ann = ast.unparse(arg.annotation) if arg.annotation else "no annotation"
    print(f" {arg.arg}: {ann}")

# Defaults are right-aligned: with 2 parameters and 1 default, the default
# belongs to the LAST parameter.
print(f" Defaults: {[ast.unparse(d) for d in args.defaults]}")
print(" -> defaults are right-aligned to parameters")
|
||
|
||
|
||
# ── 3c. ast.ClassDef ─────────────────────────────────────────────────────
|
||
#
|
||
# Represents a class definition.
|
||
#
|
||
# Attributes:
|
||
# .name : str – class name
|
||
# .bases : list[ast.expr] – base classes
|
||
# .keywords : list[ast.keyword]– metaclass etc.
|
||
# .body : list[ast.stmt] – class body (methods, attributes, ...)
|
||
# .decorator_list : list[ast.expr] – decorators
|
||
|
||
print("\n── 3c. ClassDef ──")

# The sample keeps its nesting (class -> method -> body); a flattened copy
# is not valid Python and ast.parse() would raise IndentationError.
class_code = textwrap.dedent("""\
class DataProcessor(BaseProcessor):
    \"\"\"Process data for analysis.\"\"\"

    def __init__(self, data: list):
        self.data = data
        self.result = None

    @staticmethod
    def validate(item):
        return item is not None

    def process(self) -> list:
        return [x for x in self.data if self.validate(x)]
""")
class_tree = ast.parse(class_code)
class_node = class_tree.body[0]  # the ClassDef for DataProcessor

print(f"\n Class name : {class_node.name}")
print(f" Base classes: {[ast.unparse(b) for b in class_node.bases]}")
print(f" Decorators : {[ast.unparse(d) for d in class_node.decorator_list]}")
print(f" Body has {len(class_node.body)} items:")
for item in class_node.body:
    if isinstance(item, ast.FunctionDef):
        print(f" FunctionDef: {item.name}() (line {item.lineno})")
    elif isinstance(item, ast.Expr):
        # In this sample the only bare expression is the class docstring.
        print(f" Expr: (docstring) (line {item.lineno})")
    else:
        print(f" {type(item).__name__} (line {item.lineno})")
|
||
|
||
|
||
# ── 3d. ast.Assign and ast.AnnAssign ─────────────────────────────────────
|
||
#
|
||
# ast.Assign: x = 42 (no type annotation)
|
||
# .targets : list[ast.expr] – what is assigned to (can be multiple: a = b = 1)
|
||
# .value : ast.expr – the right-hand side
|
||
#
|
||
# ast.AnnAssign: x: int = 42 (with type annotation)
|
||
# .target : ast.expr – what is assigned to (single target)
|
||
# .annotation: ast.expr – the type annotation
|
||
# .value : ast.expr or None – the value (None if just a declaration)
|
||
|
||
print("\n── 3d. Assign and AnnAssign ──")

# Four assignment flavours: plain, chained, annotated with a value, and
# annotated without a value (a bare declaration).
assign_code = textwrap.dedent("""\
x = 42
a = b = [1, 2, 3]
name: str = "Alice"
count: int
""")
assign_tree = ast.parse(assign_code)

for statement in assign_tree.body:
    if isinstance(statement, ast.AnnAssign):
        # AnnAssign has exactly one target; value is None for "count: int".
        target_text = ast.unparse(statement.target)
        annotation_text = ast.unparse(statement.annotation)
        if statement.value:
            val = ast.unparse(statement.value)
        else:
            val = "(no value)"
        print(f" AnnAssign: {target_text}: {annotation_text} = {val}")
    elif isinstance(statement, ast.Assign):
        # Assign.targets has one entry per "=" on the left-hand side.
        targets = ", ".join(ast.unparse(t) for t in statement.targets)
        print(f" Assign: {targets} = {ast.unparse(statement.value)}")
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 4: EXPRESSION NODES – Things That Produce Values ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# Expressions are things that PRODUCE a value. They appear inside
|
||
# statements: the right-hand side of an assignment, function arguments,
|
||
# conditions in if-statements, etc.
|
||
|
||
# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 72, "SECTION 4: EXPRESSION NODES", "=" * 72, sep="\n")
|
||
|
||
# ── 4a. ast.Name ─────────────────────────────────────────────────────────
|
||
#
|
||
# Represents a variable reference.
|
||
#
|
||
# Attributes:
|
||
# .id : str – the variable name (e.g. "x", "self", "np")
|
||
# .ctx : ast.Load / ast.Store / ast.Del
|
||
# – context: is the name being read (Load), written to (Store),
|
||
# or deleted (Del)?
|
||
#
|
||
# ast.Name appears everywhere: in assignments (Store), in expressions (Load),
# as the target of simple function calls (foo()), etc. Imported names are
# NOT ast.Name nodes: ast.alias stores them as plain strings.
|
||
|
||
print("\n── 4a. ast.Name ──")

name_code = "x = y + z"
name_tree = ast.parse(name_code)

# Keep only the Name nodes, in the order ast.walk() produces them.
for name_node in (n for n in ast.walk(name_tree) if isinstance(n, ast.Name)):
    # ctx is an instance of ast.Load / ast.Store / ast.Del; show its class name.
    context_name = type(name_node.ctx).__name__
    print(f" Name(id='{name_node.id}', ctx={context_name})")
|
||
|
||
|
||
# ── 4b. ast.Constant ────────────────────────────────────────────────────
|
||
#
|
||
# Represents a literal value: number, string, boolean, None, bytes.
|
||
#
|
||
# Attributes:
|
||
# .value : the Python value (int, float, str, bool, None, bytes, ...)
|
||
#
|
||
# Note: In Python 3.8+, ast.Constant replaces the older ast.Num, ast.Str,
|
||
# ast.NameConstant, ast.Bytes, and ast.Ellipsis nodes.
|
||
|
||
print("\n── 4b. ast.Constant ──")

# One statement per literal kind: int, str, bool, None, float.
const_code = 'x = 42; y = "hello"; z = True; w = None; f = 3.14'
const_tree = ast.parse(const_code)

for literal in ast.walk(const_tree):
    if not isinstance(literal, ast.Constant):
        continue
    python_value = literal.value
    print(f" Constant(value={python_value!r}, type={type(python_value).__name__})")
|
||
|
||
|
||
# ── 4c. ast.Attribute ───────────────────────────────────────────────────
|
||
#
|
||
# Represents dotted access: self.data, np.array, os.path.join
|
||
#
|
||
# Attributes:
|
||
# .value : ast.expr – the object (e.g. Name(id='self') or another Attribute)
|
||
# .attr : str – the attribute name (e.g. "data", "array")
|
||
# .ctx : Load / Store / Del
|
||
#
|
||
# IMPORTANT: Chained attributes are nested.
|
||
# self.sections.append becomes:
|
||
# Attribute(
|
||
# value=Attribute(
|
||
# value=Name(id='self'),
|
||
# attr='sections'
|
||
# ),
|
||
# attr='append'
|
||
# )
|
||
|
||
print("\n── 4c. ast.Attribute ──")

attr_code = textwrap.dedent("""\
self.data = np.array([1, 2, 3])
result = self.data.mean()
""")
attr_tree = ast.parse(attr_code)

for dotted in ast.walk(attr_tree):
    if not isinstance(dotted, ast.Attribute):
        continue
    # .value is whatever stands left of the final dot; unparse it for display.
    owner_text = ast.unparse(dotted.value)
    print(f" Attribute: .attr='{dotted.attr}', value={owner_text}")
    print(f" -> full expression: {ast.unparse(dotted)}")
|
||
|
||
|
||
# ── 4d. ast.Call ────────────────────────────────────────────────────────
|
||
#
|
||
# Represents a function or method call.
|
||
#
|
||
# Attributes:
|
||
# .func : ast.expr – what is being called
|
||
# (ast.Name for foo(), ast.Attribute for obj.method())
|
||
# .args : list[ast.expr] – positional arguments
|
||
# .keywords : list[ast.keyword] – keyword arguments (each has .arg and .value)
|
||
#
|
||
# This is the MOST IMPORTANT expression node for code analysis.
|
||
# In the exercises, you will use it to build call graphs.
|
||
|
||
print("\n── 4d. ast.Call ──")

call_code = textwrap.dedent("""\
print("hello")
result = np.mean(data, axis=0)
self.process(items, verbose=True)
""")
call_tree = ast.parse(call_code)

for call in ast.walk(call_tree):
    if not isinstance(call, ast.Call):
        continue

    # Classify the callee: a bare name, a dotted attribute, or anything else.
    callee = call.func
    if isinstance(callee, ast.Name):
        callable_name, call_type = callee.id, "simple (Name)"
    else:
        callable_name = ast.unparse(callee)
        if isinstance(callee, ast.Attribute):
            call_type = "attribute (obj.method)"
        else:
            call_type = "other"

    # Positional arguments live in .args, keyword arguments in .keywords.
    kw_names = [keyword.arg for keyword in call.keywords]

    print(f" Call: {callable_name}()")
    print(f" type : {call_type}")
    print(f" positional: {len(call.args)} args")
    print(f" keyword : {len(call.keywords)} args {kw_names}")
    print()
|
||
|
||
|
||
# ── 4e. ast.BinOp ──────────────────────────────────────────────────────
|
||
#
|
||
# Represents a binary operation: x + y, a * b, etc.
|
||
#
|
||
# Attributes:
|
||
# .left : ast.expr – the left operand
|
||
# .op : ast.operator – the operator (Add, Sub, Mult, Div, Pow, Mod, ...)
|
||
# .right : ast.expr – the right operand
|
||
|
||
print("── 4e. ast.BinOp ──")

binop_code = "result = (a + b) * c - d / e"
binop_tree = ast.parse(binop_code)

for operation in ast.walk(binop_tree):
    if not isinstance(operation, ast.BinOp):
        continue
    # The operator is itself a node (Add, Sub, Mult, Div, ...); show its class name.
    left_text = ast.unparse(operation.left)
    right_text = ast.unparse(operation.right)
    print(f" BinOp: {left_text} {type(operation.op).__name__} {right_text}")
    print(f" -> full: {ast.unparse(operation)}")
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 5: TRAVERSAL METHODS ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# The ast module provides three ways to traverse a tree:
|
||
#
|
||
# 1. ast.walk(node)
# - Generator that yields every node in the subtree, including node itself.
#   The order is officially unspecified (CPython currently walks
#   breadth-first, but do not rely on it).
# - Simplest approach; no parent information
# - Use when: you need to find/count all nodes of a type
|
||
#
|
||
# 2. ast.NodeVisitor
|
||
# - Subclass it and define visit_<NodeType>() methods
|
||
# - The framework dispatches to the correct method automatically
|
||
# - Use when: you need different logic for different node types
|
||
# - MUST call self.generic_visit(node) to continue to children
|
||
#
|
||
# 3. ast.iter_child_nodes(node)
|
||
# - Generator that yields the direct children of a single node
|
||
# - Use when: you need parent-child relationships (see Step 6 of ast_demo.py)
|
||
#
|
||
# There is also ast.NodeTransformer (subclass of NodeVisitor) for MODIFYING
|
||
# the tree, but we don't use it in these exercises.
|
||
|
||
# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 72, "SECTION 5: TRAVERSAL METHODS", "=" * 72, sep="\n")
|
||
|
||
# ── 5a. ast.walk() ──────────────────────────────────────────────────────
|
||
|
||
print("\n── 5a. ast.walk() ──")

# The sample keeps its class/method nesting; a flattened copy is not valid
# Python and ast.parse() would raise IndentationError.
sample = textwrap.dedent("""\
class Foo:
    def bar(self):
        return self.baz()
    def baz(self):
        return 42
""")
sample_tree = ast.parse(sample)

# ast.walk yields every node in the tree; isinstance filters for the type we want.
function_defs = [n for n in ast.walk(sample_tree) if isinstance(n, ast.FunctionDef)]
print(f" Found {len(function_defs)} FunctionDef nodes via ast.walk():")
for fd in function_defs:
    print(f" - {fd.name}() at line {fd.lineno}")
|
||
|
||
|
||
# ── 5b. ast.NodeVisitor ──────────────────────────────────────────────────
|
||
|
||
# Sub-section heading plus a pointer to the longer walkthrough.
print("\n── 5b. ast.NodeVisitor ──", " (Detailed example in ast_demo.py, Steps 3-4)", sep="\n")
|
||
|
||
# Quick demonstration: a visitor that counts calls and names.
|
||
|
||
|
||
class CounterVisitor(ast.NodeVisitor):
    """Tally how many ast.Call and ast.Name nodes a tree contains.

    After ``visit(tree)`` the totals are available as ``call_count`` and
    ``name_count``.
    """

    def __init__(self):
        self.call_count = 0
        self.name_count = 0

    def _tally(self, counter_name, node):
        """Add one to the named counter, then keep descending below *node*."""
        setattr(self, counter_name, getattr(self, counter_name) + 1)
        # Without generic_visit() the traversal would stop at this node and
        # nested calls/names (e.g. the arguments of a call) would be missed.
        self.generic_visit(node)

    def visit_Call(self, node):
        """Count a call expression and continue into its children."""
        self._tally("call_count", node)

    def visit_Name(self, node):
        """Count a name reference and continue into its children."""
        self._tally("name_count", node)
|
||
|
||
|
||
# Run the counting visitor over the Foo sample parsed in 5a.
counter = CounterVisitor()
counter.visit(sample_tree)
print(" In the 'Foo' class:")
for label, total in (("Call", counter.call_count), ("Name", counter.name_count)):
    print(f" {label} nodes: {total}")

print()
print(
    " REMINDER: If you forget self.generic_visit(node), the visitor",
    " STOPS recursing into that node's children. This is the #1 mistake.",
    sep="\n",
)
|
||
|
||
|
||
# ── 5c. ast.iter_child_nodes() ──────────────────────────────────────────
|
||
|
||
print("\n── 5c. ast.iter_child_nodes() ──")

# ast.iter_child_nodes yields only the DIRECT children of a node, which is
# what you need when recording parent-child relationships.

class_node = sample_tree.body[0]  # The ClassDef for 'Foo'
print(f" Direct children of ClassDef '{class_node.name}':")
for child in ast.iter_child_nodes(class_node):
    # Build the whole line first, then emit it with a single print().
    line = f" {type(child).__name__}"
    if hasattr(child, "name"):
        line += f" (name='{child.name}')"
    print(line)
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 6: UTILITY FUNCTIONS ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# ast.dump(node, indent=None)
|
||
# - Returns a string representation of the AST (for debugging)
|
||
# - With indent=2, prints a nicely formatted multi-line tree
|
||
#
|
||
# ast.unparse(node) [Python 3.9+]
|
||
# - Converts an AST node BACK into Python source code
|
||
# - Useful for printing type annotations, expressions, etc.
|
||
# - Does NOT reproduce the original formatting (comments are lost)
|
||
#
|
||
# ast.get_docstring(node)
|
||
# - Returns the docstring of a Module, ClassDef, or FunctionDef
|
||
# - Returns None if there is no docstring
|
||
# - Under the hood: checks if the first statement in .body is an
|
||
# Expr containing a Constant with a string value
|
||
|
||
# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 72, "SECTION 6: UTILITY FUNCTIONS", "=" * 72, sep="\n")
|
||
|
||
# ── 6a. ast.dump() ──────────────────────────────────────────────────────
|
||
|
||
print("\n── 6a. ast.dump() ──")

tiny_tree = ast.parse("x = 1 + 2")

# Same tree rendered twice: on one line, then as an indented multi-line tree.
compact_dump = ast.dump(tiny_tree)
pretty_dump = ast.dump(tiny_tree, indent=2)

print(" ast.dump (compact):")
print(" " + compact_dump)
print()
print(" ast.dump (indented):")
print(textwrap.indent(pretty_dump, " "))
|
||
|
||
|
||
# ── 6b. ast.unparse() ──────────────────────────────────────────────────
|
||
|
||
print("\n── 6b. ast.unparse() ──")

# ast.unparse turns a node back into source text, which reads far better
# than a raw dump when showing annotations and expressions.
func_code2 = "def greet(name: str, times: int = 1) -> None: pass"
func_tree2 = ast.parse(func_code2).body[0]

print(f" Function: {func_tree2.name}")
annotated_params = [p for p in func_tree2.args.args if p.annotation]
for param in annotated_params:
    # Same annotation shown twice: raw AST dump versus regenerated Python.
    print(f" {param.arg}: dump={ast.dump(param.annotation)}")
    print(f" {param.arg}: unparse={ast.unparse(param.annotation)}")

return_annotation = func_tree2.returns
if return_annotation:
    print(f" Returns: {ast.unparse(return_annotation)}")
|
||
|
||
|
||
# ── 6c. ast.get_docstring() ────────────────────────────────────────────
|
||
|
||
print("\n── 6c. ast.get_docstring() ──")

# The sample keeps its class/method nesting; a flattened copy is not valid
# Python and ast.parse() would raise IndentationError.
doc_code = textwrap.dedent("""\
class MyClass:
    \"\"\"This is the class docstring.\"\"\"

    def my_method(self):
        \"\"\"This is the method docstring.\"\"\"
        pass

    def no_doc(self):
        pass
""")
doc_tree = ast.parse(doc_code)
cls = doc_tree.body[0]  # the ClassDef for MyClass

print(f" Class '{cls.name}' docstring: {ast.get_docstring(cls)!r}")

# get_docstring returns None for a definition without a docstring (no_doc).
for item in cls.body:
    if isinstance(item, ast.FunctionDef):
        doc = ast.get_docstring(item)
        print(f" Method '{item.name}' docstring: {doc!r}")
|
||
|
||
|
||
# ╔═══════════════════════════════════════════════════════════════════════════╗
|
||
# ║ SECTION 7: PUTTING IT ALL TOGETHER – A Mini Analysis Pipeline ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════╝
|
||
#
|
||
# This section combines all techniques into a short analysis pipeline
|
||
# that extracts a "code inventory" from a source file -- the same kind
|
||
# of analysis you will build in the exercises, but simplified.
|
||
|
||
print("\n" + "=" * 72)
print("SECTION 7: PUTTING IT ALL TOGETHER")
print("=" * 72)

# We'll analyse a small inline program. It is only parsed, never executed,
# so numpy and scipy do not need to be installed. The nesting must be kept:
# a flattened copy is not valid Python and ast.parse() would raise
# IndentationError.
analysis_target = textwrap.dedent("""\
import numpy as np
from scipy import stats

def load_data(path: str) -> np.ndarray:
    \"\"\"Load data from a file.\"\"\"
    return np.loadtxt(path)

class Analyzer:
    \"\"\"Perform statistical analysis.\"\"\"

    def __init__(self, data: np.ndarray):
        self.data = data
        self.results = {}

    def compute_mean(self) -> float:
        return float(np.mean(self.data))

    def run_test(self, threshold: float = 0.05) -> dict:
        stat, p = stats.normaltest(self.data)
        return {"statistic": stat, "p_value": p, "significant": p < threshold}
""")

target_tree = ast.parse(analysis_target)
|
||
|
||
|
||
class FullInventoryVisitor(ast.NodeVisitor):
    """Comprehensive code inventory visitor.

    After ``visit(tree)`` the following attributes are populated:

    - ``imports``: dict mapping each local name to what it refers to.
      ``import numpy as np`` gives ``{"np": "numpy"}``; ``from scipy import
      stats`` gives ``{"stats": "scipy.stats"}``; relative imports keep their
      leading dots (``from . import utils`` gives ``{"utils": ".utils"}``).
    - ``functions``: list of ``{"name", "params", "returns"}`` dicts, one per
      function that is not a method.
    - ``classes``: list of ``{"name", "bases", "docstring", "methods",
      "attributes"}`` dicts. ``methods`` uses the same record shape as
      ``functions``; ``attributes`` lists the ``self.xxx`` names assigned in
      ``__init__``, each once, in discovery order.

    Functions defined inside another function or method are local helpers
    and are not recorded. ``async def`` definitions are recorded like
    ordinary ones.
    """

    def __init__(self):
        self.imports = {}            # local name -> imported module/object
        self.functions = []          # list of {name, params, returns}
        self.classes = []            # list of {name, bases, docstring, methods, attributes}
        self._current_class = None   # class record being filled, or None at module level
        self._function_depth = 0     # > 0 while inside a function body

    def visit_Import(self, node):
        """Record ``import x`` / ``import x as y``."""
        for alias in node.names:
            self.imports[alias.asname or alias.name] = alias.name

    def visit_ImportFrom(self, node):
        """Record ``from m import x``, including relative forms."""
        # node.module is None for "from . import x" and node.level counts the
        # leading dots, so build the origin from both instead of formatting
        # module directly (which would yield "None.x").
        origin = "." * node.level + (node.module or "")
        separator = "." if node.module else ""
        for alias in node.names:
            self.imports[alias.asname or alias.name] = f"{origin}{separator}{alias.name}"

    @staticmethod
    def _describe_params(arguments):
        """Return ``{"name", "type"}`` records for an ast.arguments node.

        Parameters appear in source order. ``self`` is omitted; ``*args`` and
        ``**kwargs`` keep their star prefix in ``name``.
        """
        def record(arg, prefix=""):
            annotation = ast.unparse(arg.annotation) if arg.annotation else None
            return {"name": prefix + arg.arg, "type": annotation}

        positional = [*arguments.posonlyargs, *arguments.args]
        params = [record(arg) for arg in positional if arg.arg != "self"]
        if arguments.vararg:
            params.append(record(arguments.vararg, "*"))
        params.extend(record(arg) for arg in arguments.kwonlyargs)
        if arguments.kwarg:
            params.append(record(arguments.kwarg, "**"))
        return params

    @staticmethod
    def _self_attributes(init_node):
        """Return the ``self.xxx`` names assigned inside *init_node*, deduplicated."""
        names = []
        for sub in ast.walk(init_node):
            if isinstance(sub, ast.Assign):
                targets = sub.targets
            elif isinstance(sub, ast.AnnAssign):
                targets = [sub.target]  # "self.x: int = 0"
            else:
                continue
            for target in targets:
                # "self.a, self.b = ..." stores its targets inside a Tuple/List.
                elements = target.elts if isinstance(target, (ast.Tuple, ast.List)) else [target]
                for element in elements:
                    if (isinstance(element, ast.Attribute)
                            and isinstance(element.value, ast.Name)
                            and element.value.id == "self"
                            and element.attr not in names):
                        names.append(element.attr)
        return names

    def visit_FunctionDef(self, node):
        """Record a top-level function or a method, then visit its body."""
        # Only definitions that are not nested inside another function count;
        # a helper defined inside a method is not a method of the class.
        if self._function_depth == 0:
            info = {
                "name": node.name,
                "params": self._describe_params(node.args),
                "returns": ast.unparse(node.returns) if node.returns else None,
            }
            if self._current_class is not None:
                self._current_class["methods"].append(info)
            else:
                self.functions.append(info)

        self._function_depth += 1
        try:
            self.generic_visit(node)
        finally:
            self._function_depth -= 1

    # "async def" nodes have the same fields, so they are handled identically.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_ClassDef(self, node):
        """Record a class, its ``__init__`` attributes and its methods."""
        class_info = {
            "name": node.name,
            "bases": [ast.unparse(b) for b in node.bases],
            "docstring": ast.get_docstring(node),
            "methods": [],
            "attributes": [],
        }

        # Find __init__ and extract the self.xxx assignments.
        for item in node.body:
            if isinstance(item, ast.FunctionDef) and item.name == "__init__":
                for attr in self._self_attributes(item):
                    if attr not in class_info["attributes"]:
                        class_info["attributes"].append(attr)

        # Visit the body with this class as context. The function depth is
        # reset so methods of a class defined inside a function still count
        # as methods; both values are restored afterwards.
        outer_class, outer_depth = self._current_class, self._function_depth
        self._current_class, self._function_depth = class_info, 0
        try:
            self.generic_visit(node)
        finally:
            self._current_class, self._function_depth = outer_class, outer_depth

        self.classes.append(class_info)
|
||
|
||
|
||
# Run the analysis
|
||
def _format_params(params):
    """Render parameter records as "name: type" (or just "name"), comma-separated."""
    rendered = []
    for param in params:
        if param["type"]:
            rendered.append(f"{param['name']}: {param['type']}")
        else:
            rendered.append(param["name"])
    return ", ".join(rendered)


# Run the analysis over the sample parsed above.
inventory = FullInventoryVisitor()
inventory.visit(target_tree)

# Report 1: local name -> imported module/object.
print("\n Imports:")
for alias, module in inventory.imports.items():
    print(f" {alias} -> {module}")

# Report 2: functions that are not methods.
print("\n Top-level functions:")
for func in inventory.functions:
    signature = _format_params(func["params"])
    print(f" def {func['name']}({signature}) -> {func['returns']}")

# Report 3: classes with their docstring, attributes and methods.
print("\n Classes:")
for cls in inventory.classes:
    base_list = ", ".join(cls["bases"])
    print(f" class {cls['name']}({base_list}):")
    print(f" docstring: {cls['docstring']!r}")
    print(f" attributes: {cls['attributes']}")
    for method in cls["methods"]:
        signature = _format_params(method["params"])
        print(f" def {method['name']}({signature}) -> {method['returns']}")
|
||
|
||
|
||
# ── Summary ─────────────────────────────────────────────────────────────────
|
||
|
||
# Closing cheat sheet: held in one constant and printed below the banner.
summary_text = """
PARSING
ast.parse(source) Parse string -> ast.Module

KEY NODE TYPES
ast.Module Root: represents the file
ast.Import / ast.ImportFrom Import statements
ast.FunctionDef Function/method definitions
ast.ClassDef Class definitions
ast.Assign / ast.AnnAssign Assignment statements
ast.Call Function/method calls
ast.Name Variable references
ast.Attribute Dotted access (obj.attr)
ast.Constant Literal values (42, "hello", True)
ast.Expr Bare expression (e.g. docstring)
ast.arguments / ast.arg Parameter specifications

TRAVERSAL
ast.walk(node) Yield all nodes (breadth-first)
ast.NodeVisitor Visitor pattern (visit_X methods)
ast.iter_child_nodes(node) Yield direct children only

UTILITIES
ast.dump(node, indent=2) Debug: print AST structure
ast.unparse(node) Convert AST back to Python source
ast.get_docstring(node) Extract docstring from def/class

You are now ready to start the exercises!
Begin with: python ast_demo.py
Then: python ex01_find_classes_functions.py
"""

print("\n" + "=" * 72, "REFERENCE SUMMARY", "=" * 72, sep="\n")
print(summary_text)
|