AST finished

This commit is contained in:
Michael Schären 2026-05-07 17:26:41 +02:00
parent a5a657250d
commit a62255e30d
4 changed files with 469 additions and 90 deletions

View File

@ -0,0 +1,290 @@
// Dependency graph with each distinct edge listed once. A non-strict digraph
// draws every repeated edge statement as an additional parallel edge.
// Edge colours: red = pipeline, gray = external, black = internal, blue = cross-class.
digraph G {
    rankdir=LR;
    node [shape=box, style=filled, fillcolor=lightblue];

    // Pipeline edges
    "run_analysis_pipeline" -> "remove_nans" [color=red];
    "run_analysis_pipeline" -> "remove_outliers" [color=red];
    "run_analysis_pipeline" -> "full_report" [color=red];
    "run_analysis_pipeline" -> "normality_test" [color=red];
    "run_analysis_pipeline" -> "add_descriptive" [color=red];
    "run_analysis_pipeline" -> "add_hypothesis" [color=red];
    "run_analysis_pipeline" -> "to_dict" [color=red];

    // NOTE(review): the "u" and "c" targets look like truncated module names
    // (second character of the name) -- confirm against the generator script.
    "DataCleaner" -> "u" [color=gray];
    "DataCleaner" -> "c" [color=gray];
    "DataCleaner" -> "remove_outliers" [color=black];

    "DescriptiveStats" -> "u" [color=gray];
    "DescriptiveStats" -> "c" [color=gray];
    "DescriptiveStats" -> "full_report" [color=black];

    "HypothesisTester" -> "c" [color=gray];

    "CurveFitter" -> "u" [color=gray];
    "CurveFitter" -> "r_squared" [color=black];

    "ReportGenerator" -> "add_descriptive" [color=blue];
    "ReportGenerator" -> "add_hypothesis" [color=blue];
    "ReportGenerator" -> "add_curve_fit" [color=blue];
    "ReportGenerator" -> "save" [color=black];
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 252 KiB

View File

@ -169,7 +169,20 @@ print("=" * 60)
def get_param_types(func_node: ast.FunctionDef) -> dict[str, str]:
    """Map parameter name -> annotation type name.

    Positional-only, regular and keyword-only parameters are inspected.
    A parameter named ``self`` is skipped. Only annotations that are a plain
    name (``Foo``), a dotted name (``np.ndarray``) or a string forward
    reference to such a name (``"Foo"``) are recorded; unannotated parameters
    and any other annotation form (e.g. ``list[int]``) are left out.

    Args:
        func_node: The function definition node to inspect.

    Returns:
        A dict from parameter name to the annotation written as source text.
    """
    signature = func_node.args
    # Declaration order: positional-only, then regular, then keyword-only.
    params = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]

    param_types: dict[str, str] = {}
    for param in params:
        if param.arg == "self":
            continue
        annotation = param.annotation
        if isinstance(annotation, ast.Name):
            param_types[param.arg] = annotation.id
        elif isinstance(annotation, ast.Attribute):
            param_types[param.arg] = ast.unparse(annotation)
        elif isinstance(annotation, ast.Constant) and isinstance(annotation.value, str):
            # Forward reference given as a string; accept it only when it is a
            # (dotted) identifier so it matches the Name/Attribute cases above.
            text = annotation.value.strip()
            if text and all(part.isidentifier() for part in text.split(".")):
                param_types[param.arg] = text
    return param_types
# TODO 6: Write a function `find_cross_class_calls(cls_name, method_name)` # TODO 6: Write a function `find_cross_class_calls(cls_name, method_name)`
@ -184,16 +197,29 @@ def get_param_types(func_node: ast.FunctionDef) -> dict[str, str]:
def find_cross_class_calls(cls_name: str, method_name: str) -> list[tuple[str, str]]:
    """Return [(target_class, target_method), ...] for cross-class calls.

    The method node is looked up in ``class_info[cls_name]["methods"]``. Every
    call whose ``"object"`` entry equals the name of an annotated parameter is
    reported as ``(annotation, call["method"])``. Results are grouped by
    parameter, in the order ``get_param_types`` yields them.
    """
    method_node = class_info[cls_name]["methods"][method_name]
    annotated_params = get_param_types(method_node)
    # presumably a list of dicts carrying "object" and "method" keys -- verify
    # against extract_calls
    method_calls = extract_calls(method_node)
    return [
        (type_name, call["method"])
        for param_name, type_name in annotated_params.items()
        for call in method_calls
        if call.get("object") == param_name
    ]
# TODO 7: Print cross-class call edges. # TODO 7: Print cross-class call edges.
# for cls_name, info in class_info.items(): for cls_name, info in class_info.items():
# for method_name in info["methods"]: for method_name in info["methods"]:
# cross = find_cross_class_calls(cls_name, method_name) cross = find_cross_class_calls(cls_name, method_name)
# for target_cls, target_method in cross: for target_cls, target_method in cross:
# print(f" {cls_name}.{method_name}() -> {target_cls}.{target_method}()") print(f" {cls_name}.{method_name}() -> {target_cls}.{target_method}()")
# ── Part D: Full Call Graph as Adjacency List ────────────────────────────── # ── Part D: Full Call Graph as Adjacency List ──────────────────────────────
@ -209,22 +235,22 @@ print("=" * 60)
# Also analyse run_analysis_pipeline() which instantiates classes # Also analyse run_analysis_pipeline() which instantiates classes
# and calls their methods. # and calls their methods.
# call_graph = defaultdict(list) call_graph = defaultdict(list)
#
# # Add internal calls # Add internal calls
# for cls_name, info in class_info.items(): for cls_name, info in class_info.items():
# for method_name in info["methods"]: for method_name in info["methods"]:
# source = f"{cls_name}.{method_name}" source = f"{cls_name}.{method_name}"
# for target in find_internal_calls(cls_name, method_name): for target in find_internal_calls(cls_name, method_name):
# call_graph[source].append(f"{cls_name}.{target}") call_graph[source].append(f"{cls_name}.{target} (internal)")
# for target_cls, target_method in find_cross_class_calls(cls_name, method_name): for target_cls, target_method in find_cross_class_calls(cls_name, method_name):
# call_graph[source].append(f"{target_cls}.{target_method}") call_graph[source].append(f"{target_cls}.{target_method} (cross-class)")
#
# # Print the graph # Print the graph
# for source, targets in sorted(call_graph.items()): for source, targets in sorted(call_graph.items()):
# print(f" {source}") print(f" {source}")
# for t in targets: for t in targets:
# print(f" -> {t}") print(f" -> {t}")
# ── Expected Output (key edges) ──────────────────────────────────────────── # ── Expected Output (key edges) ────────────────────────────────────────────

View File

@ -88,15 +88,15 @@ print("=" * 60)
alias_to_module: dict[str, str] = {} alias_to_module: dict[str, str] = {}
# TODO: iterate over tree.body and fill alias_to_module # TODO: iterate over tree.body and fill alias_to_module
# for node in tree.body: for node in tree.body:
# if isinstance(node, ast.Import): if isinstance(node, ast.Import):
# for alias in node.names: for alias in node.names:
# key = alias.asname if alias.asname else alias.name key = alias.asname if alias.asname else alias.name
# alias_to_module[key] = alias.name alias_to_module[key] = alias.name
# elif isinstance(node, ast.ImportFrom): elif isinstance(node, ast.ImportFrom):
# for alias in node.names: for alias in node.names:
# key = alias.asname if alias.asname else alias.name key = alias.asname if alias.asname else alias.name
# alias_to_module[key] = node.module or "" alias_to_module[key] = node.module or ""
# TODO 2: For each class, find which external modules its methods call. # TODO 2: For each class, find which external modules its methods call.
@ -112,12 +112,27 @@ def get_external_deps(cls_name: str) -> set[str]:
"""Return the set of external module names used by *cls_name*.""" """Return the set of external module names used by *cls_name*."""
deps = set() deps = set()
# TODO: implement # TODO: implement
methods = class_info[cls_name]["methods"]
for method in methods.values():
func_calls = extract_calls(method)
for call in func_calls:
obj = call.get("object")
if not obj:
continue
dep = alias_to_module.get(str(obj))
if dep:
deps.add(dep)
return deps return deps
# for cls_name in class_info: for cls_name in class_info:
# deps = get_external_deps(cls_name) deps = get_external_deps(cls_name)
# print(f"\n {cls_name}: {sorted(deps) if deps else '(none)'}") print(f"\n {cls_name}: {sorted(deps) if deps else '(none)'}")
# ── Part B: Analyse run_analysis_pipeline for Data Flow ──────────────────── # ── Part B: Analyse run_analysis_pipeline for Data Flow ────────────────────
@ -135,6 +150,9 @@ print("=" * 60)
pipeline_func = None pipeline_func = None
# TODO: find it in tree.body # TODO: find it in tree.body
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name == "run_analysis_pipeline":
pipeline_func = node
# TODO 4: Walk the pipeline function and find all variable assignments. # TODO 4: Walk the pipeline function and find all variable assignments.
# For each ast.Assign where the right-hand side is a Call: # For each ast.Assign where the right-hand side is a Call:
@ -149,7 +167,14 @@ pipeline_func = None
var_types: dict[str, str] = {} var_types: dict[str, str] = {}
# TODO: implement by walking pipeline_func # TODO: implement by walking pipeline_func
if pipeline_func:
for node in pipeline_func.body:
if isinstance(node, ast.Assign) and isinstance(node.value, ast.Call):
for target in node.targets:
var_types[ast.unparse(target)] = ast.unparse(node.value.func)
for target, value in var_types.items():
print(f"\n {target}: {value}")
# TODO 5: Now trace method calls on those variables. # TODO 5: Now trace method calls on those variables.
# For each attribute call (e.g. cleaner.remove_nans()): # For each attribute call (e.g. cleaner.remove_nans()):
@ -162,9 +187,19 @@ pipeline_edges: list[tuple[str, str]] = []
# TODO: implement # TODO: implement
if pipeline_func:
    calls = extract_calls(pipeline_func)

    # One edge per method call made on a variable that was assigned from a
    # call earlier in the pipeline (those variables are the keys of var_types).
    for call in calls:
        if call["type"] == "attribute" and call["object"] in var_types:
            # Plain expression instead of f"{call["method"]}": reusing the same
            # quote inside an f-string is a SyntaxError before Python 3.12.
            # For class-qualified targets, prefix var_types[call["object"]].
            pipeline_edges.append(("run_analysis_pipeline", str(call["method"])))

print("\n Data flow edges:")
for source, target in pipeline_edges:
    print(f" {source} -> {target}")
# ── Part C: Export to DOT Format ──────────────────────────────────────────── # ── Part C: Export to DOT Format ────────────────────────────────────────────
@ -183,10 +218,21 @@ print("=" * 60)
# Use the format: (source_label, target_label, edge_type) # Use the format: (source_label, target_label, edge_type)
# where edge_type is one of: "internal", "cross_class", "pipeline", "external" # where edge_type is one of: "internal", "cross_class", "pipeline", "external"
from ex03_method_call_graph import find_internal_calls, find_cross_class_calls
all_edges: list[tuple[str, str, str]] = [] all_edges: list[tuple[str, str, str]] = []
# TODO: collect all edges # TODO: collect all edges
# Collect every edge as (source_label, target_label, edge_type).
for cls_name, cls_info in class_info.items():
    for method_name in cls_info["methods"]:
        source = f"{cls_name}.{method_name}"
        # Internal calls: the target is another method of the same class.
        for internal in find_internal_calls(cls_name, method_name):
            all_edges.append((source, f"{cls_name}.{internal}", "internal"))
        # Cross-class calls: the target is a method on an annotated parameter's type.
        for target_cls, target_method in find_cross_class_calls(cls_name, method_name):
            all_edges.append((source, f"{target_cls}.{target_method}", "cross_class"))

    # External modules are a per-class property, so add them once per class
    # using the full module name; sorted() keeps the output order stable.
    for external_dep in sorted(get_external_deps(cls_name)):
        all_edges.append((cls_name, external_dep, "external"))

# Pipeline edges do not depend on any class, so they are added exactly once.
for pipeline_source, pipeline_target in pipeline_edges:
    all_edges.append((pipeline_source, pipeline_target, "pipeline"))
# TODO 7: Generate a DOT string and write it to "dependency_graph.dot". # TODO 7: Generate a DOT string and write it to "dependency_graph.dot".
# #
@ -207,14 +253,32 @@ all_edges: list[tuple[str, str, str]] = []
def generate_dot(edges: list[tuple[str, str, str]]) -> str:
    """Return a DOT-format string for the dependency graph.

    Args:
        edges: ``(source_label, target_label, edge_type)`` triples. Known edge
            types are "internal", "cross_class", "pipeline" and "external";
            any other type is drawn in black.

    Returns:
        A ``digraph G { ... }`` document ending in a newline. Each distinct
        triple is written once, in first-seen order, because a non-strict
        digraph would render every repeat as an extra parallel edge.
    """
    edge_colors = {
        "internal": "black",
        "cross_class": "blue",
        "pipeline": "red",
        "external": "gray",
    }

    def quote(label) -> str:
        # Inside a double-quoted DOT ID only the double quote needs escaping.
        return '"' + str(label).replace('"', '\\"') + '"'

    lines = [
        "digraph G {",
        "    rankdir=LR;",
        "    node [shape=box, style=filled, fillcolor=lightblue];",
    ]

    seen = set()
    for edge in edges:
        key = tuple(edge)
        if key in seen:
            continue
        seen.add(key)
        source, target, edge_type = key
        color = edge_colors.get(edge_type, "black")
        lines.append(f"    {quote(source)} -> {quote(target)} [color={color}];")

    lines.append("}")
    return "\n".join(lines) + "\n"
# dot_string = generate_dot(all_edges) dot_string = generate_dot(all_edges)
# dot_file = Path(__file__).parent / "dependency_graph.dot" dot_file = Path(__file__).parent / "dependency_graph.dot"
# dot_file.write_text(dot_string) dot_file.write_text(dot_string)
# print(f"\n Written to {dot_file}") print(f"\n Written to {dot_file}")
# print(f" Render with: dot -Tpng dependency_graph.dot -o dependency_graph.png") print(f" Render with: dot -Tpng dependency_graph.dot -o dependency_graph.png")
# ── Part D: (Optional) Render with networkx + matplotlib ─────────────────── # ── Part D: (Optional) Render with networkx + matplotlib ───────────────────
@ -228,51 +292,50 @@ print("=" * 60)
# #
# TODO 9: Build a networkx DiGraph from all_edges and render it. # TODO 9: Build a networkx DiGraph from all_edges and render it.
# #
# Part D is optional: skip rendering instead of crashing when the plotting
# packages are not installed.
try:
    import networkx as nx
    import matplotlib.pyplot as plt
except ImportError:
    nx = plt = None
    print("\n (Install networkx and matplotlib to render dependency_graph.png)")

if nx is not None:
    G = nx.DiGraph()

    # Color map for edge types
    edge_colors = {
        "internal": "black",
        "cross_class": "blue",
        "pipeline": "red",
        "external": "gray",
    }

    # Add edges with colors
    for source, target, etype in all_edges:
        G.add_edge(source, target, color=edge_colors.get(etype, "black"))

    # Node colors: "Class.method" labels whose class is in class_info are light
    # blue, names found in module_functions light green, everything else light gray.
    node_colors = []
    for n in G.nodes():
        if "." in n and n.split(".")[0] in class_info:
            node_colors.append("lightblue")
        elif n in module_functions:
            node_colors.append("lightgreen")
        else:
            node_colors.append("lightgray")

    # Draw; the fixed seed keeps the spring layout reproducible between runs.
    pos = nx.spring_layout(G, k=2, seed=42)
    colors = [G[u][v]["color"] for u, v in G.edges()]

    plt.figure(figsize=(16, 10))
    nx.draw(G, pos,
            with_labels=True,
            node_color=node_colors,
            edge_color=colors,
            node_size=2000,
            font_size=7,
            arrowsize=15)
    plt.title("Dependency Graph sample_stats.py")
    plt.tight_layout()
    plt.savefig(Path(__file__).parent / "dependency_graph.png", dpi=150)
    # Report before plt.show(), which may block until the window is closed.
    print(" Saved dependency_graph.png")
    plt.show()