"""
Exercise 3 – SOLUTION – Structured Input and Structured Output
==============================================================
AISE501 · Prompting in Coding · Spring Semester 2026
"""
import json
from pathlib import Path

import yaml  # pip install pyyaml

from server_utils import chat, chat_json, get_client, print_messages, print_separator

# Shared LLM client reused by every part of the exercise.
client = get_client()

# The file under review; read once so every prompt sees the same snapshot.
code_to_review = Path("analyze_me.py").read_text()
# ── Part A: Structured Input → JSON Output ────────────────────────────────────
print_separator("Part A – Request JSON Output")

# System message pins the output contract (raw JSON only) so the reply can be
# fed straight into json.loads() without stripping markdown fences first.
system_a = """\
You are a code-review assistant. You ALWAYS respond with valid JSON and
nothing else — no markdown code fences, no introductory text, no trailing
commentary. Your entire response must be parseable by json.loads().
"""

# User message: XML-style tags separate persona, task, schema, and code so the
# model cannot confuse the instructions with the code being reviewed.
# Doubled braces ({{ }}) keep literal braces through the f-string.
prompt_a = f"""\
<persona>
You are a senior Python engineer performing a thorough, structured code review.
</persona>

<task>
Review the Python code below and return your findings as JSON.
Follow the schema defined in <schema> exactly.
</task>

<schema>
{{
  "summary": "<one-sentence overview of the code quality>",
  "bugs": [
    {{
      "id": 1,
      "severity": "Critical|Medium|Style",
      "line": <integer line number or null if not applicable>,
      "function": "<name of the affected function>",
      "description": "<what is wrong and why it matters>",
      "fix": "<one-sentence fix hint>"
    }}
  ],
  "overall_quality": "Poor|Fair|Good|Excellent"
}}
</schema>

<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""

messages_a = [
    {"role": "system", "content": system_a},
    {"role": "user", "content": prompt_a},
]

print_messages(messages_a)
raw_json_a = chat_json(client, messages_a)  # response_format=json_object → always valid JSON
print("Raw response:")
print(raw_json_a)
# ── Part B: Parse the JSON and Display a Summary ──────────────────────────────
print_separator("Part B – Parse JSON and Print Summary")

# chat_json() guarantees *syntactically* valid JSON, so a bare loads() is safe;
# the schema (the keys used below) is still only promised by the prompt.
report = json.loads(raw_json_a)

print(f"Overall quality : {report['overall_quality']}")
print(f"Summary : {report['summary']}\n")

# Fixed column widths for the findings table.
col_w = [4, 10, 6, 24, 45]
header = (
    f"{'ID':<{col_w[0]}} | {'Severity':<{col_w[1]}} | {'Line':<{col_w[2]}} | "
    f"{'Function':<{col_w[3]}} | {'Description':<{col_w[4]}}"
)
print(header)
print("-" * len(header))

for bug in report["bugs"]:
    # "line" may be null in the JSON → None after parsing; show a dash instead.
    line_str = str(bug["line"]) if bug["line"] is not None else "—"
    print(
        f"{bug['id']:<{col_w[0]}} | "
        f"{bug['severity']:<{col_w[1]}} | "
        f"{line_str:<{col_w[2]}} | "
        f"{bug['function']:<{col_w[3]}} | "
        f"{bug['description'][:col_w[4]]}"  # truncate long descriptions to the column
    )
# ── Part C: Use the Parsed Data to Build a Follow-Up Prompt ──────────────────
print_separator("Part C – Dynamic Follow-Up Prompt from Parsed Data")

# Structured output becomes structured *input*: filter the parsed findings and
# feed only the critical ones back into a second request.
critical_bugs = [b for b in report["bugs"] if b["severity"] == "Critical"]

if not critical_bugs:
    print("No critical bugs found — nothing to fix.")
else:
    bug_list_text = "\n".join(
        f' - Bug {b["id"]} (line {b["line"]}): {b["description"]}'
        for b in critical_bugs
    )

    followup_prompt = f"""\
<task>
The following critical bugs were found in analyze_me.py:

{bug_list_text}

For each bug, provide the corrected Python code snippet (the full function
is fine). Return your answer as a JSON object with this schema:
{{
  "fixes": [
    {{"bug_id": <int>, "fixed_code": "<corrected Python code as a string>"}}
  ]
}}
No markdown, no explanation — only the JSON object.
</task>"""

    # Extend the Part A conversation so the model sees its own earlier review
    # as an assistant turn — context for the follow-up question.
    messages_c = messages_a + [
        {"role": "assistant", "content": raw_json_a},
        {"role": "user", "content": followup_prompt},
    ]

    print_messages(messages_c)
    raw_json_c = chat_json(client, messages_c)

    fixes = json.loads(raw_json_c)["fixes"]
    for fix in fixes:
        print(f"\n--- Fix for bug {fix['bug_id']} ---")
        print(fix["fixed_code"])
# ── Part D: Request YAML Instead of JSON ─────────────────────────────────────
print_separator("Part D – YAML Output")

system_d = """\
You are a code-review assistant. You ALWAYS respond with valid YAML and
nothing else — no markdown fences, no introductory text.
"""

prompt_d = f"""\
<persona>
You are a senior Python engineer performing a structured code review.
</persona>

<task>
Review the code below and return your findings as YAML.
Use the same fields as before: summary, bugs (with id/severity/line/
function/description/fix), and overall_quality.
</task>

<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""

messages_d = [
    {"role": "system", "content": system_d},
    {"role": "user", "content": prompt_d},
]

print_messages(messages_d)
# No response_format equivalent exists for YAML, so this is plain chat();
# a low temperature reduces (but cannot eliminate) format drift.
raw_yaml = chat(client, messages_d, temperature=0.2)

try:
    yaml_report = yaml.safe_load(raw_yaml)  # safe_load: never executes arbitrary tags
    # safe_load can legally return None (empty input) or a plain string (e.g.
    # fenced output); .get() on those would raise an uncaught AttributeError.
    # Only a mapping matches the schema we asked for — route anything else
    # into the existing malformed-output path.
    if not isinstance(yaml_report, dict):
        raise yaml.YAMLError(
            f"expected a YAML mapping, got {type(yaml_report).__name__}"
        )
    print(f"Parsed YAML – overall quality: {yaml_report.get('overall_quality')}")
    print(f"Number of bugs found: {len(yaml_report.get('bugs', []))}")
except yaml.YAMLError as e:
    print(f"ERROR: malformed YAML: {e}")
    print(raw_yaml)
# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. What can go wrong when asking an LLM to return JSON?\n"
    "2. How did the <schema> tag influence the output structure?\n"
    "3. Why is structured output important for building LLM pipelines?\n"
    "4. When would you use JSON vs. YAML vs. plain text?\n"
)