"""add_eval_case.py: your turn, practice the core EDD habit: every bug becomes a test.

The discipline: when you find a bug, you do NOT just fix it. You first add an eval case that FAILS
because of the bug, then fix the code so the case passes. Now that bug can never come back silently,
because the gate would catch it.

Scenario: a user reports the agent gives no useful answer to "Do you offer refunds?". That is a bug
(the app has no refund handling). Practice EDD:

Steps:
  1. Add a refund case to the eval set below. Run the gate: it should FAIL (the bug is real).
  2. Open ../solution/app.py and add a refund answer so the new case passes.
  3. Run the gate again: it should PASS. You have turned a bug into a permanent regression test.
  Run:  python add_eval_case.py
"""

import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "solution"))
import evalset
from app import respond

# TODO 1: register the regression case for the reported bug. The gate is expected to FAIL on this
# case until app.respond is fixed in TODO 2.
REFUND_CASE = {"id": "refunds", "q": "Do you offer refunds?", "expect": "refund"}

# The existing suite followed by the new case, combined with `+` into a separate object.
NEW_CASES = evalset.CASES + [REFUND_CASE]

def closing_hint(passed, total):
    """Return the next-step message that matches the gate outcome.

    Args:
        passed: Number of eval cases that passed.
        total: Number of eval cases that were run.

    Returns:
        The TODO 2 instruction while at least one case is failing; otherwise a confirmation that
        the gate is green, so the learner is not told to fix something that already works.
    """
    if passed < total:
        return "Now fix app.respond so the refunds case passes (TODO 2), then rerun."
    return "Gate is green: the refunds bug is now a regression test (see optional TODO 3)."


if __name__ == "__main__":
    # Run every case (existing ones plus the new refund case) through the app's respond function.
    summary = evalset.run_suite(respond, cases=NEW_CASES)

    # One line per case: pass/FAIL marker, case id, and the raw answer (repr keeps quotes visible).
    for r in summary["results"]:
        print(f"  [{'pass' if r['pass'] else 'FAIL'}] {r['id']}: {r['answer']!r}")

    # The hint depends on the outcome, so a green run no longer asks for a fix that is already done.
    print(f"{summary['passed']}/{summary['total']} pass.",
          closing_hint(summary["passed"], summary["total"]))
    # TODO 3 (optional): once green, move the new case into evalset.CASES so CI gates on it forever.
