"""extract.py, M6 build: turn messy text into GUARANTEED structured JSON.

This is the payoff over M5. In M5 we *asked* for JSON and parsed defensively (it could
arrive fenced or broken). Here we use the API's structured-output feature: we hand the
model a JSON **schema**, and a reply that finishes normally is guaranteed to be valid
JSON in that exact shape: no fence-stripping, no try/except gymnastics. (A reply that is
refused or cut off at max_tokens is the exception.) We parse it into a dict (M3's json)
and use it.

Run (venv active, key in .env, from this folder):
    python extract.py
"""

import os
import json
from dotenv import load_dotenv
import anthropic

# Load the variables from the .env file (where the API key lives) into the process
# environment. This has to run before the client below is constructed.
load_dotenv()
# No key is passed explicitly, so the client must pick it up from the environment.
client = anthropic.Anthropic()
MODEL = "claude-opus-4-8"        # the "real app" model; supports structured outputs
# NOTE(review): confirm this model ID against the current published models list.

# The shape every reply must take. Handing this schema to the API makes the model
# answer with JSON that has exactly these fields.
_EXPENSE_CATEGORIES = ["food", "transport", "equipment", "other"]

_EXPENSE_FIELDS = {
    "item": {"type": "string"},
    "amount": {"type": "number"},
    "category": {"type": "string", "enum": _EXPENSE_CATEGORIES},
    "reimbursable": {"type": "boolean"},
}

EXPENSE_SCHEMA = {
    "type": "object",
    "properties": _EXPENSE_FIELDS,
    # Every declared field is mandatory, so the list is derived from the fields themselves.
    "required": list(_EXPENSE_FIELDS),
    # Keys that are not declared above are not allowed.
    "additionalProperties": False,
}


def extract_expense(messy_text):
    """Pull a clean expense record out of a free-text line.

    Args:
        messy_text: Free-text description of a single expense.

    Returns:
        A dict parsed from the model's JSON reply, shaped by EXPENSE_SCHEMA
        (keys: item, amount, category, reimbursable).

    Raises:
        ValueError: If messy_text is empty or blank, if the reply was refused or cut
            off at max_tokens, if the reply contains no text block, or if the text is
            not valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    # Every schema field is required, so a blank note would force the model to invent
    # a record. Fail before spending an API call on it.
    if messy_text is None or not str(messy_text).strip():
        raise ValueError("messy_text is empty; there is no expense to extract")

    response = client.messages.create(
        model=MODEL,
        max_tokens=300,
        messages=[{
            "role": "user",
            "content": f"Extract the expense from this note:\n\n{messy_text}",
        }],
        output_config={"format": {"type": "json_schema", "schema": EXPENSE_SCHEMA}},
    )

    # The schema guarantee only covers a reply that finished normally. A refusal or a
    # reply truncated at max_tokens may not be schema-valid JSON, so reject those here.
    if response.stop_reason in ("max_tokens", "refusal"):
        raise ValueError(
            f"structured output is unusable (stop_reason={response.stop_reason!r})"
        )

    # Take the first text block; a default avoids leaking a bare StopIteration.
    text = next((b.text for b in response.content if b.type == "text"), None)
    if text is None:
        raise ValueError("response contained no text block to parse")
    return json.loads(text)          # no fence-stripping needed for a completed reply


def main():
    """Prompt for one expense note, extract it, and print the structured record."""
    note = input("Describe an expense in your own words: ")
    # A blank note holds no expense, yet every schema field is required, so the model
    # would have to invent values. Stop before making the API call.
    if not note.strip():
        raise SystemExit("No expense entered; nothing to extract.")

    try:
        expense = extract_expense(note)
    except ValueError as err:
        # Covers an unparseable reply (json.JSONDecodeError is a ValueError); exit with
        # a readable message instead of a traceback.
        raise SystemExit(f"Could not extract an expense: {err}") from err

    print("\nStructured record (ready for your code, a database, or a spreadsheet):")
    print(f"  Item:         {expense['item']}")
    print(f"  Amount:       ${expense['amount']:.2f}")
    print(f"  Category:     {expense['category']}")
    print(f"  Reimbursable: {'yes' if expense['reimbursable'] else 'no'}")


if __name__ == "__main__":
    main()
