"""app.py, M11: wrap an AI app in a real web API with FastAPI.

On your laptop, your app is a script only you can run. A web **API** lets anything
(a website, a phone app, a teammate) send it a request over the internet. FastAPI
turns your Python function into exactly that, with almost no extra code.

Endpoints:
  GET  /health  -> a liveness check (is the service up?)
  POST /chat    -> {"message": "..."} -> {"reply": "..."}, with latency + token logging

Run locally (venv active, key in .env, from this folder):
    uvicorn app:app --reload
Then open http://127.0.0.1:8000/docs to try it in the browser.
"""

import logging
import os
import time

import anthropic
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Module-level setup, executed once at import time. load_dotenv() runs first so
# that values from .env are in the environment before the client is built.
load_dotenv()                                  # load ANTHROPIC_API_KEY from .env (local dev)
# Configure root logging: INFO and above, rendered as "<LEVEL> <message>".
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
log = logging.getLogger("app")                 # logger used by the /chat handler

# Built with no arguments, so no credentials are passed explicitly here.
# NOTE(review): presumably the SDK reads ANTHROPIC_API_KEY from the environment;
# confirm against the SDK docs.
client = anthropic.Anthropic()
MODEL = "claude-opus-4-8"                      # model id sent with every /chat request

# The web application object; "uvicorn app:app" refers to this name.
app = FastAPI(title="AI App", description="A tiny deployable AI service.")


# Request/response shapes (Pydantic models, FastAPI validates these for you).
# Body accepted by POST /chat: {"message": "..."}.
class ChatRequest(BaseModel):
    # The caller's text; the /chat handler forwards it as the single user turn.
    message: str


# Body returned by POST /chat: {"reply": "..."}.
class ChatResponse(BaseModel):
    # The text the /chat handler extracted from the model's response.
    reply: str


@app.get("/health")
def health():
    """Liveness check, load balancers and monitors hit this to see if we're up."""
    # The docstring above is left exactly as it was: FastAPI publishes it as
    # this endpoint's description in the generated API docs.
    #
    # The payload is a constant; nothing else (model, key, network) is probed,
    # so a successful response only says this process is serving requests.
    liveness_payload = dict(status="ok")
    return liveness_payload


def _reply_text(response) -> str:
    """Return the concatenated text of every text block in a model response.

    Indexing ``response.content[0].text`` directly raises IndexError when the
    content list is empty and AttributeError when the first block is not a
    text block. Joining only the blocks whose ``type`` is ``"text"`` handles
    both cases and yields ``""`` when there is no text at all.
    """
    return "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )


# POST /chat
#   Request body : ChatRequest  -> {"message": "..."}
#   Response body: ChatResponse -> {"reply": "..."}
#   Errors       : 422 if the message is empty or whitespace-only;
#                  502 if the call to the model API raises anthropic.APIError.
#   Side effects : one log line per upstream call (latency, plus token counts
#                  on success), and a warning if the model returned no text.
# These notes are comments rather than docstring text so that the docstring
# FastAPI publishes as the endpoint description stays unchanged.
@app.post("/chat", response_model=ChatResponse)
def chat(request: ChatRequest):
    """Send a message to the model and return its reply (with basic monitoring)."""
    # Reject blank input up front instead of spending an upstream call on it.
    if not request.message.strip():
        raise HTTPException(status_code=422, detail="message must not be empty")

    # perf_counter() is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    try:
        response = client.messages.create(
            model=MODEL,
            max_tokens=500,
            system="You are a helpful, concise assistant.",
            messages=[{"role": "user", "content": request.message}],
        )
    except anthropic.APIError as err:
        # Record the failure with its latency, then answer with a gateway error
        # rather than letting the exception surface as an opaque 500.
        log.error("POST /chat failed latency=%.2fs error=%s",
                  time.perf_counter() - start, type(err).__name__)
        raise HTTPException(
            status_code=502, detail="Upstream model request failed."
        ) from err

    reply = _reply_text(response)
    if not reply:
        log.warning("POST /chat model returned no text content")

    # --- basic monitoring: latency + token usage (your cost signal) ----------
    latency = time.perf_counter() - start
    usage = response.usage
    log.info("POST /chat ok latency=%.2fs in_tokens=%s out_tokens=%s",
             latency, usage.input_tokens, usage.output_tokens)

    return ChatResponse(reply=reply)