"""app.py: a production-shaped service for an agent. The difference between "runs on my laptop" and
"serves many users reliably".

What this adds over the basic M11 app:
  - config from the environment (config.py), validated at startup (fail fast on bad config)
  - liveness vs readiness probes (/healthz, /readyz) so an orchestrator knows when to send traffic
  - graceful startup/shutdown via lifespan (warm up, then drain)
  - a request id and structured access log on every request
  - statelessness: no per-process session memory, so you can run many replicas behind a load balancer

The agent itself is a stub here (`handle`); wire in the M27 capstone agent for the real thing. The
focus of this module is serving, not the agent.

    uvicorn app:app --host 0.0.0.0 --port 8000 --workers 2
"""

import time
import uuid
import logging
import contextlib
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import config

# Load settings once, at import time; lifespan() validates them before the app serves.
settings = config.load()
# Log level comes from the loaded settings; each record is rendered as "LEVEL message".
logging.basicConfig(level=settings.log_level, format="%(levelname)s %(message)s")
log = logging.getLogger("serve")

# Readiness flag: False at import, set True by lifespan() after config validation passes,
# and set back to False by lifespan() on shutdown. /readyz and /chat answer 503 while False.
# NOTE(review): this is a plain module-level dict, so each worker process has its own copy.
STATE = {"ready": False}


@contextlib.asynccontextmanager
async def lifespan(app):
    """Run startup checks before serving and flip readiness off again on shutdown.

    Startup: validate the loaded settings and refuse to start if any problem is
    reported, then log the redacted config and mark the process ready.
    Shutdown: mark the process not ready, so /readyz and /chat answer 503.

    Args:
        app: the application instance this lifespan is attached to (unused here).

    Raises:
        RuntimeError: if ``settings.validate()`` reports any problems.
    """
    problems = settings.validate()
    if problems:
        # fail fast: do not start serving with broken config
        raise RuntimeError("bad configuration: " + "; ".join(problems))
    log.info("startup env=%s config=%s", settings.environment, settings.redacted())
    # (warm caches, open DB/vector-store connections, etc. here)
    STATE["ready"] = True
    try:
        yield
    finally:
        # Runs on a normal shutdown and also when an exception or cancellation is thrown
        # into the generator at the yield, so the flag cannot stay stuck at True.
        STATE["ready"] = False    # report not-ready so traffic is routed elsewhere
        log.info("shutdown: draining in-flight requests")


# The ASGI application object that `uvicorn app:app` serves; lifespan() is registered as
# its startup/shutdown hook.
app = FastAPI(title="Agent Service", lifespan=lifespan)


class ChatIn(BaseModel):
    # Request body for POST /chat.
    # message: required text that chat() passes to handle().
    message: str
    # session_id: optional; chat() echoes it back unchanged in the response.
    session_id: str | None = None      # state lives OUTSIDE the process, keyed by this id


def handle(message, settings=settings):
    """Produce the agent's reply to ``message``.

    Placeholder implementation that echoes the message back; the M27 SupportAgent is
    meant to be called from here in production. The reply depends only on ``message``:
    ``settings`` is accepted but not used by this stub.
    """
    reply = f"You said: {message}"
    return reply


@app.middleware("http")
async def access_log(request, call_next):
    """Tag every response with a request id and emit one access-log line per request.

    The id is taken from the incoming ``x-request-id`` header when present; otherwise a
    fresh 8-hex-character id is generated. The same id is written to the response header
    and to the log line so the two can be correlated.

    Args:
        request: the incoming HTTP request.
        call_next: awaitable that runs the rest of the application for this request.

    Returns:
        The downstream response, with ``x-request-id`` set.

    Raises:
        Exception: anything raised downstream is logged with the request id and re-raised.
    """
    rid = request.headers.get("x-request-id") or uuid.uuid4().hex[:8]
    # perf_counter is monotonic; time.time() follows the wall clock and can jump when the
    # clock is adjusted, which would give wrong (even negative) durations.
    start = time.perf_counter()
    try:
        response = await call_next(request)
    except Exception:
        # A failing request must still appear in the log with its id before we re-raise.
        log.exception("%s %s -> unhandled error %.3fs rid=%s", request.method,
                      request.url.path, time.perf_counter() - start, rid)
        raise
    response.headers["x-request-id"] = rid
    log.info("%s %s -> %d %.3fs rid=%s", request.method, request.url.path,
             response.status_code, time.perf_counter() - start, rid)
    return response


@app.get("/healthz")
def healthz():
    """Liveness probe: answers as long as the process can serve a request at all.

    An orchestrator restarts the container when this stops responding.
    """
    payload = {"status": "alive"}
    return payload


@app.get("/readyz")
def readyz():
    """Readiness probe: reports whether this process should be sent traffic.

    Answers 200 once startup has finished, and 503 before that or after shutdown begins.
    """
    if STATE["ready"]:
        return {"status": "ready"}
    # Not warmed up yet, or already shutting down: tell the caller to stay away.
    return JSONResponse({"status": "not ready"}, status_code=503)


@app.post("/chat")
def chat(req: ChatIn):
    """Answer one chat message.

    Returns the agent's answer together with the caller's session id, or a 503 error
    body while the service is not ready.
    """
    if STATE["ready"]:
        answer = handle(req.message)
        return {"answer": answer, "session_id": req.session_id}
    # Same gate as /readyz: refuse work before startup completes or after shutdown begins.
    return JSONResponse({"error": "not ready"}, status_code=503)
