"""streaming_agent.py: an agent that EMITS events as it works, instead of returning one final blob.

chat_stream is a GENERATOR: it yields status updates, tool calls, citations, the answer token by
token, and a final cost report. A UI renders each event the moment it arrives, so the user sees the
agent working and the answer appearing live. Because it is a generator, the user can also CANCEL: if
the consumer stops iterating, the agent stops working and no further tokens (or cost) are produced.

The client is the streaming mock (mockmodel.py) so this runs offline; a production build would wrap
the real SDK streaming API (M6) behind the same event stream.
"""

import events as E
import corpus

# Illustrative Opus prices per 1,000,000 tokens (M25): input tokens, then output tokens.
PRICE_IN = 5.0
PRICE_OUT = 25.0


def _toks(text):
    """Estimate the token count of ``text``: one token per four characters, never below 1."""
    estimate = len(text) // 4
    # Even an empty or very short string is counted as a single token.
    return estimate if estimate > 1 else 1


def _cost(in_tok, out_tok):
    """Return the price of ``in_tok`` input tokens plus ``out_tok`` output tokens.

    PRICE_IN and PRICE_OUT are quoted per 1,000,000 tokens, hence the final division.
    """
    per_million_total = in_tok * PRICE_IN + out_tok * PRICE_OUT
    return per_million_total / 1_000_000


class StreamingAgent:
    """Agent that reports its work as a stream of events instead of one final result.

    The injected ``client`` must provide ``turn(question, searched=...)`` returning a plan
    mapping. This class reads the plan's "action" key, its "query" key when the action is
    "search", and its "chunks" key when writing the answer.
    """

    def __init__(self, client):
        """Keep the model client that produces plans and answer chunks."""
        self.client = client

    def chat_stream(self, question):
        """Yield, in order, the events produced while answering ``question``.

        Event sequence:
          1. a "thinking" status;
          2. only if the first plan's action is "search": a "searching the knowledge base"
             status, one tool event describing the search, and one citation per hit;
          3. a "writing the answer" status followed by one token event per answer chunk;
          4. a cost event, then a done event.

        This is a generator, so the body only advances while the consumer keeps iterating.
        A consumer that stops early receives no further token events and no cost event.

        Args:
            question: the user's question, passed to the client and counted as input tokens.

        Yields:
            Event objects built by the ``events`` module.
        """
        yield E.status("thinking")
        # NOTE(review): the question is counted once even when turn() is called twice below;
        # confirm this matches how the real API would bill the second call.
        in_tok = _toks(question)

        plan = self.client.turn(question, searched=False)
        if plan["action"] == "search":
            yield E.status("searching the knowledge base")
            query = plan["query"]
            hits = corpus.search(query)
            yield E.tool("search_kb", query=query, found=len(hits))
            for doc_id, text in hits:
                # Every retrieved passage is counted as extra input for the second turn.
                in_tok += _toks(text)
                yield E.citation(doc_id)
            # Ask again now that the search has been performed.
            plan = self.client.turn(question, searched=True)

        yield E.status("writing the answer")
        out_tok = 0
        for chunk in plan["chunks"]:
            # Count before yielding so a chunk that was emitted is always accounted for.
            out_tok += _toks(chunk)
            yield E.token(chunk)

        # Cost is rounded to 6 decimal places; the second argument is the total token count.
        yield E.cost(round(_cost(in_tok, out_tok), 6), in_tok + out_tok)
        yield E.done()
