"""streaming.py, M6: print the reply as it's written, instead of waiting for all of it.

Without streaming, your program waits for the whole reply and then prints it all at once:
a long pause, then a wall of text. With streaming, words appear as the model produces
them, like a chat app. Same request, much better feel for anything longer than a sentence.

Run (venv active, key in .env, from this folder):
    python streaming.py
"""

import os
from dotenv import load_dotenv
import anthropic

# Load the .env file before building the client, so the key stored there is available.
load_dotenv()
client = anthropic.Anthropic()
MODEL = "claude-opus-4-8"

# Show the label up front and stay on the same line; the reply is appended after it.
print("Claude (streaming): ", end="", flush=True)

# Open the request as a stream; the loop below runs inside the `with` block.
with client.messages.stream(
    messages=[
        {
            "role": "user",
            "content": "Write a short, 3-sentence pep talk for someone learning to code.",
        },
    ],
    max_tokens=400,
    model=MODEL,
) as stream:
    # text_stream hands over the reply in small pieces as they arrive.
    for text in stream.text_stream:
        # end="" keeps the pieces on one line; flush=True displays each one right away.
        print(text, end="", flush=True)

# Finish the line once the whole reply has been printed.
print()
