"""chat_local.py, M13: chat with an OPEN model running on YOUR machine (via Ollama).

No API key. No internet (once the model is downloaded). No per-token bill. Your data never leaves your laptop.
The trick: Ollama runs the model and exposes a small **local web API** at
http://localhost:11434, so you call it almost exactly like the hosted API in M4,
just pointed at your own computer.

Setup (one time): install Ollama, then pull a small model:
    ollama pull llama3.2          # ~1-2 GB; or try gemma2:2b / qwen2.5:0.5b
(See the install guide: resources/install-guides/ollama.md)

Run (venv active, with `requests` installed, from this folder):
    python chat_local.py
"""

import requests

# Chat endpoint of the Ollama server listening on this machine (localhost:11434).
OLLAMA_URL = "http://localhost:11434/api/chat"   # the LOCAL server Ollama runs
# Name of the model to chat with; it must already be downloaded via `ollama pull <name>`.
MODEL = "llama3.2"                                # an open model you've pulled (no key!)


def chat(prompt):
    """Ask the local Ollama model one question and hand back its answer text.

    Args:
        prompt: The user's message; it is sent as the only turn of the
            conversation (no earlier history is included).

    Returns:
        The ``content`` string found under ``message`` in the JSON reply.

    Raises:
        requests.exceptions.RequestException: If the server cannot be reached,
            the call exceeds the 120-second timeout, or the server answers
            with an HTTP error status (raised by ``raise_for_status``).
        KeyError: If the JSON reply has no ``message``/``content`` entry.
    """
    user_turn = {"role": "user", "content": prompt}
    payload = {
        "model": MODEL,
        "messages": [user_turn],
        # Streaming off: the server answers with one complete JSON document.
        "stream": False,
    }

    # Generous timeout because a laptop-hosted model can take a while to answer.
    reply = requests.post(OLLAMA_URL, json=payload, timeout=120)
    reply.raise_for_status()

    body = reply.json()
    return body["message"]["content"]


if __name__ == "__main__":
    # Minimal interactive loop: read a line, send it to the local model, print
    # the reply. Every request failure is reported as a friendly hint and the
    # loop keeps running, so one bad request never kills the session.
    print(f"Chatting with a LOCAL model ({MODEL}), no key, no internet, free. Type 'quit' to exit.\n")
    while True:
        try:
            user = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave quietly instead of dumping a traceback.
            print()
            break

        command = user.strip().lower()
        if command in {"quit", "exit"}:
            break
        if not command:
            # Nothing typed; don't spend a model call on an empty prompt.
            continue

        try:
            reply = chat(user)
        except requests.exceptions.ConnectionError:
            print("Couldn't reach Ollama at localhost:11434, is it running? "
                  "Start the Ollama app (or run `ollama serve`), and check `ollama list`.\n")
        except requests.exceptions.Timeout:
            # The server accepted the request but did not finish within chat()'s timeout.
            print("The local model took too long to answer. "
                  "Try a shorter prompt or a smaller model.\n")
        except requests.exceptions.HTTPError as err:
            # Raised by raise_for_status() inside chat(); a common cause is a
            # model name that has not been pulled yet.
            status = err.response.status_code if err.response is not None else "unknown"
            print(f"Ollama returned an error (HTTP {status}). "
                  f"If the model isn't installed yet, run `ollama pull {MODEL}`.\n")
        except requests.exceptions.RequestException as err:
            # Any other requests-level failure (e.g. an unreadable response body).
            print(f"Request to Ollama failed: {err}\n")
        else:
            print("Local model:", reply, "\n")