"""feedback_log.py: capture what really happened in production, safely.

A deployed agent produces the most valuable data there is: real questions, the answers it gave, the
sources it used, and signals about whether each answer was good (a thumbs up/down, or a human
correction). This logs each interaction to a JSONL file. PII is redacted BEFORE anything is stored,
because feedback data is data you are now keeping (the M14 privacy rules apply).
"""

import os
import re
import json

# Email addresses: local part, "@", domain, then a dot and an alphabetic suffix of 2+ letters.
# re.I makes the [a-z] suffix match upper-case letters as well.
_EMAIL = re.compile(r"[\w.+-]+@[\w.-]+\.[a-z]{2,}", re.I)
# 10-digit phone numbers grouped 3-3-4, with an optional "-", "." or whitespace between groups.
# The area code may be bare ("555-123-4567") or parenthesized ("(555) 123-4567"). The
# parenthesized alternative carries no leading \b because "(" is not a word character, so a
# boundary there would fail whenever the number starts the text or follows a space.
_PHONE = re.compile(r"(?:\(\d{3}\)[-.\s]?|\b\d{3}[-.\s]?)\d{3}[-.\s]?\d{4}\b")


def redact_pii(text):
    """Strip obvious personal data (emails, phone numbers) before storing. Extend for your domain.

    Emails are replaced with "[email]" first, then phone numbers with "[phone]". Falsy input
    (None or "") is returned unchanged. Returns the redacted string.
    """
    if not text:
        return text
    return _PHONE.sub("[phone]", _EMAIL.sub("[email]", text))


def log_interaction(record, path):
    """Append one interaction (with PII redacted) to the JSONL log. Returns the stored record.

    A record looks like:
      {"id": "...", "question": "...", "answer": "...", "sources": ["D1"],
       "feedback": "up" | "down" | None, "correction": "..." | None}

    Only the free-text fields ("question", "answer", "correction") are passed through
    redact_pii; every other key is stored as given. The caller's dict is not modified:
    redaction is applied to a shallow copy, and that copy is what gets written and returned.
    """
    rec = dict(record)
    for field in ("question", "answer", "correction"):
        if rec.get(field):
            rec[field] = redact_pii(rec[field])
    # Serialize before opening, so a record that cannot be encoded as JSON never touches
    # (or creates) the log file.
    line = json.dumps(rec) + "\n"
    # Explicit UTF-8 keeps the log's encoding independent of the platform/locale default.
    with open(path, "a", encoding="utf-8") as fh:
        fh.write(line)
    return rec


def load(path):
    """Read every logged interaction back from the JSONL file at `path`.

    Returns a list with one parsed JSON value per non-blank line, in file order. A missing
    file yields an empty list. Raises json.JSONDecodeError if a non-blank line is not valid
    JSON.
    """
    try:
        # The context manager closes the handle as soon as the list is built, instead of
        # leaving it to the garbage collector. Opening directly (rather than checking for
        # existence first) avoids a race with the file being removed in between.
        with open(path, encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]
    except FileNotFoundError:
        return []