"""curate.py: turn raw feedback logs into the two datasets that improve the agent.

The flywheel: production interactions + feedback become (1) new EVAL cases (M26) so quality is
protected, and (2) FINE-TUNING examples (M15) so the agent learns. Curation is the judgement step:
filter, dedupe, and decide what each signal means.

  thumbs UP            -> a confirmed-good example: lock it in as a golden eval case AND a training example
  thumbs DOWN + fix    -> a regression: an eval case the agent currently FAILS, and a corrected training example
  thumbs DOWN, no fix  -> cannot auto-label; route to a human for review (do not guess)
"""


def to_eval_cases(records):
    """Build deduplicated eval cases from feedback records.

    Each record is a dict read through the keys "feedback", "question", "answer"
    and "correction".

      feedback == "up"                 -> expect = answer,     source = "golden"
      feedback == "down" + correction  -> expect = correction, source = "regression"
      anything else                    -> skipped

    A record is also skipped when its question or its expected text is missing or
    empty: one malformed log line should not abort the whole run with a KeyError,
    and an eval case with nothing to ask or nothing to expect cannot be scored.

    Args:
        records: iterable of feedback dicts.

    Returns:
        A list of {"q", "expect", "source"} dicts in first-seen order, unique on
        the (q, expect) pair.
    """
    cases, seen = [], set()
    for r in records:
        feedback = r.get("feedback")
        if feedback == "up":
            expect, source = r.get("answer"), "golden"
        elif feedback == "down":
            expect, source = r.get("correction"), "regression"
        else:
            continue
        question = r.get("question")
        # Covers down-votes without a correction as well as incomplete records.
        if not question or not expect:
            continue
        # The source is deliberately not part of the key: the first record to
        # contribute a given (q, expect) pair decides how the case is labelled.
        key = (question, expect)
        if key in seen:
            continue
        seen.add(key)
        cases.append({"q": question, "expect": expect, "source": source})
    return cases


def needs_review(records):
    """Collect the down-voted records that carry no correction.

    These cannot be auto-labelled, so they are handed back untouched for a human
    to triage. A record qualifies when its "feedback" is "down" and its
    "correction" is missing or empty.

    Returns:
        A list of the qualifying record objects, in input order.
    """
    pending = []
    for record in records:
        if record.get("feedback") != "down":
            continue
        if record.get("correction"):
            # Already has a fix, so it can be labelled automatically.
            continue
        pending.append(record)
    return pending


def to_finetune_examples(records, min_len=10):
    """Build chat-format training examples (M15) from feedback records.

    The training target is the answer of a thumbs-up record, or the correction of
    a thumbs-down record that has one. Everything else is skipped.

    Args:
        records: iterable of feedback dicts, read through the keys "feedback",
            "question", "answer" and "correction".
        min_len: minimum len() of the target; shorter targets are dropped.

    Returns:
        A list of {"messages": [user, assistant]} dicts in first-seen order, at
        most one per question (the first usable record for a question wins).
    """
    examples, seen = [], set()
    for r in records:
        feedback = r.get("feedback")
        if feedback == "up":
            target = r.get("answer")
        elif feedback == "down" and r.get("correction"):
            target = r.get("correction")
        else:
            continue
        question = r.get("question")
        # A missing or empty question would yield an example with no user prompt.
        if not question:
            continue
        if not target or len(target) < min_len or question in seen:
            continue
        # Only mark the question as seen once an example is actually emitted, so
        # a too-short first answer does not block a later usable one.
        seen.add(question)
        examples.append({"messages": [{"role": "user", "content": question},
                                      {"role": "assistant", "content": target}]})
    return examples
