"""privacy.py, M14: redact personal data BEFORE it leaves your machine.

A simple, dependency-free privacy guard: strip emails, phone numbers, SSN-like and
card-like numbers out of text before you send it to a (hosted) model. "Don't send what
you don't need to" is privacy-by-design, and you can test this without any model or key.

This is a *first line of defense*, not perfect PII detection (names, addresses, and odd
formats slip through). Pair it with judgment about what data you collect at all.
"""

import re

# Order matters: patterns are applied in list order, so the most specific come first.
# Each entry is (label, regex source); the label is what appears in the placeholder.
PATTERNS = [
    # The match must end on a word character so that sentence punctuation right
    # after an address ("...@example.com.") is left in the text.
    ("EMAIL", r"[\w.+-]+@[\w-]+\.[\w.-]*\w"),
    # US SSN layout: 3-2-4 digits separated by hyphens.
    ("SSN",   r"\b\d{3}-\d{2}-\d{4}\b"),
    # 13-16 digits with an optional single space/hyphen *between* digits. Ending
    # on a digit keeps whatever separator follows the number in the text.
    ("CARD",  r"\b\d(?:[ -]?\d){12,15}\b"),
    # (?<!\w) instead of a leading \b: \b cannot match between whitespace (or the
    # start of the string) and "+" or "(", which left that character behind.
    ("PHONE", r"(?<!\w)(?:\+?\d{1,3}[ -]?)?\(?\d{3}\)?[ -]?\d{3}[ -]?\d{4}\b"),
]


def redact_pii(text):
    """Replace detected PII in *text* with ``[TYPE REDACTED]`` placeholders.

    Every ``(label, pattern)`` pair in ``PATTERNS`` is applied in list order,
    each pass running on the output of the previous one.

    Returns a ``(redacted_text, counts)`` tuple. ``counts`` maps a label to the
    number of substitutions made for it; labels with no match are omitted.
    """
    tally = {}
    redacted = text
    for label, regex in PATTERNS:
        placeholder = f"[{label} REDACTED]"
        redacted, hits = re.subn(regex, placeholder, redacted)
        if not hits:
            # Nothing matched for this label: keep it out of the tally.
            continue
        tally[label] = tally.get(label, 0) + hits
    return redacted, tally


if __name__ == "__main__":
    # Demo: redact a message that contains an email, a phone number, an SSN
    # and a card number, then show the before/after text and the tally.
    sample = (
        "Hi, I'm Dana, email dana@example.com, phone (555) 123-4567. "
        "My SSN is 123-45-6789 and card 4111 1111 1111 1111. Please help."
    )
    outcome = redact_pii(sample)
    report = (
        ("Original:\n ", sample),
        ("\nRedacted (safe to send):\n ", outcome[0]),
        ("\nRemoved:", outcome[1]),
    )
    for heading, value in report:
        print(heading, value)
