# SAMPLE GitHub Actions workflow. Copy this to  .github/workflows/evals.yml  in YOUR project
# (not in the course repo). It runs your eval gate on every push to main and on every pull request,
# and the build goes red automatically if run_evals.py exits non-zero, which blocks the merge once
# this check is marked as required in your branch protection rules.
#
# This runs the DETERMINISTIC eval set (no API key, fast, free, repeatable). For live model-behaviour
# evals, add a separate scheduled workflow that sets ANTHROPIC_API_KEY from a repo secret and runs a
# small live subset (see notes.md), so your fast gate stays free and your live checks stay honest.

name: evals

# Triggers: pushes to the main branch, and pull requests (no branch or type filter is given).
on:
  push:
    branches:
      - main
  pull_request:

jobs:
  # Deterministic eval gate: checks out the repo, sets up Python 3.12, and runs run_evals.py.
  eval-gate:
    runs-on: ubuntu-latest
    # Least privilege: the job only reads the checked-out code, so the token gets read-only
    # access to repository contents and nothing else.
    permissions:
      contents: read
    # Fail fast if the eval script hangs instead of waiting for the 360-minute default.
    # Raise this if your deterministic eval set legitimately takes longer.
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      # - run: pip install -r requirements.txt   # if your evals need dependencies
      # run_evals.py exits non-zero on regression -> build fails -> merge blocked
      - name: Run the eval gate
        run: python run_evals.py

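  # Optional: live evals on a schedule (costs tokens). Uncomment and set the secret to use it.
  # You must also add a `schedule:` trigger under `on:` (for example `- cron: "0 6 * * *"`);
  # without it the `if:` below never matches and this job is always skipped. Note that eval-gate
  # will then run on the schedule too unless you give it its own `if:`.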
  # live-evals:
  #   runs-on: ubuntu-latest
  #   if: github.event_name == 'schedule'
  #   steps:
  #     - uses: actions/checkout@v4
  #     - uses: actions/setup-python@v5
  #       with: { python-version: "3.12" }
  #     - run: pip install -r requirements.txt
  #     - run: python run_evals.py --live
  #       env:
  #         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
