#!/bin/bash
set -euo pipefail

# Automated eval scorer
# Usage: ./score-eval.sh <results-dir>
#
# Thin launcher: validates the results directory and hands off to score.py,
# which lives next to this script. Per the original header, the scorer reads
# the manifest rubric, session JSONL, and test output to automatically score
# each variant. No interactive input needed.
#
# Arguments:
#   $1 - path to an existing results directory (required)
# Exit status:
#   non-zero if the argument is missing or is not a directory; otherwise the
#   exit status of score.py (this process is replaced via exec).

# Resolve the directory containing this script so score.py is found
# regardless of the caller's current working directory.
EVAL_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"

# ${1:?...} aborts with the usage message when no argument is supplied.
RESULTS_DIR="${1:?Usage: $0 <results-dir>}"

if [[ ! -d "$RESULTS_DIR" ]]; then
  # Diagnostics go to stderr so they do not pollute captured stdout.
  echo "ERROR: $RESULTS_DIR not found" >&2
  exit 1
fi

# exec replaces the shell so the scorer's exit status becomes ours.
exec python3 "$EVAL_DIR/score.py" "$RESULTS_DIR"