codeflash-agent/evals/score-eval.sh

15 lines
436 B
Bash
Raw Normal View History

2026-03-24 21:14:04 +00:00
#!/bin/bash
set -euo pipefail
# Automated eval scorer
# Usage: ./score-eval.sh <results-dir>
#
# Reads the manifest rubric, session JSONL, and test output to
# automatically score each variant. No interactive input needed.
#
# This wrapper only validates <results-dir> and then hands off to score.py,
# which is looked up in the same directory as this script.
#
# Exit status: 1 if <results-dir> does not exist or is not a directory;
# otherwise whatever python3 / score.py exits with (the shell is replaced
# via exec).

# Resolve this script's directory to an absolute path.
#  - CDPATH is cleared for the cd so that a relative script path cannot be
#    resolved through an inherited CDPATH (which would pick the wrong
#    directory and make cd print it, corrupting the captured value).
#  - "--" keeps a path that starts with "-" from being parsed as options.
EVAL_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
readonly EVAL_DIR

# ${1:?...} aborts with the usage message when the argument is missing/empty.
RESULTS_DIR="${1:?Usage: $0 <results-dir>}"
readonly RESULTS_DIR

if [[ ! -d "$RESULTS_DIR" ]]; then
  # Diagnostics go to stderr so they never mix with scorer output on stdout.
  printf 'ERROR: %s not found\n' "$RESULTS_DIR" >&2
  exit 1
fi

# Replace this shell with the scorer so its exit status is propagated as-is.
exec python3 "$EVAL_DIR/score.py" "$RESULTS_DIR"