codeflash-agent/evals/baseline-scores.json

{
  "version": 2,
  "updated": "2026-03-27",
  "note": "Deterministic auto-scoring for profiler usage, iterative profiling, and the ranked list reduces LLM variance. The min thresholds were tightened from (expected - 3) to (expected - 2).",
  "evals": {
    "ranking":             { "expected": 9, "min": 7, "max": 10 },
    "memory-hard":         { "expected": 9, "min": 7, "max": 10 },
    "memory-misdirection": { "expected": 9, "min": 7, "max": 10 }
  }
}