codeflash-agent/evals/baseline-scores.json
2026-04-03 17:36:50 -05:00

63 lines
2.1 KiB
JSON

{
"version": 3,
"updated": "2026-03-27",
"note": "v3: per-criterion baselines for pinpointed regression detection",
"evals": {
"ranking": {
"expected": 10,
"min": 8,
"max": 11,
"criteria": {
"profiled_and_identified": { "expected": 3, "min": 2 },
"fixed_all_actionable_targets": { "expected": 5, "min": 3 },
"tests_pass": { "expected": 2, "min": 2 },
"ran_adversarial_review": { "expected": 1, "min": 0 }
}
},
"memory-hard": {
"expected": 9,
"min": 7,
"max": 10,
"criteria": {
"used_memory_profiler": { "expected": 2, "min": 2 },
"profiled_per_stage": { "expected": 2, "min": 1 },
"identified_dominant_allocator": { "expected": 3, "min": 2 },
"fixed_dominant_issue": { "expected": 2, "min": 1 },
"fixed_secondary_issues": { "expected": 1, "min": 0 }
}
},
"memory-misdirection": {
"expected": 9,
"min": 7,
"max": 10,
"criteria": {
"used_memory_profiler": { "expected": 1, "min": 1 },
"profiled_iteratively": { "expected": 2, "min": 1 },
"identified_analytics_as_major": { "expected": 2, "min": 1 },
"fixed_analytics_details": { "expected": 2, "min": 1 },
"fixed_other_issues": { "expected": 2, "min": 1 },
"tests_pass": { "expected": 1, "min": 1 }
}
},
"crossdomain-easy": {
"expected": 7,
"min": 5,
"max": 10,
"criteria": {
"profiled_and_identified": { "expected": 0, "min": 0 },
"fixed_all_bugs": { "expected": 5, "min": 3 },
"tests_pass": { "expected": 2, "min": 2 }
}
},
"crossdomain-hard": {
"expected": 7,
"min": 5,
"max": 10,
"criteria": {
"profiled_and_identified": { "expected": 0, "min": 0 },
"fixed_all_bugs": { "expected": 5, "min": 3 },
"tests_pass": { "expected": 2, "min": 2 }
}
}
}
}