codeflash/tiles/codeflash-docs/evals/summary.json
Kevin Turcios 869fbe1766 chore: add eval scenarios for codeflash-docs tile
5 scenarios testing: code serialization format, candidate lifecycle/DAG,
deterministic patches, effort levels/selection criteria, and function
representation/concurrency model.
2026-02-14 21:29:22 -05:00

40 lines
1,008 B
JSON

{
"total_scenarios": 5,
"capabilities_coverage": {
"total_capabilities": 16,
"capabilities_tested": 12,
"coverage_percentage": 75.0
},
"complexity_distribution": {
"basic": 1,
"intermediate": 3,
"advanced": 1
},
"scenarios": [
{
"index": 1,
"capability": "code-strings-markdown-format, read-writable-vs-read-only",
"complexity": "intermediate"
},
{
"index": 2,
"capability": "candidate-source-types, candidate-forest-dag, repair-request-structure",
"complexity": "intermediate"
},
{
"index": 3,
"capability": "deterministic-patch-values, plugin-blocklists",
"complexity": "advanced"
},
{
"index": 4,
"capability": "effort-level-values, best-candidate-selection",
"complexity": "intermediate"
},
{
"index": 5,
"capability": "function-to-optimize-fields, concurrent-testgen-optimization, pipeline-stage-ordering",
"complexity": "basic"
}
]
}