codeflash-internal/tiles/codeflash-internal-skills/evals/scenario-1/criteria.json
2026-02-14 22:25:30 -05:00

26 lines
1.2 KiB
JSON

{
"context": "Tests the ability to diagnose a router dispatch failure caused by an unsupported language and to identify the correct fix path",
"type": "weighted_checklist",
"checklist": [
{
"name": "Identifies router dispatch stage",
"description": "Correctly identifies that the failure occurs at the router dispatch stage (Step 2) in core/shared/optimizer_router.py, not at request validation or LLM calls",
"max_score": 30
},
{
"name": "Explains unsupported language cause",
"description": "Explains that 'rust' is not among the supported languages (python, javascript, typescript, java) and that there is no dispatch branch for it",
"max_score": 25
},
{
"name": "References correct files",
"description": "References both core/shared/optimizer_router.py (where dispatch happens) and core/shared/optimizer_models.py (where validation could be added)",
"max_score": 20
},
{
"name": "Proposes valid fix",
"description": "Proposes either adding language validation to the schema to reject unsupported languages at request time, or implementing Rust support via the add-language-support workflow",
"max_score": 25
}
]
}