codeflash-internal/tiles/codeflash-internal-rules/evals/scenario-1/criteria.json
2026-02-14 22:25:30 -05:00

31 lines
1.7 KiB
JSON

{
"context": "Tests whether the agent correctly implements a new language handler following the registry pattern, protocol system, code style conventions, and monorepo directory structure",
"type": "weighted_checklist",
"checklist": [
{
"name": "Registry and protocol integration",
"description": "Handler class uses @register_handler('ruby') decorator, sets supports_optimizer=True and other supports_* to False, implements OptimizerProtocol with async optimizer_optimize method, and is imported in core/languages/__init__.py",
"max_score": 30
},
{
"name": "Lazy import dispatch in router",
"description": "optimizer_router.py dispatches 'ruby' language using a lazy import inside the endpoint body with # noqa: PLC0415 comment, following the existing pattern for js_ts",
"max_score": 20
},
{
"name": "Async and schema conventions",
"description": "Endpoint method is async def, uses ninja.Schema or Pydantic BaseModel for types, calls LLM through aiservice/llm.py (not provider APIs directly), and prompt is a .md file rendered with Jinja2",
"max_score": 25
},
{
"name": "Correct directory placement",
"description": "Handler is at core/languages/ruby/optimizer/optimizer.py, prompt .md file is alongside the module, test is at tests/optimizer/test_ruby_optimizer.py, and all __init__.py files are created",
"max_score": 15
},
{
"name": "Test conventions",
"description": "Test file uses @pytest.mark.asyncio for async tests, follows the feature-based test organization, and optionally uses test factories like create_optimizer_context()",
"max_score": 10
}
]
}