codeflash-internal/tiles/codeflash-internal-rules/evals/scenario-5/criteria.json
2026-02-14 22:25:30 -05:00

26 lines
1.5 KiB
JSON

{
"context": "Tests whether the agent follows optimization postprocessing patterns (AST dedup, no-op checks, model distribution), code style conventions (libcst for transforms, ast for reads, LLM via llm.py, Jinja2 prompts), and test conventions",
"type": "weighted_checklist",
"checklist": [
{
"name": "AST deduplication and no-op detection",
"description": "Postprocessor deduplicates candidates by normalizing JS/TS code to an AST representation and comparing the resulting AST dumps. Detects no-ops by comparing the optimized code to the original. Follows the pattern from the Python postprocessor.",
"max_score": 30
},
{
"name": "libcst vs ast usage",
"description": "Uses libcst for any code transformations that modify source (preserves formatting). Uses the ast module only for read-only analysis (parsing, dumping for comparison). Does not use ast for code modification.",
"max_score": 25
},
{
"name": "LLM and prompt conventions",
"description": "LLM calls go through aiservice/llm.py (not direct provider API calls). Prompt is stored as a .md file alongside the module and rendered with Jinja2. Model distribution follows the formula claude_calls = (total - 1) // 2.",
"max_score": 25
},
{
"name": "Test structure and async conventions",
"description": "Tests are in tests/optimizer/ (feature-based organization), use @pytest.mark.asyncio for async tests, and test both deduplication and no-op detection scenarios.",
"max_score": 20
}
]
}