codeflash-internal/tiles/codeflash-internal-rules/evals/scenario-5/criteria.json

{
  "context": "Tests whether the agent follows optimization postprocessing patterns (AST dedup, no-op checks, model distribution), code style conventions (libcst for transforms, ast for reads, LLM via llm.py, Jinja2 prompts), and test conventions",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "AST deduplication and no-op detection",
      "description": "Postprocessor deduplicates candidates by normalizing JS/TS code to an AST representation and comparing the resulting AST dumps. Detects no-ops by comparing each optimized candidate to the original code. Follows the pattern from the Python postprocessor.",
      "max_score": 30
    },
    {
      "name": "libcst vs ast usage",
      "description": "Uses libcst for any code transformations that modify source (preserves formatting). Uses the ast module only for read-only analysis (parsing, dumping for comparison). Does not use ast for code modification.",
      "max_score": 25
    },
    {
      "name": "LLM and prompt conventions",
      "description": "LLM calls go through aiservice/llm.py (not direct provider API calls). Prompt is stored as a .md file alongside the module and rendered with Jinja2. Model distribution follows the formula claude_calls = (total - 1) // 2.",
      "max_score": 25
    },
    {
      "name": "Test structure and async conventions",
      "description": "Tests are in tests/optimizer/ (feature-based organization), use @pytest.mark.asyncio for async tests, and test both deduplication and no-op detection scenarios.",
      "max_score": 20
    }
  ]
}