codeflash-internal/tiles/codeflash-internal-docs/evals/scenario-3/criteria.json
2026-02-14 22:25:30 -05:00

26 lines
1.4 KiB
JSON

{
"context": "Tests whether the agent understands, based on the context extraction and postprocessing docs, the Markdown code block format with file-path annotations, AST-based deduplication, equality checking against the original code, and code validation.",
"type": "weighted_checklist",
"checklist": [
{
"name": "File-path-annotated code block parsing",
"description": "extract_code_blocks correctly parses the ```python:path/to/file.py format, extracting both the file path and language from the annotation. Handles both annotated and plain code blocks.",
"max_score": 30
},
{
"name": "AST-based deduplication",
"description": "deduplicate_candidates uses ast.parse() and ast.dump() to normalize code before comparison, rather than comparing source strings directly. This ensures that candidates differing only in whitespace or comments are treated as duplicates.",
"max_score": 30
},
{
"name": "Equality check against original",
"description": "deduplicate_candidates filters out candidates whose AST is identical to the original_code's AST, ensuring only genuinely different optimizations are returned.",
"max_score": 20
},
{
"name": "Graceful SyntaxError handling",
"description": "Both deduplicate_candidates and validate_python_code catch the SyntaxError raised by ast.parse() on invalid code instead of letting it propagate. deduplicate_candidates keeps unparseable candidates rather than discarding them.",
"max_score": 20
}
]
}