codeflash-internal/tiles/codeflash-internal-rules/evals/scenario-4/criteria.json

32 lines
1.9 KiB
JSON
Raw Normal View History

{
"context": "Tests whether the agent follows PR review guidelines: commenting only on critical issues (logic errors, security, test typos, breaking changes) and skipping style/formatting/suggestion comments",
"type": "weighted_checklist",
"checklist": [
{
"name": "Identifies security vulnerability",
"description": "Flags the hardcoded Redis password 'codeflash123' in the source code as a security vulnerability. Credentials should not be committed to the codebase.",
"max_score": 25
},
{
"name": "Identifies test name typo",
"description": "Catches the typo in the test docstring 'test optimzier cache hit' (optimzier -> optimizer). The typo is in a docstring, not a method name, so the method names are correct and test discovery is unaffected here; the agent should still show awareness of the rule that test method names with typos won't be discovered by the test runner.",
"max_score": 20
},
{
"name": "Identifies breaking change",
"description": "Flags that changing OptimizeRequestSchema.use_cache from having no default to having a default value (or adding a new required->optional field) could be a breaking change for existing clients, and that the test file is in the wrong directory (core/shared/ instead of tests/).",
"max_score": 25
},
{
"name": "Skips style and suggestion comments",
"description": "Does NOT comment on code formatting, import ordering, variable naming, or offer 'consider using X' suggestions. Does not suggest performance improvements without profiling data. Stays within the 5-7 comment limit.",
"max_score": 15
},
{
"name": "Identifies logic error with sync Redis in async endpoint",
"description": "Flags that using synchronous redis.Redis calls inside an async def endpoint will block the event loop. The agent should recommend an async Redis client (aioredis or redis.asyncio).",
"max_score": 15
}
]
}