Adds 5 scenarios testing: sequential debugging, Result type + effort config, test patterns, domain type conventions, and deduplication/repair mechanics. Also adds the tessl-labs/tessl-skill-eval-scenarios dev dependency.
26 lines
1.1 KiB
JSON
26 lines
1.1 KiB
JSON
{
  "context": "Tests whether the agent follows codeflash test conventions when writing tests, including path handling, temp directory patterns, and awareness of the deterministic patching system.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "Uses tmp_path fixture",
      "description": "Test function uses pytest tmp_path fixture parameter, NOT tempfile.NamedTemporaryFile or tempfile.mkdtemp",
      "max_score": 25
    },
    {
      "name": "Calls resolve on paths",
      "description": "Calls .resolve() on Path objects before using them in assertions or function calls",
      "max_score": 25
    },
    {
      "name": "Full string equality",
      "description": "Uses exact equality assertions (== or assert_equal) for code string comparisons, NOT substring checks like 'in' or assertIn or contains",
      "max_score": 25
    },
    {
      "name": "No real time dependency",
      "description": "Test does NOT depend on real time.time(), datetime.now(), random values, or uuid generation for correctness. Acknowledges or accounts for deterministic patches if time/random values are involved.",
      "max_score": 25
    }
  ]
}