5 scenarios testing: code serialization format, candidate lifecycle/DAG, deterministic patches, effort levels/selection criteria, and function representation/concurrency model.
31 lines
1.2 KiB
JSON
{
  "context": "Tests whether the agent knows the specific deterministic patch values used in codeflash's pytest plugin and the subprocess-based test execution architecture.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "Subprocess isolation",
      "description": "States that tests run in a subprocess to isolate the test environment from the main codeflash process, NOT in the same process",
      "max_score": 20
    },
    {
      "name": "Fixed time value",
      "description": "References the specific fixed timestamp 1761717605.108106 for time.time() or the fixed datetime 2021-01-01 02:05:10 UTC for datetime.now()",
      "max_score": 20
    },
    {
      "name": "Fixed UUID value",
      "description": "References the specific fixed UUID 12345678-1234-5678-9abc-123456789012 for uuid4/uuid1",
      "max_score": 20
    },
    {
      "name": "Random seed",
      "description": "States that random is seeded with 42 (NOT a different seed value)",
      "max_score": 20
    },
    {
      "name": "Plugin blocklists",
      "description": "Mentions that behavioral tests block specific pytest plugins (at least 2 of: benchmark, codspeed, xdist, sugar) to ensure deterministic execution",
      "max_score": 20
    }
  ]
}