60 lines
2.8 KiB
JSON
60 lines
2.8 KiB
JSON
{
|
||
"name": "memory-balanced",
|
||
"description": "Order pipeline with 3 memory issues of similar weight (~30% each). No single dominant allocator — tests iterative profiling and fix-then-reprofile discipline.",
|
||
"eval_type": "memory-balanced",
|
||
"test_command": "PYTHONPATH=src uv run python -m pytest tests/ -v",
|
||
"bugs": [
|
||
{
|
||
"id": "validation-audit-context",
|
||
"file": "src/orders/core.py",
|
||
"function": "validate_orders",
|
||
"description": "Stores a _audit dict on every order with a formatted order_repr string and checks dict. 50K orders × ~600B = ~36MB. Never read after validation.",
|
||
"expected_fix": "Don't store audit context on orders, or store only a pass/fail boolean",
|
||
"is_dominant": false,
|
||
"peak_contribution_pct": 30
|
||
},
|
||
{
|
||
"id": "pricing-receipt-strings",
|
||
"file": "src/orders/core.py",
|
||
"function": "compute_pricing",
|
||
"description": "Stores a _receipt formatted string on every order for invoice generation. 50K strings + __dict__ resize overhead from adding new attributes post-init = ~47MB.",
|
||
"expected_fix": "Don't store receipt string on each order. Compute final_price directly without intermediate storage. Use __slots__ to avoid dict resize.",
|
||
"is_dominant": false,
|
||
"peak_contribution_pct": 40
|
||
},
|
||
{
|
||
"id": "fulfillment-label-materialization",
|
||
"file": "src/orders/core.py",
|
||
"function": "build_fulfillment_plan",
|
||
"description": "Pre-generates formatted shipping label strings with json.dumps(metadata) for each order. 50K labels × ~700B = ~35MB. Summary only needs counts, not labels.",
|
||
"expected_fix": "Don't materialize label strings. Store only order_id, warehouse, priority in plan entries.",
|
||
"is_dominant": false,
|
||
"peak_contribution_pct": 30
|
||
}
|
||
],
|
||
"test_file": "tests/test_orders.py",
|
||
"test_name": "test_large_batch",
|
||
"data_size": 50000,
|
||
"memory_profile": {
|
||
"peak_rss_mb": 168,
|
||
"overhead_mb": 118,
|
||
"target_peak_mb": 70
|
||
},
|
||
"rubric": {
|
||
"criteria": {
|
||
"used_memory_profiler": 1,
|
||
"profiled_iteratively": 3,
|
||
"identified_all_three": 3,
|
||
"fixed_issues": 2,
|
||
"tests_pass": 1
|
||
},
|
||
"total": 10,
|
||
"notes": {
|
||
"used_memory_profiler": "Used tracemalloc, memray, or similar — not just source reading",
|
||
"profiled_iteratively": "Re-profiled after each fix to find the next contributor. 3pts for fix→profile→fix→profile cycle. 1pt for fixing all from one profile without re-profiling.",
|
||
"identified_all_three": "1pt per issue correctly identified: validation audit context, pricing receipt strings, fulfillment label materialization",
|
||
"fixed_issues": "1pt per 2 issues fixed with measurable memory reduction (max 2pts for all 3 fixed)",
|
||
"tests_pass": "All tests pass after changes"
|
||
}
|
||
}
|
||
}
|