mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
57 lines
2.5 KiB
JSON
57 lines
2.5 KiB
JSON
{
|
|
"name": "crossdomain-easy",
|
|
"description": "3 modules with cross-domain performance bugs. Tests domain pivot recognition.",
|
|
"eval_type": "crossdomain",
|
|
"test_command": "PYTHONPATH=src uv run python -m pytest tests/ -v",
|
|
"bugs": [
|
|
{
|
|
"id": "analyzer-on2",
|
|
"file": "src/log_analyzer/analyzer.py",
|
|
"function": "analyze_logs",
|
|
"domain_apparent": "memory",
|
|
"domain_actual": "data-structures",
|
|
"description": "O(n²) nested loop for anomaly detection — scans all entries for each entry",
|
|
"expected_fix": "Group by source first (dict/set), then check cross-level conflicts in O(n)",
|
|
"test_file": "tests/test_analyzer.py",
|
|
"test_name": "test_analyze_large_batch",
|
|
"data_size": 10000
|
|
},
|
|
{
|
|
"id": "batch-list-as-set",
|
|
"file": "src/log_analyzer/batch.py",
|
|
"function": "_deduplicate",
|
|
"domain_apparent": "async",
|
|
"domain_actual": "data-structures",
|
|
"description": "Uses list for seen-IDs with O(n) `in` check per item — O(n²) total",
|
|
"expected_fix": "Use set for seen IDs",
|
|
"test_file": "tests/test_batch.py",
|
|
"test_name": "test_batch_large",
|
|
"data_size": 50000
|
|
},
|
|
{
|
|
"id": "streamer-deepcopy-roundtrip",
|
|
"file": "src/log_analyzer/streamer.py",
|
|
"function": "stream_results",
|
|
"domain_apparent": "data-structures",
|
|
"domain_actual": "memory",
|
|
"description": "deepcopy of large nested template per record + redundant JSON round-trip",
|
|
"expected_fix": "Shallow copy or snapshot template once; remove double serialization",
|
|
"test_file": "tests/test_streamer.py",
|
|
"test_name": "test_stream_large_batch",
|
|
"data_size": 50000
|
|
}
|
|
],
|
|
"rubric": {
|
|
"criteria": {
|
|
"profiled_and_identified": 3,
|
|
"fixed_all_bugs": 5,
|
|
"tests_pass": 2
|
|
},
|
|
"total": 10,
|
|
"notes": {
|
|
"profiled_and_identified": "Used a profiler (cProfile, tracemalloc, or similar) and identified the performance bottlenecks with evidence. Must show actual profiling output or systematic timing, not just source-level guesses. Full credit for profiling with impact quantification.",
|
|
"fixed_all_bugs": "Fixed ALL 3 cross-domain bugs correctly. Full credit (5) for fixing all 3. 3-4 points for fixing 2. 1-2 points for fixing 1. Zero if no bugs fixed. Each bug: analyzer O(n²), batch list-as-set, streamer deepcopy.",
|
|
"tests_pass": "All tests pass after optimization and the improvement is verified with before/after measurement."
|
|
}
|
|
}
|
|
}
|