mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
60 lines
3 KiB
JSON
60 lines
3 KiB
JSON
{
|
|
"name": "crossdomain-hard",
|
|
"description": "3 modules with harder cross-domain bugs and trap fixes. Tests domain pivot under ambiguity.",
|
|
"eval_type": "crossdomain",
|
|
"test_command": "PYTHONPATH=src uv run python -m pytest tests/ -v",
|
|
"bugs": [
|
|
{
|
|
"id": "enricher-char-normalize",
|
|
"file": "src/pipeline/enricher.py",
|
|
"function": "_compute_fingerprint",
|
|
"domain_apparent": "async",
|
|
"domain_actual": "data-structures",
|
|
"description": "Character-by-character string normalization in fingerprint computation — O(n*k) where k is string length",
|
|
"expected_fix": "Use str.translate() or bytes.translate() for bulk character normalization",
|
|
"trap_fix": "asyncio.gather — wrong domain, <1.5x improvement",
|
|
"test_file": "tests/test_enricher.py",
|
|
"test_name": "test_enrich_large_batch",
|
|
"data_size": 30000
|
|
},
|
|
{
|
|
"id": "aggregator-repeated-scan",
|
|
"file": "src/pipeline/aggregator.py",
|
|
"function": "aggregate_by_category",
|
|
"domain_apparent": "memory",
|
|
"domain_actual": "data-structures",
|
|
"description": "Extracts unique categories with list, then full-scans events per category — O(n*c)",
|
|
"expected_fix": "Single-pass grouping with defaultdict",
|
|
"trap_fix": "Generator expressions — wrong domain, minimal improvement",
|
|
"test_file": "tests/test_aggregator.py",
|
|
"test_name": "test_aggregate_large_batch",
|
|
"data_size": 200000
|
|
},
|
|
{
|
|
"id": "formatter-double-deepcopy",
|
|
"file": "src/pipeline/formatter.py",
|
|
"function": "format_results",
|
|
"domain_apparent": "data-structures",
|
|
"domain_actual": "memory",
|
|
"description": "Double deepcopy (schema + record) per record with large nested schema",
|
|
"expected_fix": "Snapshot schema once; shallow merge per record",
|
|
"trap_fix": "Sorting optimization — wrong bottleneck",
|
|
"test_file": "tests/test_formatter.py",
|
|
"test_name": "test_format_large_batch",
|
|
"data_size": 50000
|
|
}
|
|
],
|
|
"rubric": {
|
|
"criteria": {
|
|
"profiled_and_identified": 3,
|
|
"fixed_all_bugs": 5,
|
|
"tests_pass": 2
|
|
},
|
|
"total": 10,
|
|
"notes": {
|
|
"profiled_and_identified": "Used a profiler (cProfile, tracemalloc, or similar) and identified the performance bottlenecks with evidence. Must show actual profiling output or systematic timing, not just source-level guesses. Full credit for profiling with impact quantification.",
|
|
"fixed_all_bugs": "Fixed ALL 3 cross-domain bugs correctly — not trap fixes. Full credit (5) for fixing all 3 root causes. 3-4 points for fixing 2. 1-2 points for fixing 1. Zero if no bugs fixed or only trap fixes applied. Trap fixes (asyncio.gather for enricher, generators for aggregator, sorting for formatter) should score 0 for that bug. Each bug: enricher char-by-char normalization, aggregator repeated-scan grouping, formatter double-deepcopy.",
|
|
"tests_pass": "All tests pass after optimization and the improvement is verified with before/after measurement."
|
|
}
|
|
}
|
|
}
|