mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
117 lines
3.9 KiB
JSON
117 lines
3.9 KiB
JSON
{
  "name": "ranking-hard",
  "description": "10-function pipeline with 3 hot bottlenecks (~98%) and 7 cold antipattern traps. Tests experiment efficiency at scale.",
  "eval_type": "ranking-hard",
  "test_command": "PYTHONPATH=src uv run python -m pytest tests/ -v",
  "bugs": [
    {
      "id": "score-on2",
      "file": "src/analytics/pipeline.py",
      "function": "score_by_category",
      "domain": "data-structures",
      "description": "O(n²) nested loop counting category peers for each record",
      "expected_fix": "Pre-compute category counts with Counter/defaultdict",
      "impact_pct": 31
    },
    {
      "id": "rank-insertion-sort",
      "file": "src/analytics/pipeline.py",
      "function": "rank_results",
      "domain": "data-structures",
      "description": "O(n²) insertion sort with custom comparator",
      "expected_fix": "Use sorted() with key function",
      "impact_pct": 31
    },
    {
      "id": "summary-cross-category",
      "file": "src/analytics/pipeline.py",
      "function": "generate_summary",
      "domain": "data-structures",
      "description": "O(c² × n) cross-category source overlap with nested list scans per category pair",
      "expected_fix": "Pre-build source sets per category, use set intersection",
      "impact_pct": 36
    },
    {
      "id": "enrich-deepcopy",
      "file": "src/analytics/pipeline.py",
      "function": "enrich_metadata",
      "domain": "data-structures",
      "description": "copy.deepcopy(config) per record",
      "expected_fix": "Extract defaults once before loop",
      "impact_pct": 0.9,
      "cold_trap": true
    },
    {
      "id": "format-json-roundtrip",
      "file": "src/analytics/pipeline.py",
      "function": "format_output",
      "domain": "data-structures",
      "description": "Double JSON serialization per record for integrity check",
      "expected_fix": "Single serialization or remove round-trip",
      "impact_pct": 0.4,
      "cold_trap": true
    },
    {
      "id": "normalize-string-concat",
      "file": "src/analytics/pipeline.py",
      "function": "normalize_fields",
      "domain": "data-structures",
      "description": "Character-by-character string concatenation in loop",
      "expected_fix": "Use split/join or regex",
      "impact_pct": 0.2,
      "cold_trap": true
    },
    {
      "id": "dedup-list-scan",
      "file": "src/analytics/pipeline.py",
      "function": "deduplicate",
      "domain": "data-structures",
      "description": "List-based ID dedup with O(n) scan per record",
      "expected_fix": "Use set for seen IDs",
      "impact_pct": 0.2,
      "cold_trap": true
    },
    {
      "id": "parse-regex-compile",
      "file": "src/analytics/pipeline.py",
      "function": "parse_records",
      "domain": "data-structures",
      "description": "re.compile() called per field instead of once",
      "expected_fix": "Compile regex once outside the loop",
      "impact_pct": 0.1,
      "cold_trap": true
    },
    {
      "id": "validate-list-blocklist",
      "file": "src/analytics/pipeline.py",
      "function": "validate_records",
      "domain": "data-structures",
      "description": "List-based blocklist and required fields checks",
      "expected_fix": "Convert to sets",
      "impact_pct": 0.05,
      "cold_trap": true
    },
    {
      "id": "filter-list-tags",
      "file": "src/analytics/pipeline.py",
      "function": "apply_filters",
      "domain": "data-structures",
      "description": "Nested loop checking tags against blocklist",
      "expected_fix": "Use set intersection",
      "impact_pct": 0.03,
      "cold_trap": true
    }
  ],
  "test_file": "tests/test_pipeline.py",
  "test_name": "test_large_batch",
  "data_size": 5000,
  "rubric": {
    "criteria": {
      "built_ranked_list_with_impact_pct": 3,
      "fixed_top_3_hot_functions": 3,
      "skipped_cold_traps": 2,
      "reprofiled_after_fixes": 2
    },
    "total": 10
  }
}