From 2e2e19f7aee9c08adcd6882ed5df2e6fca712a69 Mon Sep 17 00:00:00 2001
From: Kevin Turcios
Date: Fri, 10 Apr 2026 00:21:45 -0500
Subject: [PATCH] bench: add libcst visitor benchmarks for multi-file and full pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- test_benchmark_libcst_multi_file: discover_functions + get_code_optimization_context across 10 real source files
- test_benchmark_libcst_pipeline: full discover → extract → replace → merge pipeline on one file
---
 .../test_benchmark_libcst_multi_file.py       | 75 +++++++++++++++++++
 .../test_benchmark_libcst_pipeline.py         | 56 ++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 tests/benchmarks/test_benchmark_libcst_multi_file.py
 create mode 100644 tests/benchmarks/test_benchmark_libcst_pipeline.py

diff --git a/tests/benchmarks/test_benchmark_libcst_multi_file.py b/tests/benchmarks/test_benchmark_libcst_multi_file.py
new file mode 100644
index 000000000..2e04d9f3c
--- /dev/null
+++ b/tests/benchmarks/test_benchmark_libcst_multi_file.py
@@ -0,0 +1,75 @@
+"""Benchmark libcst visitor performance across many files.
+
+Exercises the visitor-heavy codepaths that benefit from the libcst dispatch
+table cache: discover_functions + get_code_optimization_context on multiple
+real source files.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from codeflash.discovery.functions_to_optimize import FunctionToOptimize
+from codeflash.languages.python.context.code_context_extractor import get_code_optimization_context
+from codeflash.languages.python.support import PythonSupport
+from codeflash.models.models import FunctionParent
+
+# Real source files from the codeflash codebase, chosen for size and visitor diversity.
+_CODEFLASH_ROOT = Path(__file__).parent.parent.parent.resolve() / "codeflash"
+
+_SOURCE_FILES: list[Path] = [
+    _CODEFLASH_ROOT / "languages" / "function_optimizer.py",
+    _CODEFLASH_ROOT / "languages" / "python" / "context" / "code_context_extractor.py",
+    _CODEFLASH_ROOT / "languages" / "python" / "support.py",
+    _CODEFLASH_ROOT / "languages" / "python" / "static_analysis" / "code_extractor.py",
+    _CODEFLASH_ROOT / "languages" / "python" / "static_analysis" / "code_replacer.py",
+    _CODEFLASH_ROOT / "code_utils" / "instrument_existing_tests.py",
+    _CODEFLASH_ROOT / "benchmarking" / "compare.py",
+    _CODEFLASH_ROOT / "models" / "models.py",
+    _CODEFLASH_ROOT / "discovery" / "discover_unit_tests.py",
+    _CODEFLASH_ROOT / "languages" / "base.py",
+]
+
+# For each source file, pick one function (module-level or a class method) to extract context for.
+# Each entry is (file_path, class_name, function_name); class_name=None means module-level.
+_TARGETS: list[tuple[Path, str | None, str]] = [
+    (_SOURCE_FILES[0], "FunctionOptimizer", "replace_function_and_helpers_with_optimized_code"),
+    (_SOURCE_FILES[1], None, "get_code_optimization_context"),
+    (_SOURCE_FILES[2], "PythonSupport", "discover_functions"),
+    (_SOURCE_FILES[3], None, "add_global_assignments"),
+    (_SOURCE_FILES[4], None, "replace_functions_in_file"),
+    (_SOURCE_FILES[5], None, "inject_profiling_into_existing_test"),
+    (_SOURCE_FILES[6], None, "compare_branches"),
+    (_SOURCE_FILES[7], None, "get_comment_prefix"),
+    (_SOURCE_FILES[8], None, "discover_unit_tests"),
+    (_SOURCE_FILES[9], None, "convert_parents_to_tuple"),
+]
+
+
+def _discover_all() -> None:
+    """Run discover_functions on every file in _SOURCE_FILES; results are discarded (timing only)."""
+    ps = PythonSupport()
+    for file_path in _SOURCE_FILES:
+        source = file_path.read_text(encoding="utf-8")  # explicit: do not depend on the locale codec
+        ps.discover_functions(source=source, file_path=file_path)
+
+
+def _extract_all_contexts() -> None:
+    """Run get_code_optimization_context on every (file, class, function) entry in _TARGETS."""
+    project_root = _CODEFLASH_ROOT.parent
+    for file_path, class_name, func_name in _TARGETS:
+        parents = [FunctionParent(name=class_name, type="ClassDef")] if class_name else []
+        fto = FunctionToOptimize(
+            function_name=func_name, file_path=file_path, parents=parents, starting_line=None, ending_line=None
+        )
+        get_code_optimization_context(fto, project_root)
+
+
+def test_benchmark_discover_functions_multi_file(benchmark) -> None:
+    """Discover functions across 10 source files."""
+    benchmark(_discover_all)
+
+
+def test_benchmark_extract_context_multi_file(benchmark) -> None:
+    """Extract code optimization context for 10 functions across 10 files."""
+    benchmark(_extract_all_contexts)
diff --git a/tests/benchmarks/test_benchmark_libcst_pipeline.py b/tests/benchmarks/test_benchmark_libcst_pipeline.py
new file mode 100644
index 000000000..4361181c5
--- /dev/null
+++ b/tests/benchmarks/test_benchmark_libcst_pipeline.py
@@ -0,0 +1,56 @@
+"""Benchmark the full libcst-heavy pipeline on a single file.
+
+Runs discover → extract context → replace functions → add global assignments
+in sequence, exercising ~15 distinct visitor/transformer classes in one pass.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from codeflash.discovery.functions_to_optimize import FunctionToOptimize
+from codeflash.languages.python.context.code_context_extractor import get_code_optimization_context
+from codeflash.languages.python.static_analysis.code_extractor import add_global_assignments
+from codeflash.languages.python.static_analysis.code_replacer import replace_functions_in_file
+from codeflash.languages.python.support import PythonSupport
+
+_CODEFLASH_ROOT = Path(__file__).parent.parent.parent.resolve() / "codeflash"
+_PROJECT_ROOT = _CODEFLASH_ROOT.parent
+
+# Target: a real, non-trivial file with classes and module-level functions.
+_TARGET_FILE = _CODEFLASH_ROOT / "languages" / "python" / "static_analysis" / "code_extractor.py"
+_TARGET_FUNC = "add_global_assignments"
+
+# A second file to serve as "optimized" source for replace/merge steps.
+_SECOND_FILE = _CODEFLASH_ROOT / "languages" / "python" / "static_analysis" / "code_replacer.py"
+
+
+def _run_pipeline() -> None:
+    """Run discover, context extraction, function replacement and global-assignment merge on one file."""
+    source = _TARGET_FILE.read_text(encoding="utf-8")  # explicit: do not depend on the locale codec
+    source2 = _SECOND_FILE.read_text(encoding="utf-8")
+
+    # 1. Discover functions (FunctionVisitor + MetadataWrapper)
+    ps = PythonSupport()
+    ps.discover_functions(source=source, file_path=_TARGET_FILE)
+
+    # 2. Extract code optimization context (multiple collectors + dependency resolver)
+    fto = FunctionToOptimize(
+        function_name=_TARGET_FUNC, file_path=_TARGET_FILE, parents=[], starting_line=None, ending_line=None
+    )
+    get_code_optimization_context(fto, _PROJECT_ROOT)
+
+    # 3. Replace functions (GlobalFunctionCollector + GlobalFunctionTransformer)
+    # Only the module-level target is replaced; the discovery result from step 1 is not consulted.
+    func_names = [_TARGET_FUNC]
+    replace_functions_in_file(
+        source_code=source, original_function_names=func_names, optimized_code=source2, preexisting_objects=set()
+    )
+
+    # 4. Add global assignments (6 visitors/transformers)
+    add_global_assignments(source2, source)
+
+
+def test_benchmark_full_pipeline(benchmark) -> None:
+    """Full discover → extract → replace → merge pipeline on one file."""
+    benchmark(_run_pipeline)