bench: add libcst visitor benchmarks for multi-file and full pipeline

- test_benchmark_libcst_multi_file: discover_functions + get_code_optimization_context across 10 real source files
- test_benchmark_libcst_pipeline: full discover → extract → replace → merge pipeline on one file
This commit is contained in:
Kevin Turcios 2026-04-10 00:21:45 -05:00
parent 1a25f05e14
commit 2e2e19f7ae
2 changed files with 131 additions and 0 deletions

View file

@ -0,0 +1,75 @@
"""Benchmark libcst visitor performance across many files.
Exercises the visitor-heavy codepaths that benefit from the libcst dispatch
table cache: discover_functions + get_code_optimization_context on multiple
real source files.
"""
from __future__ import annotations
from pathlib import Path
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.languages.python.context.code_context_extractor import get_code_optimization_context
from codeflash.languages.python.support import PythonSupport
from codeflash.models.models import FunctionParent
# Root of the codeflash package, located relative to this benchmark file.
_CODEFLASH_ROOT = Path(__file__).parent.parent.parent.resolve() / "codeflash"

# Real source files from the codeflash codebase, chosen for size and visitor
# diversity. Each tuple holds a file's path components below the package root.
_SOURCE_FILES: list[Path] = [
    _CODEFLASH_ROOT.joinpath(*parts)
    for parts in (
        ("languages", "function_optimizer.py"),
        ("languages", "python", "context", "code_context_extractor.py"),
        ("languages", "python", "support.py"),
        ("languages", "python", "static_analysis", "code_extractor.py"),
        ("languages", "python", "static_analysis", "code_replacer.py"),
        ("code_utils", "instrument_existing_tests.py"),
        ("benchmarking", "compare.py"),
        ("models", "models.py"),
        ("discovery", "discover_unit_tests.py"),
        ("languages", "base.py"),
    )
]
# One (file, enclosing class, function name) target per source file, listed in
# the same order as _SOURCE_FILES. An enclosing class of None means the target
# is passed to the extractor with no parents.
_TARGETS: list[tuple[Path, str | None, str]] = [
    (source_file, class_name, func_name)
    for source_file, (class_name, func_name) in zip(
        _SOURCE_FILES,
        (
            ("FunctionOptimizer", "replace_function_and_helpers_with_optimized_code"),
            (None, "get_code_optimization_context"),
            ("PythonSupport", "discover_functions"),
            (None, "add_global_assignments"),
            (None, "replace_functions_in_file"),
            (None, "inject_profiling_into_existing_test"),
            (None, "compare_branches"),
            (None, "get_comment_prefix"),
            (None, "discover_unit_tests"),
            (None, "convert_parents_to_tuple"),
        ),
    )
]
def _discover_all() -> None:
    """Run ``discover_functions`` on every file in ``_SOURCE_FILES``.

    Each file is read from disk and passed, together with its path, to one
    shared ``PythonSupport`` instance. The discovery results are discarded;
    the function exists only to be timed by the benchmark.
    """
    ps = PythonSupport()
    for file_path in _SOURCE_FILES:
        # Decode explicitly as UTF-8 so the benchmark does not depend on the
        # platform's locale encoding.
        source = file_path.read_text(encoding="utf-8")
        ps.discover_functions(source=source, file_path=file_path)
def _extract_all_contexts() -> None:
    """Call ``get_code_optimization_context`` once for each entry in ``_TARGETS``.

    A ``FunctionToOptimize`` is built per target (with a single ``ClassDef``
    parent when the target names an enclosing class) and resolved against the
    directory that contains the codeflash package. Results are discarded.
    """
    root_dir = _CODEFLASH_ROOT.parent
    for source_path, owner, name in _TARGETS:
        if owner:
            enclosing = [FunctionParent(name=owner, type="ClassDef")]
        else:
            enclosing = []
        target = FunctionToOptimize(
            function_name=name, file_path=source_path, parents=enclosing, starting_line=None, ending_line=None
        )
        get_code_optimization_context(target, root_dir)
def test_benchmark_discover_functions_multi_file(benchmark) -> None:
    """Benchmark function discovery over every file in ``_SOURCE_FILES``.

    ``benchmark`` is called with ``_discover_all`` as the callable to measure
    (presumably the pytest-benchmark fixture; confirm against the test setup).
    """
    benchmark(_discover_all)
def test_benchmark_extract_context_multi_file(benchmark) -> None:
    """Benchmark context extraction for every target listed in ``_TARGETS``.

    ``benchmark`` is called with ``_extract_all_contexts`` as the callable to
    measure (presumably the pytest-benchmark fixture; confirm against the test setup).
    """
    benchmark(_extract_all_contexts)

View file

@ -0,0 +1,56 @@
"""Benchmark the full libcst-heavy pipeline on a single file.
Runs discover → extract context → replace functions → add global assignments
in sequence, exercising ~15 distinct visitor/transformer classes in one pass.
"""
from __future__ import annotations
from pathlib import Path
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.languages.python.context.code_context_extractor import get_code_optimization_context
from codeflash.languages.python.static_analysis.code_extractor import add_global_assignments
from codeflash.languages.python.static_analysis.code_replacer import replace_functions_in_file
from codeflash.languages.python.support import PythonSupport
# The codeflash package and the project root above it, located relative to this file.
_CODEFLASH_ROOT = Path(__file__).parent.parent.parent.resolve() / "codeflash"
_PROJECT_ROOT = _CODEFLASH_ROOT.parent

# Target: a real, non-trivial source file and the function within it that the
# pipeline extracts context for and replaces.
_TARGET_FUNC = "add_global_assignments"
_TARGET_FILE = _CODEFLASH_ROOT.joinpath("languages", "python", "static_analysis", "code_extractor.py")

# A sibling file whose text stands in as the "optimized" source for the
# replace/merge steps.
_SECOND_FILE = _TARGET_FILE.with_name("code_replacer.py")
def _run_pipeline() -> None:
    """Simulate a single-file optimization pass through the full visitor pipeline.

    Runs four stages in order: function discovery on ``_TARGET_FILE``, context
    extraction for ``_TARGET_FUNC``, function replacement using the text of
    ``_SECOND_FILE`` as the stand-in optimized code, and global-assignment
    merging between the two sources. Every result is discarded; the function
    exists only to be timed by the benchmark.
    """
    # Decode explicitly as UTF-8 so the benchmark does not depend on the
    # platform's locale encoding.
    source = _TARGET_FILE.read_text(encoding="utf-8")
    optimized_source = _SECOND_FILE.read_text(encoding="utf-8")

    # 1. Discover functions (FunctionVisitor + MetadataWrapper).
    #    The result is not needed by later stages, so it is not bound to a name.
    PythonSupport().discover_functions(source=source, file_path=_TARGET_FILE)

    # 2. Extract code optimization context (multiple collectors + dependency resolver).
    fto = FunctionToOptimize(
        function_name=_TARGET_FUNC, file_path=_TARGET_FILE, parents=[], starting_line=None, ending_line=None
    )
    get_code_optimization_context(fto, _PROJECT_ROOT)

    # 3. Replace functions (GlobalFunctionCollector + GlobalFunctionTransformer).
    #    The replacement target is always the fixed _TARGET_FUNC.
    replace_functions_in_file(
        source_code=source,
        original_function_names=[_TARGET_FUNC],
        optimized_code=optimized_source,
        preexisting_objects=set(),
    )

    # 4. Add global assignments (6 visitors/transformers).
    add_global_assignments(optimized_source, source)
def test_benchmark_full_pipeline(benchmark) -> None:
    """Benchmark the full discover → extract → replace → merge pipeline on one file.

    ``benchmark`` is called with ``_run_pipeline`` as the callable to measure
    (presumably the pytest-benchmark fixture; confirm against the test setup).
    """
    benchmark(_run_pipeline)