Merge pull request #2059 from codeflash-ai/refactor/benchmarks-to-dotcodeflash

Move benchmarks to .codeflash/benchmarks/
This commit is contained in:
Kevin Turcios 2026-04-13 05:06:00 -05:00 committed by GitHub
commit 4d4cb5f517
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 24 additions and 11 deletions

View file

@@ -2,7 +2,7 @@ from codeflash.models.models import FunctionTestInvocation, InvocationId, TestRe
from codeflash.verification.parse_test_output import merge_test_results
-def generate_test_invocations(count=100):
+def generate_test_invocations(count: int = 100) -> tuple[TestResults, TestResults]:
"""Generate a set number of test invocations for benchmarking."""
test_results_xml = TestResults()
test_results_bin = TestResults()
@@ -21,7 +21,7 @@ def generate_test_invocations(count=100):
function_getting_tested="sorter",
iteration_id=iteration_id,
),
-file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
+file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py", # noqa: S108
did_pass=True,
runtime=None if i % 3 == 0 else i * 100, # Vary runtime values
test_framework="unittest",
@@ -42,7 +42,7 @@ def generate_test_invocations(count=100):
function_getting_tested="sorter",
iteration_id=iteration_id,
),
-file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
+file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py", # noqa: S108
did_pass=True,
runtime=500 + i * 20, # Generate varying runtime values
test_framework="unittest",
@@ -56,12 +56,12 @@ def generate_test_invocations(count=100):
return test_results_xml, test_results_bin
-def run_merge_benchmark(count=100):
+def run_merge_benchmark(count: int = 100) -> None:
test_results_xml, test_results_bin = generate_test_invocations(count)
# Perform the merge operation that will be benchmarked
merge_test_results(xml_test_results=test_results_xml, bin_test_results=test_results_bin, test_framework="unittest")
-def test_benchmark_merge_test_results(benchmark):
+def test_benchmark_merge_test_results(benchmark) -> None:
benchmark(run_merge_benchmark, 1000) # Default to 100 test invocations

View file

@@ -16,7 +16,7 @@
"tests/",
"-vv",
"--ignore",
-"tests/benchmarks/"
+".codeflash/benchmarks/"
],
},
"launch": {

View file

@@ -156,7 +156,14 @@ def process_pyproject_config(args: Namespace) -> Namespace:
raise AssertionError("--tests-root must be specified")
assert Path(args.tests_root).is_dir(), f"--tests-root {args.tests_root} must be a valid directory"
if args.benchmark:
-assert args.benchmarks_root is not None, "--benchmarks-root must be specified when running with --benchmark"
+if args.benchmarks_root is None:
+# Auto-discover .codeflash/benchmarks/ convention
+candidate = Path.cwd() / ".codeflash" / "benchmarks"
+if candidate.is_dir():
+args.benchmarks_root = str(candidate)
+else:
+msg = "--benchmarks-root must be specified when running with --benchmark, or .codeflash/benchmarks/ must exist"
+raise AssertionError(msg)
assert Path(args.benchmarks_root).is_dir(), (
f"--benchmarks-root {args.benchmarks_root} must be a valid directory"
)

View file

@@ -87,7 +87,13 @@ def run_compare(args: Namespace) -> None:
benchmarks_root_str = pyproject_config.get("benchmarks_root")
if not benchmarks_root_str:
-logger.error("benchmarks-root must be configured in [tool.codeflash] to use compare")
+# Auto-discover .codeflash/benchmarks/ if it exists
+candidate = project_root / ".codeflash" / "benchmarks"
+if candidate.is_dir():
+benchmarks_root_str = str(candidate)
+logger.info(f"Auto-discovered benchmarks at {candidate}")
+else:
+logger.error("benchmarks-root must be configured in [tool.codeflash] or .codeflash/benchmarks/ must exist")
sys.exit(1)
benchmarks_root = Path(benchmarks_root_str).resolve()

View file

@@ -423,7 +423,7 @@ def get_run_tmp_file(file_path: Path | str) -> Path:
file_path = Path(file_path)
if not hasattr(get_run_tmp_file, "tmpdir_path"):
get_run_tmp_file.tmpdir = TemporaryDirectory(prefix="codeflash_")
-get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name)
+get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name).resolve()
return get_run_tmp_file.tmpdir_path / file_path

View file

@@ -354,7 +354,7 @@ __version__ = "{version}"
# All paths are relative to this pyproject.toml's directory.
module-root = "codeflash"
tests-root = "tests"
-benchmarks-root = "tests/benchmarks"
+benchmarks-root = ".codeflash/benchmarks"
ignore-paths = []
formatter-cmds = [
"uvx ruff check --exit-zero --fix $file",