candidates whose behavioral runtime exceeds 10x the baseline will skip benchmarking

This commit is contained in:
Sarthak Agarwal 2026-03-06 23:30:37 +05:30
parent 5d872e845d
commit 36c50c6136
4 changed files with 212 additions and 0 deletions

View file

@ -38,6 +38,9 @@ TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid an incre
# setting this value to 1 will disable repair if there is at least one correct candidate
MIN_CORRECT_CANDIDATES = 2
# Skip benchmarking if candidate behavioral runtime exceeds this multiple of baseline behavioral runtime
BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD = 10.0
try:
from codeflash.lsp.helpers import is_LSP_enabled

View file

@ -47,6 +47,7 @@ from codeflash.code_utils.code_utils import (
unified_diff_strings,
)
from codeflash.code_utils.config_consts import (
BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD,
COVERAGE_THRESHOLD,
INDIVIDUAL_TESTCASE_TIMEOUT,
MIN_CORRECT_CANDIDATES,
@ -2529,6 +2530,25 @@ class FunctionOptimizer:
)
return self.get_results_not_matched_error()
candidate_behavior_runtime = candidate_behavior_results.total_passed_runtime()
baseline_behavior_runtime = baseline_results.behavior_test_results.total_passed_runtime()
if (
baseline_behavior_runtime > 0
and candidate_behavior_runtime > 0
and candidate_behavior_runtime > baseline_behavior_runtime * BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD
):
slowdown = candidate_behavior_runtime / baseline_behavior_runtime
logger.info(
"h4|Candidate %d is %.1fx slower than baseline in behavioral tests, skipping benchmarking ⏭️",
optimization_candidate_index,
slowdown,
)
console.rule()
return Failure(
f"Candidate behavioral runtime ({candidate_behavior_runtime}ns) "
f"exceeds {BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD:.0f}x baseline ({baseline_behavior_runtime}ns)."
)
logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...")
console.rule()

View file

View file

@ -0,0 +1,189 @@
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
from codeflash.code_utils.config_consts import BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD
from codeflash.either import Failure, is_successful
from codeflash.models.models import FunctionTestInvocation, InvocationId, OriginalCodeBaseline, TestResults
from codeflash.models.test_type import TestType
# Dummy test-file path shared by every mocked invocation in this module;
# nothing is read from disk (Path.read_text is patched in _run_candidate).
MOCK_FILE_PATH = Path("tests/test_example.py")
def _make_test_results(runtime_ns: int) -> TestResults:
    """Build a TestResults containing a single passing invocation with the given runtime (ns)."""
    invocation_id = InvocationId(
        test_module_path="tests.test_example",
        test_class_name=None,
        test_function_name="test_func",
        function_getting_tested="func",
        iteration_id="0",
    )
    passing_invocation = FunctionTestInvocation(
        loop_index=1,
        id=invocation_id,
        file_name=MOCK_FILE_PATH,
        did_pass=True,
        runtime=runtime_ns,
        test_framework="pytest",
        test_type=TestType.EXISTING_UNIT_TEST,
        return_value=None,
        timed_out=False,
    )
    collected = TestResults()
    collected.add(passing_invocation)
    return collected
def _make_baseline(behavior_runtime_ns: int) -> OriginalCodeBaseline:
    """Create a minimal OriginalCodeBaseline whose behavioral runtime is *behavior_runtime_ns* ns."""
    behavior_results = _make_test_results(behavior_runtime_ns)
    return OriginalCodeBaseline(
        behavior_test_results=behavior_results,
        benchmarking_test_results=TestResults(),
        line_profile_results={},
        runtime=behavior_runtime_ns,
        coverage_results=None,
    )
def _make_optimizer_mock(mock_run_and_parse: MagicMock, mock_compare_results: MagicMock, **kwargs: object) -> MagicMock:
    """Build a MagicMock standing in for a FunctionOptimizer instance.

    Wires the attributes that run_optimized_candidate reads (function metadata,
    test files, and the two patched collaborator methods).  Any extra keyword
    arguments are set as plain attributes on the mock.
    """
    optimizer = MagicMock()
    optimizer.function_to_optimize.file_path = MOCK_FILE_PATH
    optimizer.function_to_optimize.is_async = False
    optimizer.test_files = []
    optimizer.run_and_parse_tests = mock_run_and_parse
    optimizer.compare_candidate_results = mock_compare_results
    # **kwargs keys are always plain identifiers (they can never contain "."),
    # so the original dotted-key guard was dead code; set attributes directly
    # with a normal statement instead of a conditional expression.
    for attr_name, attr_value in kwargs.items():
        setattr(optimizer, attr_name, attr_value)
    return optimizer
def _run_candidate(optimizer: MagicMock, baseline: OriginalCodeBaseline) -> Failure | object:
    """Invoke the real run_optimized_candidate against a mocked optimizer instance."""
    # Imported inside the helper so the project module is only loaded when a
    # test actually runs (after the class-level patch decorators are active).
    from codeflash.optimization.function_optimizer import FunctionOptimizer

    fake_source = "def func(): pass"
    with patch.object(Path, "read_text", return_value=fake_source):
        outcome = FunctionOptimizer.run_optimized_candidate(
            optimizer,
            optimization_candidate_index=1,
            baseline_results=baseline,
            original_helper_code={},
            file_path_to_helper_classes={},
            eval_ctx=MagicMock(),
            code_context=MagicMock(),
            candidate=MagicMock(),
            exp_type="test",
        )
    return outcome
class TestBehavioralTimingGate:
    """Tests for the behavioral-slowdown gate in run_optimized_candidate.

    A candidate whose behavioral-test runtime exceeds
    BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD times the baseline behavioral runtime
    must be rejected before benchmarking; candidates under the threshold (or
    any candidate when the baseline runtime is 0) must proceed to the
    benchmarking run.  mock_run_and_parse.call_count distinguishes the two
    paths: 1 == behavioral run only, 2 == behavioral + benchmarking runs.
    """

    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.run_and_parse_tests")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.compare_candidate_results")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.write_code_and_helpers")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.instrument_capture")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.get_test_env")
    @patch("codeflash.optimization.function_optimizer.get_run_tmp_file")
    def test_slow_candidate_skips_benchmarking(
        self,
        mock_get_run_tmp_file: MagicMock,
        mock_get_test_env: MagicMock,
        mock_instrument_capture: MagicMock,
        mock_write_code_and_helpers: MagicMock,
        mock_compare_results: MagicMock,
        mock_run_and_parse: MagicMock,
    ) -> None:
        """A candidate 15x slower than baseline should be rejected without benchmarking."""
        baseline = _make_baseline(1000)
        candidate_results = _make_test_results(15000)  # 15x slower than the 1000ns baseline
        mock_get_run_tmp_file.return_value = MagicMock()
        mock_get_test_env.return_value = {}
        mock_run_and_parse.return_value = (candidate_results, None)
        mock_compare_results.return_value = (True, [])
        optimizer = _make_optimizer_mock(mock_run_and_parse, mock_compare_results)
        optimizer.write_code_and_helpers = mock_write_code_and_helpers
        optimizer.instrument_capture = mock_instrument_capture
        optimizer.get_test_env = mock_get_test_env
        result = _run_candidate(optimizer, baseline)
        assert not is_successful(result)
        assert isinstance(result, Failure)
        # The gate's failure message embeds the threshold multiplier.
        assert f"{BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD:.0f}x" in str(result.value)
        # Only the behavioral run happened; benchmarking was skipped.
        assert mock_run_and_parse.call_count == 1

    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.run_and_parse_tests")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.compare_candidate_results")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.write_code_and_helpers")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.instrument_capture")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.get_test_env")
    @patch("codeflash.optimization.function_optimizer.get_run_tmp_file")
    def test_acceptable_candidate_proceeds_to_benchmarking(
        self,
        mock_get_run_tmp_file: MagicMock,
        mock_get_test_env: MagicMock,
        mock_instrument_capture: MagicMock,
        mock_write_code_and_helpers: MagicMock,
        mock_compare_results: MagicMock,
        mock_run_and_parse: MagicMock,
    ) -> None:
        """A candidate 5x slower than baseline (under threshold) should proceed to benchmarking."""
        baseline = _make_baseline(1000)
        candidate_behavior_results = _make_test_results(5000)  # 5x slower, under 10x threshold
        candidate_benchmark_results = _make_test_results(5000)
        mock_get_run_tmp_file.return_value = MagicMock()
        mock_get_test_env.return_value = {}
        # First call serves the behavioral run, second the benchmarking run.
        mock_run_and_parse.side_effect = [(candidate_behavior_results, None), (candidate_benchmark_results, None)]
        mock_compare_results.return_value = (True, [])
        optimizer = _make_optimizer_mock(mock_run_and_parse, mock_compare_results)
        optimizer.write_code_and_helpers = mock_write_code_and_helpers
        optimizer.instrument_capture = mock_instrument_capture
        optimizer.get_test_env = mock_get_test_env
        optimizer.args.benchmark = False
        optimizer.collect_async_metrics.return_value = (None, None)
        # The returned result is not asserted on here (it depends on further
        # mocked machinery); reaching the second run is the behavior under test.
        _run_candidate(optimizer, baseline)
        assert mock_run_and_parse.call_count == 2

    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.run_and_parse_tests")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.compare_candidate_results")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.write_code_and_helpers")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.instrument_capture")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.get_test_env")
    @patch("codeflash.optimization.function_optimizer.get_run_tmp_file")
    def test_zero_baseline_runtime_skips_check(
        self,
        mock_get_run_tmp_file: MagicMock,
        mock_get_test_env: MagicMock,
        mock_instrument_capture: MagicMock,
        mock_write_code_and_helpers: MagicMock,
        mock_compare_results: MagicMock,
        mock_run_and_parse: MagicMock,
    ) -> None:
        """When baseline behavioral runtime is 0, the timing gate should be skipped."""
        baseline = OriginalCodeBaseline(
            behavior_test_results=TestResults(),  # Empty = 0 runtime
            benchmarking_test_results=TestResults(),
            line_profile_results={},
            runtime=0,
            coverage_results=None,
        )
        candidate_behavior_results = _make_test_results(50000)
        candidate_benchmark_results = _make_test_results(50000)
        mock_get_run_tmp_file.return_value = MagicMock()
        mock_get_test_env.return_value = {}
        mock_run_and_parse.side_effect = [(candidate_behavior_results, None), (candidate_benchmark_results, None)]
        mock_compare_results.return_value = (True, [])
        optimizer = _make_optimizer_mock(mock_run_and_parse, mock_compare_results)
        optimizer.write_code_and_helpers = mock_write_code_and_helpers
        optimizer.instrument_capture = mock_instrument_capture
        optimizer.get_test_env = mock_get_test_env
        optimizer.args.benchmark = False
        optimizer.collect_async_metrics.return_value = (None, None)
        # A very slow candidate must NOT be gated when the baseline runtime is 0;
        # both test runs should still execute.
        _run_candidate(optimizer, baseline)
        assert mock_run_and_parse.call_count == 2