mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
* chore: add gitignore entries for local eval repos, e2e fixtures, and env files * fix: restore clean bubble_sort_method.py test fixture The call-site ID commit re-contaminated this file with instrumentation decorators, causing tests to fail with missing CODEFLASH_LOOP_INDEX. * fix: resolve ruff and mypy errors in codeflash-python - Add import-not-found ignores for optional torch/jax imports - Extract magic column index to _STDOUT_COLUMN_INDEX constant - Fix unused variable in _instrument_sync.py - Cast cpu_time_ns to int for mypy arg-type * fix: add skip markers for optional deps and apply ruff formatting to tests Skip torch/jax/tensorflow tests when those packages are not installed. Move has_module helper to conftest.py for reuse across test files. Apply ruff format to all test files that drifted. * fix: resolve remaining ruff format and mypy errors - Add missing blank line in conftest.py (ruff format) - Remove unused import-untyped ignore on jax import (mypy unused-ignore) - Add type: ignore comments for object-typed SQLite row values * chore: bump codeflash-python to 0.1.1.dev0
881 lines
26 KiB
Python
881 lines
26 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from unittest.mock import Mock
|
|
|
|
from codeflash_core import performance_gain
|
|
from codeflash_python.analysis._coverage import (
|
|
CoverageData,
|
|
CoverageStatus,
|
|
FunctionCoverage,
|
|
)
|
|
from codeflash_python.benchmarking.models import ConcurrencyMetrics
|
|
from codeflash_python.context.models import CodeOptimizationContext
|
|
from codeflash_python.test_discovery.models import TestType
|
|
from codeflash_python.testing.models import (
|
|
FunctionTestInvocation,
|
|
InvocationId,
|
|
TestResults,
|
|
)
|
|
from codeflash_python.verification._critic import (
|
|
concurrency_gain,
|
|
coverage_critic,
|
|
get_pr_number,
|
|
quantity_of_tests_critic,
|
|
speedup_critic,
|
|
throughput_gain,
|
|
)
|
|
from codeflash_python.verification.models import OptimizedCandidateResult
|
|
|
|
|
|
def test_performance_gain() -> None:
    """performance_gain returns the correct relative speedup."""
    # Fixed 1000ns baseline; maps optimized runtime -> expected relative gain.
    expected_by_optimized_ns = {
        0: 0.0,  # zero optimized runtime yields no measurable gain
        500: 1.0,  # twice as fast -> 100% gain
        900: 0.1111111111111111,  # (1000 - 900) / 900
        1000: 0.0,  # identical runtime -> no gain
        1100: -0.09090909090909091,  # slower -> negative gain
    }
    for optimized_ns, expected_gain in expected_by_optimized_ns.items():
        gain = performance_gain(
            original_runtime_ns=1000, optimized_runtime_ns=optimized_ns
        )
        assert gain == expected_gain
|
def test_speedup_critic() -> None:
    """speedup_critic accepts candidates above the noise floor."""

    def build_candidate(runtime: int) -> OptimizedCandidateResult:
        # Everything except the measured runtime is shared boilerplate.
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=runtime,
            behavior_test_results=TestResults(),
            benchmarking_test_results=TestResults(),
            optimization_candidate_index=0,
            total_candidate_timing=12,
        )

    # 20% improvement on a 1000ns baseline is accepted.
    assert speedup_critic(
        build_candidate(800), 1000, 1000, disable_gh_action_noise=True
    )

    # 6% improvement on the same small baseline is rejected as noise.
    assert not speedup_critic(
        build_candidate(940), 1000, 1000, disable_gh_action_noise=True
    )

    # The same 6% improvement on a much larger baseline is accepted.
    assert speedup_critic(
        build_candidate(94000), 100000, 100000, disable_gh_action_noise=True
    )
|
def test_generated_test_critic() -> None:
    """quantity_of_tests_critic requires enough passing tests.

    Builds a small palette of test invocations (differing only in type,
    pass/fail state, and loop index) and checks that the critic accepts
    or rejects various combinations, including the stricter PR mode.
    """

    def make_invocation(
        name: str,
        test_type: TestType,
        *,
        did_pass: bool = True,
        loop_index: int = 1,
    ) -> FunctionTestInvocation:
        # All fields other than name / type / pass state / loop index are
        # identical boilerplate across every invocation in this test.
        return FunctionTestInvocation(
            id=InvocationId(
                test_module_path="",
                test_class_name="",
                test_function_name=name,
                function_getting_tested="sorter",
                iteration_id="",
            ),
            file_name=Path(name),
            did_pass=did_pass,
            runtime=0,
            test_framework="pytest",
            test_type=test_type,
            return_value=None,
            cpu_runtime=0,
            timed_out=False,
            loop_index=loop_index,
        )

    def make_candidate(
        test_results: list[FunctionTestInvocation],
    ) -> OptimizedCandidateResult:
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=100,
            behavior_test_results=TestResults(test_results=test_results),
            benchmarking_test_results=TestResults(),
            total_candidate_timing=12,
            optimization_candidate_index=0,
        )

    test_1 = make_invocation("test_1", TestType.GENERATED_REGRESSION)
    test_2 = make_invocation("test_2", TestType.GENERATED_REGRESSION)
    test_3 = make_invocation("test_3", TestType.EXISTING_UNIT_TEST)
    test_4 = make_invocation(
        "test_4", TestType.GENERATED_REGRESSION, did_pass=False
    )
    test_5 = make_invocation("test_5", TestType.REPLAY_TEST)
    test_6 = make_invocation(
        "test_6", TestType.GENERATED_REGRESSION, loop_index=2
    )
    test_7 = make_invocation("test_7", TestType.EXISTING_UNIT_TEST)

    # Mixed passing generated/existing/replay tests: accepted.
    assert quantity_of_tests_critic(
        make_candidate(
            [test_1, test_2, test_3, test_4, test_5, test_6, test_7, test_1]
        )
    )
    # Same tests in a different order (ordering must not matter).
    assert quantity_of_tests_critic(
        make_candidate(
            [test_1, test_2, test_3, test_6, test_7, test_1, test_4, test_1]
        )
    )
    assert quantity_of_tests_critic(
        make_candidate(
            [test_1, test_3, test_4, test_2, test_7, test_1, test_6, test_1]
        )
    )
    # A single passing test is not enough.
    assert not quantity_of_tests_critic(make_candidate([test_1]))
    assert quantity_of_tests_critic(
        make_candidate(
            [test_1, test_2, test_3, test_4, test_5, test_1, test_1, test_1]
        )
    )
    # Only one unique passing generated test (test_4 failed): rejected.
    assert not quantity_of_tests_critic(make_candidate([test_1, test_4, test_6]))
    # A passing replay test alone is sufficient.
    assert quantity_of_tests_critic(make_candidate([test_4, test_5]))
    assert quantity_of_tests_critic(
        make_candidate(
            [test_1, test_2, test_3, test_4, test_5, test_1, test_1, test_1]
        )
    )

    # PR mode: the critic is stricter when CODEFLASH_PR_NUMBER is set.
    # Clear the cached lookup first, and use try/finally so the environment
    # (and the cache) are restored even if an assertion fails — otherwise
    # the PR number would leak into unrelated tests.
    get_pr_number.cache_clear()
    os.environ["CODEFLASH_PR_NUMBER"] = "1234"
    try:
        assert not quantity_of_tests_critic(
            make_candidate([test_1, test_2, test_3, test_6])
        )
        assert not quantity_of_tests_critic(
            make_candidate([test_1, test_2, test_3, test_4])
        )
        assert quantity_of_tests_critic(
            make_candidate(
                [test_1, test_2, test_3, test_5, test_1, test_1, test_1, test_1]
            )
        )
    finally:
        del os.environ["CODEFLASH_PR_NUMBER"]
        # Bug fix: the cached PR number (1234) would otherwise survive the
        # env-var deletion because get_pr_number memoizes its result.
        get_pr_number.cache_clear()
|
def test_coverage_critic() -> None:
    """coverage_critic passes when coverage is above the threshold."""
    mock_code_context = Mock(spec=CodeOptimizationContext)

    def make_coverage(
        total_coverage: float, func_coverage: FunctionCoverage
    ) -> CoverageData:
        # Only the aggregate coverage and the main-function coverage vary
        # between cases; every other field is shared boilerplate.
        return CoverageData(
            file_path=Path("test_file.py"),
            coverage=total_coverage,
            function_name="test_function",
            functions_being_tested=["function1", "function2"],
            graph={},
            code_context=mock_code_context,
            main_func_coverage=func_coverage,
            dependent_func_coverage=None,
            status=CoverageStatus.PARSED_SUCCESSFULLY,
        )

    # Full coverage: clearly above the threshold.
    passing_coverage = make_coverage(
        100.0,
        FunctionCoverage(
            name="test_function",
            coverage=100.0,
            executed_lines=[10],
            unexecuted_lines=[2],
            executed_branches=[[5]],
            unexecuted_branches=[[1]],
        ),
    )
    assert coverage_critic(passing_coverage) is True

    # Borderline coverage (60% total / 50% function) still passes.
    border_coverage = make_coverage(
        60.0,
        FunctionCoverage(
            name="test_function",
            coverage=50.0,
            executed_lines=[10],
            unexecuted_lines=[2],
            executed_branches=[[5]],
            unexecuted_branches=[[1]],
        ),
    )
    assert coverage_critic(border_coverage) is True

    # Main function never executed: rejected.
    failing_coverage = make_coverage(
        30.0,
        FunctionCoverage(
            name="test_function",
            coverage=0.0,
            executed_lines=[],
            unexecuted_lines=[10],
            executed_branches=[],
            unexecuted_branches=[[5]],
        ),
    )
    assert coverage_critic(failing_coverage) is False
|
def test_throughput_gain() -> None:
    """throughput_gain calculates relative throughput improvement."""
    # (original, optimized, expected relative gain)
    cases = [
        (100, 150, 0.5),  # 50% more operations
        (100, 100, 0.0),  # unchanged
        (100, 80, -0.2),  # regression -> negative gain
        (0, 50, 0.0),  # zero baseline yields zero gain
        (50, 200, 3.0),  # 4x throughput -> 300% gain
    ]
    for original, optimized, expected in cases:
        gain = throughput_gain(
            original_throughput=original, optimized_throughput=optimized
        )
        assert gain == expected
|
def test_speedup_critic_with_async_throughput() -> None:
    """speedup_critic evaluates async throughput alongside runtime."""
    original_code_runtime = 10000
    original_async_throughput = 100

    def make_candidate(
        runtime: int, throughput: int | None
    ) -> OptimizedCandidateResult:
        # Only runtime and async throughput vary between the cases below.
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=runtime,
            behavior_test_results=TestResults(),
            benchmarking_test_results=TestResults(),
            optimization_candidate_index=0,
            total_candidate_timing=runtime,
            async_throughput=throughput,
        )

    # Both runtime and throughput improve significantly.
    assert speedup_critic(
        candidate_result=make_candidate(8000, 120),
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=None,
        original_async_throughput=original_async_throughput,
        best_throughput_until_now=None,
        disable_gh_action_noise=True,
    )

    # Runtime improves, throughput below threshold (should pass).
    assert speedup_critic(
        candidate_result=make_candidate(8000, 105),
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=None,
        original_async_throughput=original_async_throughput,
        best_throughput_until_now=None,
        disable_gh_action_noise=True,
    )

    # Throughput improves, runtime below threshold (should pass).
    assert speedup_critic(
        candidate_result=make_candidate(9800, 120),
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=None,
        original_async_throughput=original_async_throughput,
        best_throughput_until_now=None,
        disable_gh_action_noise=True,
    )

    # No throughput data - falls back to runtime-only.
    assert speedup_critic(
        candidate_result=make_candidate(8000, None),
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=None,
        original_async_throughput=None,
        best_throughput_until_now=None,
        disable_gh_action_noise=True,
    )

    # best_throughput_until_now comparison: the same candidate passes with no
    # prior best, but is rejected once a better runtime AND throughput exist.
    contender = make_candidate(8000, 115)
    assert speedup_critic(
        candidate_result=contender,
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=None,
        original_async_throughput=original_async_throughput,
        best_throughput_until_now=None,
        disable_gh_action_noise=True,
    )
    assert not speedup_critic(
        candidate_result=contender,
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=7000,
        original_async_throughput=original_async_throughput,
        best_throughput_until_now=120,
        disable_gh_action_noise=True,
    )

    # Zero original throughput (edge case).
    assert speedup_critic(
        candidate_result=make_candidate(8000, 50),
        original_code_runtime=original_code_runtime,
        best_runtime_until_now=None,
        original_async_throughput=0,
        best_throughput_until_now=None,
        disable_gh_action_noise=True,
    )
|
def test_concurrency_gain() -> None:
    """concurrency_gain measures relative concurrency ratio improvement."""

    def metrics(
        sequential_ns: int, concurrent_ns: int, ratio: float
    ) -> ConcurrencyMetrics:
        return ConcurrencyMetrics(
            sequential_time_ns=sequential_ns,
            concurrent_time_ns=concurrent_ns,
            concurrency_factor=10,
            concurrency_ratio=ratio,
        )

    baseline = metrics(10_000_000, 10_000_000, 1.0)
    much_better = metrics(10_000_000, 1_000_000, 10.0)

    # 1.0x -> 10.0x ratio is a 9x relative gain.
    assert concurrency_gain(baseline, much_better) == 9.0

    # Identical ratio means zero gain.
    unchanged = metrics(10_000_000, 10_000_000, 1.0)
    assert concurrency_gain(baseline, unchanged) == 0.0

    # 1.0x -> 1.25x ratio is a 0.25 relative gain.
    slightly_better = metrics(10_000_000, 8_000_000, 1.25)
    assert concurrency_gain(baseline, slightly_better) == 0.25

    # A zero original ratio yields zero gain (no baseline to improve on).
    no_baseline = metrics(0, 1_000_000, 0.0)
    assert concurrency_gain(no_baseline, much_better) == 0.0
|
def test_speedup_critic_with_concurrency_metrics() -> None:
    """speedup_critic accepts candidates with concurrency improvements."""
    original_code_runtime = 10000
    original_async_throughput = 100

    def make_metrics(concurrent_ns: int, ratio: float) -> ConcurrencyMetrics:
        # Sequential time and concurrency factor are fixed; only the
        # concurrent time and the resulting ratio vary per case.
        return ConcurrencyMetrics(
            sequential_time_ns=10_000_000,
            concurrent_time_ns=concurrent_ns,
            concurrency_factor=10,
            concurrency_ratio=ratio,
        )

    def make_candidate(
        runtime: int, concurrency: ConcurrencyMetrics
    ) -> OptimizedCandidateResult:
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=runtime,
            behavior_test_results=TestResults(),
            benchmarking_test_results=TestResults(),
            optimization_candidate_index=0,
            total_candidate_timing=runtime,
            async_throughput=100,
            concurrency_metrics=concurrency,
        )

    original_concurrency = make_metrics(10_000_000, 1.0)

    def run_critic(
        candidate: OptimizedCandidateResult,
        best_ratio: float | None = None,
    ) -> bool:
        # All critic arguments except the candidate and the best-ratio-so-far
        # are identical across the cases below.
        return speedup_critic(
            candidate_result=candidate,
            original_code_runtime=original_code_runtime,
            best_runtime_until_now=None,
            original_async_throughput=original_async_throughput,
            best_throughput_until_now=None,
            original_concurrency_metrics=original_concurrency,
            best_concurrency_ratio_until_now=best_ratio,
            disable_gh_action_noise=True,
        )

    # Concurrency improves significantly (blocking -> non-blocking).
    assert run_critic(make_candidate(10000, make_metrics(1_000_000, 10.0)))

    # No concurrency improvement (falls back to runtime).
    assert run_critic(make_candidate(8000, make_metrics(10_000_000, 1.0)))

    # Concurrency below threshold (20% required).
    assert not run_critic(make_candidate(10000, make_metrics(9_000_000, 1.11)))

    # best_concurrency_ratio_until_now comparison: a 5.0x ratio loses to a
    # previously seen 10.0x ratio.
    assert not run_critic(
        make_candidate(10000, make_metrics(2_000_000, 5.0)), best_ratio=10.0
    )
|
def test_concurrency_ratio_display_formatting() -> None:
    """Concurrency ratio display strings are formatted correctly.

    The same compute-then-format stanza was previously copy-pasted three
    times; it is now driven by a table of (original, candidate, expected).
    """
    # (original ratio, candidate ratio, expected display string)
    cases = [
        (0.05, 0.15, "Concurrency ratio: 0.05x \u2192 0.15x (+200.0%)"),
        (1.0, 10.0, "Concurrency ratio: 1.00x \u2192 10.00x (+900.0%)"),
        (0.01, 0.03, "Concurrency ratio: 0.01x \u2192 0.03x (+200.0%)"),
    ]
    for orig_ratio, cand_ratio, expected in cases:
        # Percentage gain, guarded against a zero baseline.
        conc_gain = (
            ((cand_ratio - orig_ratio) / orig_ratio * 100) if orig_ratio > 0 else 0
        )
        display_string = (
            f"Concurrency ratio: {orig_ratio:.2f}x "
            f"\u2192 {cand_ratio:.2f}x ({conc_gain:+.1f}%)"
        )
        assert display_string == expected