# codeflash/tests/test_critic.py
import os
from pathlib import Path
from unittest.mock import Mock
from codeflash.code_utils.env_utils import get_pr_number
from codeflash.models.models import (
CodeOptimizationContext,
CoverageData,
CoverageStatus,
FunctionCoverage,
OptimizedCandidateResult,
)
from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic
from codeflash.verification.test_results import FunctionTestInvocation, InvocationId, TestResults, TestType
def test_performance_gain() -> None:
    """performance_gain reports relative speedup: (original - optimized) / optimized.

    A zero optimized runtime is treated as "no gain" (0.0) rather than an
    infinite speedup, and a slower candidate yields a negative gain.
    """
    # Guard case: zero optimized runtime must not divide-by-zero; it returns 0.0.
    assert performance_gain(original_runtime_ns=1000, optimized_runtime_ns=0) == 0.0
    assert performance_gain(original_runtime_ns=1000, optimized_runtime_ns=500) == 1.0  # 2x faster -> 100% gain
    assert performance_gain(original_runtime_ns=1000, optimized_runtime_ns=900) == 0.1111111111111111  # ~11% gain
    assert performance_gain(original_runtime_ns=1000, optimized_runtime_ns=1000) == 0.0  # unchanged runtime
    assert performance_gain(original_runtime_ns=1000, optimized_runtime_ns=1100) == -0.09090909090909091  # regression
def test_speedup_critic() -> None:
    """speedup_critic accepts large improvements and scales its noise floor with runtime.

    At a 1000 ns original runtime a 20% improvement passes but a 6% improvement is
    rejected (within measurement noise); at a 100000 ns original runtime the same
    6% improvement is accepted — presumably the noise floor shrinks for longer
    runtimes (behavior pinned by these asserts).
    """
    original_code_runtime = 1000
    best_runtime_until_now = 1000
    candidate_result = OptimizedCandidateResult(
        max_loop_count=5,
        best_test_runtime=800,
        behavior_test_results=TestResults(),
        benchmarking_test_results=TestResults(),
        optimization_candidate_index=0,
        total_candidate_timing=12,
    )
    assert speedup_critic(candidate_result, original_code_runtime, best_runtime_until_now)  # 20% improvement

    candidate_result = OptimizedCandidateResult(
        max_loop_count=5,
        best_test_runtime=940,
        behavior_test_results=TestResults(),
        benchmarking_test_results=TestResults(),
        total_candidate_timing=12,
        optimization_candidate_index=0,
    )
    assert not speedup_critic(candidate_result, original_code_runtime, best_runtime_until_now)  # 6% improvement

    # Same 6% relative improvement, but at 100x the absolute runtime it now passes.
    original_code_runtime = 100000
    best_runtime_until_now = 100000
    candidate_result = OptimizedCandidateResult(
        max_loop_count=5,
        best_test_runtime=94000,
        behavior_test_results=TestResults(),
        benchmarking_test_results=TestResults(),
        total_candidate_timing=12,
        optimization_candidate_index=0,
    )
    assert speedup_critic(candidate_result, original_code_runtime, best_runtime_until_now)  # 6% improvement
def test_generated_test_critic() -> None:
    """quantity_of_tests_critic needs enough distinct passing tests to accept a candidate.

    The second half of the test sets CODEFLASH_PR_NUMBER, under which the critic is
    stricter: combinations that passed without a PR number (duplicate loop indices,
    failing tests in the mix) are rejected, while a REPLAY_TEST still satisfies it.

    Fix over previous revision: the env-var mutation is now wrapped in try/finally
    and the get_pr_number cache is cleared afterwards, so a failing assert cannot
    leak PR-mode state into other tests in the session.
    """

    def _invocation(
        name: str,
        test_type: TestType,
        *,
        did_pass: bool = True,
        loop_index: int = 1,
    ) -> FunctionTestInvocation:
        # Minimal invocation fixture; only the fields varied by this test are parameters.
        return FunctionTestInvocation(
            id=InvocationId(
                test_module_path="",
                test_class_name="",
                test_function_name=name,
                function_getting_tested="sorter",
                iteration_id="",
            ),
            file_name=Path(name),
            did_pass=did_pass,
            runtime=0,
            test_framework="pytest",
            test_type=test_type,
            return_value=None,
            timed_out=False,
            loop_index=loop_index,
        )

    def _candidate(test_results: list[FunctionTestInvocation]) -> OptimizedCandidateResult:
        # Wrap invocations in a candidate result; timing fields are irrelevant here.
        return OptimizedCandidateResult(
            max_loop_count=5,
            best_test_runtime=100,
            behavior_test_results=TestResults(test_results=test_results),
            benchmarking_test_results=TestResults(),
            total_candidate_timing=12,
            optimization_candidate_index=0,
        )

    test_1 = _invocation("test_1", TestType.GENERATED_REGRESSION)
    test_2 = _invocation("test_2", TestType.GENERATED_REGRESSION)
    test_3 = _invocation("test_3", TestType.EXISTING_UNIT_TEST)
    test_4 = _invocation("test_4", TestType.GENERATED_REGRESSION, did_pass=False)
    test_5 = _invocation("test_5", TestType.REPLAY_TEST)
    test_6 = _invocation("test_6", TestType.GENERATED_REGRESSION, loop_index=2)  # repeat of test_1's loop

    # Without a PR number set:
    assert quantity_of_tests_critic(_candidate([test_1, test_2, test_3]))
    assert quantity_of_tests_critic(_candidate([test_1, test_3, test_6]))
    assert quantity_of_tests_critic(_candidate([test_1, test_3, test_4]))
    assert not quantity_of_tests_critic(_candidate([test_1]))  # one passing test is not enough
    assert quantity_of_tests_critic(_candidate([test_1, test_2]))
    assert not quantity_of_tests_critic(_candidate([test_1, test_4, test_6]))
    assert quantity_of_tests_critic(_candidate([test_4, test_5]))  # replay test passes despite a failing test
    assert quantity_of_tests_critic(_candidate([test_1, test_2, test_3, test_4, test_5]))

    # PR mode: clear the cached PR number first so the env var is actually re-read.
    get_pr_number.cache_clear()
    os.environ["CODEFLASH_PR_NUMBER"] = "1234"
    try:
        assert not quantity_of_tests_critic(_candidate([test_1, test_2, test_3, test_6]))
        assert not quantity_of_tests_critic(_candidate([test_1, test_2, test_3, test_4]))
        assert quantity_of_tests_critic(_candidate([test_1, test_2, test_3, test_5]))
    finally:
        # Undo the env mutation and drop the cached PR number even if an assert failed,
        # so PR-mode state cannot leak into other tests in the session.
        del os.environ["CODEFLASH_PR_NUMBER"]
        get_pr_number.cache_clear()
def test_coverage_critic() -> None:
    """coverage_critic gates pytest runs on coverage but accepts unittest runs regardless.

    Pinned behavior: 100% and 50% pytest coverage pass, 30% fails, and a
    unittest run passes even with 0 coverage.
    """
    mock_code_context = Mock(spec=CodeOptimizationContext)

    def _func_coverage(
        pct: float,
        executed_lines: list[int],
        unexecuted_lines: list[int],
        executed_branches: list[list[int]],
        unexecuted_branches: list[list[int]],
    ) -> FunctionCoverage:
        # Per-function coverage fixture; name is fixed for every case in this test.
        return FunctionCoverage(
            name="test_function",
            coverage=pct,
            executed_lines=executed_lines,
            unexecuted_lines=unexecuted_lines,
            executed_branches=executed_branches,
            unexecuted_branches=unexecuted_branches,
        )

    def _coverage_data(total: float, main_func: FunctionCoverage) -> CoverageData:
        # Shared CoverageData scaffolding; only total coverage and the main
        # function's coverage vary across the cases below.
        return CoverageData(
            file_path=Path("test_file.py"),
            coverage=total,
            function_name="test_function",
            functions_being_tested=["function1", "function2"],
            graph={},
            code_context=mock_code_context,
            main_func_coverage=main_func,
            dependent_func_coverage=None,
            status=CoverageStatus.PARSED_SUCCESSFULLY,
        )

    fully_covered = _coverage_data(100.0, _func_coverage(100.0, [10], [2], [[5]], [[1]]))
    assert coverage_critic(fully_covered, "pytest") is True

    # 50% sits exactly on the acceptance boundary and still passes.
    borderline = _coverage_data(50.0, _func_coverage(50.0, [10], [2], [[5]], [[1]]))
    assert coverage_critic(borderline, "pytest") is True

    insufficient = _coverage_data(30.0, _func_coverage(0.0, [], [10], [], [[5]]))
    assert coverage_critic(insufficient, "pytest") is False

    # unittest coverage is not gated: even 0 coverage is accepted.
    uncovered_unittest = _coverage_data(0, _func_coverage(0, [10], [2], [[5]], [[1]]))
    assert coverage_critic(uncovered_unittest, "unittest") is True