codeflash/tests/test_add_runtime_comments.py

1634 lines
72 KiB
Python
Raw Normal View History

2025-06-18 00:00:25 +00:00
import os
from pathlib import Path
from unittest.mock import Mock

import pytest

from codeflash.code_utils.edit_generated_tests import add_runtime_comments_to_generated_tests
from codeflash.models.models import (
    FunctionTestInvocation,
    GeneratedTests,
    GeneratedTestsList,
    InvocationId,
    TestResults,
    TestType,
    VerificationType,
)
from codeflash.verification.verification_utils import TestConfig

# These imported names start with "Test", so pytest would try to collect them
# as test classes; mark them explicitly as non-tests.
TestType.__test__ = False
TestConfig.__test__ = False
TestResults.__test__ = False
2025-06-18 00:00:25 +00:00
@pytest.fixture
def test_config():
    """Create a mock TestConfig whose paths point at this test package.

    project_root_path is one level above the tests directory so that
    os.chdir() in the tests resolves relative paths consistently.
    """
    config = Mock(spec=TestConfig)
    config.project_root_path = Path(__file__).resolve().parent.parent
    config.test_framework = "pytest"
    config.tests_project_rootdir = Path(__file__).resolve().parent
    config.tests_root = Path(__file__).resolve().parent
    return config
2025-06-06 02:55:32 +00:00
2025-06-18 21:40:48 +00:00
class TestAddRuntimeComments:
    """Test cases for add_runtime_comments_to_generated_tests method."""

    def create_test_invocation(
        self,
        test_function_name: str,
        runtime: int,
        loop_index: int = 1,
        iteration_id: str = "1",
        did_pass: bool = True,
    ) -> FunctionTestInvocation:
        """Build a FunctionTestInvocation with fixed module/file metadata.

        Only the fields the tests vary (name, runtime in nanoseconds, loop
        index, iteration id, pass flag) are parameterized.
        """
        return FunctionTestInvocation(
            loop_index=loop_index,
            id=InvocationId(
                test_module_path="tests.test_module",
                test_class_name=None,
                test_function_name=test_function_name,
                function_getting_tested="test_function",
                iteration_id=iteration_id,
            ),
            file_name=Path("tests/test.py"),
            did_pass=did_pass,
            runtime=runtime,
            test_framework="pytest",
            test_type=TestType.GENERATED_REGRESSION,
            return_value=None,
            timed_out=False,
            verification_type=VerificationType.FUNCTION_CALL,
        )
def test_basic_runtime_comment_addition(self, test_config):
"""Test basic functionality of adding runtime comments."""
# Create test source code
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
codeflash_output = bubble_sort([3, 1, 2])
assert codeflash_output == [1, 2, 3]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py",
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
# Add test invocations with different runtimes
2025-06-21 00:33:27 +00:00
original_invocation = self.create_test_invocation("test_bubble_sort", 500_000, iteration_id='0') # 500μs
optimized_invocation = self.create_test_invocation("test_bubble_sort", 300_000, iteration_id='0') # 300μs
2025-06-18 21:40:48 +00:00
original_test_results.add(original_invocation)
optimized_test_results.add(optimized_invocation)
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that comments were added
modified_source = result.generated_tests[0].generated_original_test_source
assert "# 500μs -> 300μs" in modified_source
assert "codeflash_output = bubble_sort([3, 1, 2]) # 500μs -> 300μs" in modified_source
def test_multiple_test_functions(self, test_config):
"""Test handling multiple test functions in the same file."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
2025-07-02 22:59:50 +00:00
codeflash_output = quick_sort([3, 1, 2])
2025-06-18 21:40:48 +00:00
assert codeflash_output == [1, 2, 3]
def test_quick_sort():
codeflash_output = quick_sort([5, 2, 8])
assert codeflash_output == [2, 5, 8]
def helper_function():
return "not a test"
"""
2025-07-02 22:59:50 +00:00
qualified_name = "quick_sort"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results for both functions
original_test_results = TestResults()
optimized_test_results = TestResults()
# Add test invocations for both test functions
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000, iteration_id='0'))
2025-06-18 21:40:48 +00:00
2025-06-21 00:33:27 +00:00
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000, iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000, iteration_id='0'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
modified_source = result.generated_tests[0].generated_original_test_source
# Check that comments were added to both test functions
assert "# 500μs -> 300μs" in modified_source
assert "# 800μs -> 600μs" in modified_source
# Helper function should not have comments
assert (
"helper_function():" in modified_source
and "# " not in modified_source.split("helper_function():")[1].split("\n")[0]
)
def test_different_time_formats(self, test_config):
"""Test that different time ranges are formatted correctly with new precision rules."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_cases = [
(999, 500, "999ns -> 500ns"), # nanoseconds
(25_000, 18_000, "25.0μs -> 18.0μs"), # microseconds with precision
(500_000, 300_000, "500μs -> 300μs"), # microseconds full integers
(1_500_000, 800_000, "1.50ms -> 800μs"), # milliseconds with precision
(365_000_000, 290_000_000, "365ms -> 290ms"), # milliseconds full integers
(2_000_000_000, 1_500_000_000, "2.00s -> 1.50s"), # seconds with precision
]
for original_time, optimized_time, expected_comment in test_cases:
test_source = """def test_function():
2025-06-21 00:33:27 +00:00
#this comment will be removed in ast form
2025-06-18 21:40:48 +00:00
codeflash_output = some_function()
assert codeflash_output is not None
"""
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_function", original_time, iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_function", optimized_time, iteration_id='0'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 21:40:48 +00:00
)
modified_source = result.generated_tests[0].generated_original_test_source
assert f"# {expected_comment}" in modified_source
def test_missing_test_results(self, test_config):
"""Test behavior when test results are missing for a test function."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
codeflash_output = bubble_sort([3, 1, 2])
assert codeflash_output == [1, 2, 3]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create empty test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that no comments were added
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged
def test_partial_test_results(self, test_config):
"""Test behavior when only one set of test results is available."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
codeflash_output = bubble_sort([3, 1, 2])
assert codeflash_output == [1, 2, 3]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results with only original data
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, iteration_id='0'))
2025-06-18 21:40:48 +00:00
# No optimized results
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that no comments were added
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged
def test_multiple_runtimes_uses_minimum(self, test_config):
"""Test that when multiple runtimes exist, the minimum is used."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
codeflash_output = bubble_sort([3, 1, 2])
assert codeflash_output == [1, 2, 3]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results with multiple loop iterations
original_test_results = TestResults()
optimized_test_results = TestResults()
# Add multiple runs with different runtimes
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 600_000, loop_index=1,iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, loop_index=2,iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_bubble_sort", 550_000, loop_index=3,iteration_id='0'))
2025-06-18 21:40:48 +00:00
2025-06-21 00:33:27 +00:00
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 350_000, loop_index=1,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000, loop_index=2,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 320_000, loop_index=3,iteration_id='0'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that minimum times were used (500μs -> 300μs)
modified_source = result.generated_tests[0].generated_original_test_source
assert "# 500μs -> 300μs" in modified_source
def test_no_codeflash_output_assignment(self, test_config):
"""Test behavior when test doesn't have codeflash_output assignment."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='-1'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='-1'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that no comments were added (no codeflash_output assignment)
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged
def test_invalid_python_code_handling(self, test_config):
"""Test behavior when test source code is invalid Python."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort(:
2025-06-21 00:33:27 +00:00
codeflash_output = bubble_sort([3, 1, 2])
2025-06-18 21:40:48 +00:00
assert codeflash_output == [1, 2, 3]
2025-06-21 00:33:27 +00:00
""" # Invalid syntax: extra indentation
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='0'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality - should handle parse error gracefully
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that original test is preserved when parsing fails
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged due to parse error
def test_multiple_generated_tests(self, test_config):
"""Test handling multiple generated test objects."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source_1 = """def test_bubble_sort():
2025-07-02 22:59:50 +00:00
codeflash_output = quick_sort([3, 1, 2])
2025-06-18 21:40:48 +00:00
assert codeflash_output == [1, 2, 3]
"""
test_source_2 = """def test_quick_sort():
2025-06-21 00:33:27 +00:00
a=1
b=2
c=3
2025-06-18 21:40:48 +00:00
codeflash_output = quick_sort([5, 2, 8])
assert codeflash_output == [2, 5, 8]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "quick_sort"
2025-06-18 21:40:48 +00:00
generated_test_1 = GeneratedTests(
generated_original_test_source=test_source_1,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_test_2 = GeneratedTests(
generated_original_test_source=test_source_2,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test_1, generated_test_2])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000,iteration_id='3'))
2025-06-18 21:40:48 +00:00
2025-06-21 00:33:27 +00:00
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000,iteration_id='3'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that comments were added to both test files
modified_source_1 = result.generated_tests[0].generated_original_test_source
modified_source_2 = result.generated_tests[1].generated_original_test_source
assert "# 500μs -> 300μs" in modified_source_1
assert "# 800μs -> 600μs" in modified_source_2
def test_preserved_test_attributes(self, test_config):
"""Test that other test attributes are preserved during modification."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_bubble_sort():
codeflash_output = bubble_sort([3, 1, 2])
assert codeflash_output == [1, 2, 3]
"""
2025-07-02 22:59:50 +00:00
qualified_name = "bubble_sort"
2025-06-18 21:40:48 +00:00
original_behavior_source = "behavior test source"
original_perf_source = "perf test source"
2025-07-03 20:11:39 +00:00
original_behavior_path=test_config.tests_root / "test_module.py"
original_perf_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source=original_behavior_source,
instrumented_perf_test_source=original_perf_source,
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='0'))
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that other attributes are preserved
modified_test = result.generated_tests[0]
assert modified_test.instrumented_behavior_test_source == original_behavior_source
assert modified_test.instrumented_perf_test_source == original_perf_source
assert modified_test.behavior_file_path == original_behavior_path
assert modified_test.perf_file_path == original_perf_path
# Check that only the generated_original_test_source was modified
assert "# 500μs -> 300μs" in modified_test.generated_original_test_source
def test_multistatement_line_handling(self, test_config):
"""Test that runtime comments work correctly with multiple statements on one line."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 21:40:48 +00:00
test_source = """def test_mutation_of_input():
# Test that the input list is mutated in-place and returned
arr = [3, 1, 2]
codeflash_output = sorter(arr); result = codeflash_output
assert result == [1, 2, 3]
assert arr == [1, 2, 3] # Input should be mutated
"""
2025-07-02 22:59:50 +00:00
qualified_name = "sorter"
2025-06-18 21:40:48 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-18 21:40:48 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
2025-06-21 00:33:27 +00:00
original_test_results.add(self.create_test_invocation("test_mutation_of_input", 19_000,iteration_id='1')) # 19μs
optimized_test_results.add(self.create_test_invocation("test_mutation_of_input", 14_000,iteration_id='1')) # 14μs
2025-06-18 21:40:48 +00:00
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
2025-07-02 22:59:50 +00:00
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
2025-06-18 21:40:48 +00:00
# Check that comments were added to the correct line
modified_source = result.generated_tests[0].generated_original_test_source
assert "# 19.0μs -> 14.0μs" in modified_source
# Verify the comment is on the line with codeflash_output assignment
lines = modified_source.split("\n")
codeflash_line = None
for line in lines:
if "codeflash_output = sorter(arr)" in line:
codeflash_line = line
break
assert codeflash_line is not None, "Could not find codeflash_output assignment line"
assert "# 19.0μs -> 14.0μs" in codeflash_line, f"Comment not found in the correct line: {codeflash_line}"
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
def test_add_runtime_comments_simple_function(self, test_config):
"""Test adding runtime comments to a simple test function."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source = '''def test_function():
codeflash_output = some_function()
assert codeflash_output == expected
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-06 02:55:32 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-06 02:55:32 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
2025-06-18 00:00:25 +00:00
invocation_id = InvocationId(
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_function",
iteration_id="0",
2025-06-06 02:55:32 +00:00
)
2025-06-18 00:00:25 +00:00
original_runtimes = {invocation_id: [1000000000, 1200000000]} # 1s, 1.2s in nanoseconds
optimized_runtimes = {invocation_id: [500000000, 600000000]} # 0.5s, 0.6s in nanoseconds
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
expected_source = '''def test_function():
2025-06-18 22:31:08 +00:00
codeflash_output = some_function() # 1.00s -> 500ms (100% faster)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == expected_source
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
def test_add_runtime_comments_class_method(self, test_config):
"""Test adding runtime comments to a test method within a class."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source = '''class TestClass:
def test_function(self):
codeflash_output = some_function()
assert codeflash_output == expected
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-06 02:55:32 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-06 02:55:32 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
2025-06-18 00:00:25 +00:00
invocation_id = InvocationId(
test_module_path="tests.test_module",
test_class_name="TestClass",
test_function_name="test_function",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_function",
iteration_id="0",
2025-06-06 02:55:32 +00:00
)
2025-06-18 00:00:25 +00:00
original_runtimes = {invocation_id: [2000000000]} # 2s in nanoseconds
optimized_runtimes = {invocation_id: [1000000000]} # 1s in nanoseconds
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
expected_source = '''class TestClass:
def test_function(self):
2025-06-18 22:31:08 +00:00
codeflash_output = some_function() # 2.00s -> 1.00s (100% faster)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == expected_source
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
def test_add_runtime_comments_multiple_assignments(self, test_config):
"""Test adding runtime comments when there are multiple codeflash_output assignments."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source = '''def test_function():
setup_data = prepare_test()
codeflash_output = some_function()
assert codeflash_output == expected
codeflash_output = another_function()
assert codeflash_output == expected2
2025-07-02 22:59:50 +00:00
codeflash_output = some_function()
assert codeflash_output == expected2
2025-06-18 00:00:25 +00:00
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-06 02:55:32 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-06 02:55:32 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
2025-06-21 00:33:27 +00:00
invocation_id1 = InvocationId(
2025-06-18 00:00:25 +00:00
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_function",
2025-06-21 00:33:27 +00:00
iteration_id="1",
)
invocation_id2 = InvocationId(
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
2025-07-02 22:59:50 +00:00
function_getting_tested="some_function",
iteration_id="5",
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-21 00:33:27 +00:00
original_runtimes = {invocation_id1: [1500000000], invocation_id2: [10]} # 1.5s in nanoseconds
optimized_runtimes = {invocation_id1: [750000000], invocation_id2: [5]} # 0.75s in nanoseconds
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
expected_source = '''def test_function():
setup_data = prepare_test()
2025-06-18 22:31:08 +00:00
codeflash_output = some_function() # 1.50s -> 750ms (100% faster)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
2025-07-02 22:59:50 +00:00
codeflash_output = another_function()
assert codeflash_output == expected2
codeflash_output = some_function() # 10ns -> 5ns (100% faster)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected2
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == expected_source
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
def test_add_runtime_comments_no_matching_runtimes(self, test_config):
"""Test that source remains unchanged when no matching runtimes are found."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source = '''def test_function():
codeflash_output = some_function()
assert codeflash_output == expected
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-06 02:55:32 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-06 02:55:32 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
2025-06-18 00:00:25 +00:00
# Different invocation ID that won't match
invocation_id = InvocationId(
test_module_path="tests.other_module",
test_class_name=None,
test_function_name="other_function",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_other_function",
iteration_id="0",
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
original_runtimes = {invocation_id: [1000000000]}
optimized_runtimes = {invocation_id: [500000000]}
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
# Source should remain unchanged
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == test_source
def test_add_runtime_comments_no_codeflash_output(self, test_config):
2025-07-02 22:59:50 +00:00
"""comments will still be added if codeflash output doesnt exist"""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source = '''def test_function():
result = some_function()
assert result == expected
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
expected = '''def test_function():
result = some_function() # 1.00s -> 500ms (100% faster)
assert result == expected
'''
2025-06-06 02:55:32 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-06 02:55:32 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
2025-06-18 00:00:25 +00:00
invocation_id = InvocationId(
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_function",
iteration_id="0",
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
original_runtimes = {invocation_id: [1000000000]}
optimized_runtimes = {invocation_id: [500000000]}
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
# Source should remain unchanged
assert len(result.generated_tests) == 1
2025-07-02 22:59:50 +00:00
assert result.generated_tests[0].generated_original_test_source == expected
def test_add_runtime_comments_multiple_tests(self, test_config):
"""Test adding runtime comments to multiple generated tests."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source1 = '''def test_function1():
codeflash_output = some_function()
assert codeflash_output == expected
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
test_source2 = '''def test_function2():
2025-07-02 22:59:50 +00:00
codeflash_output = some_function()
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-18 00:00:25 +00:00
generated_test1 = GeneratedTests(
generated_original_test_source=test_source1,
2025-06-06 02:55:32 +00:00
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module1.py",
perf_file_path=test_config.tests_root / "test_perf1.py"
2025-06-06 02:55:32 +00:00
)
2025-06-18 00:00:25 +00:00
generated_test2 = GeneratedTests(
generated_original_test_source=test_source2,
2025-06-06 02:55:32 +00:00
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module2.py",
perf_file_path=test_config.tests_root / "test_perf2.py"
2025-06-06 02:55:32 +00:00
)
2025-06-18 00:00:25 +00:00
generated_tests = GeneratedTestsList(generated_tests=[generated_test1, generated_test2])
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
invocation_id1 = InvocationId(
test_module_path="tests.test_module1",
test_class_name=None,
test_function_name="test_function1",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_function",
iteration_id="0",
2025-06-06 02:55:32 +00:00
)
2025-06-18 00:00:25 +00:00
invocation_id2 = InvocationId(
test_module_path="tests.test_module2",
test_class_name=None,
test_function_name="test_function2",
2025-07-02 22:59:50 +00:00
function_getting_tested="some_function", # not used in this test throughout the entire test file
2025-06-18 03:15:24 +00:00
iteration_id = "0",
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
original_runtimes = {
invocation_id1: [1000000000], # 1s
invocation_id2: [2000000000], # 2s
}
optimized_runtimes = {
invocation_id1: [500000000], # 0.5s
invocation_id2: [800000000], # 0.8s
}
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
expected_source1 = '''def test_function1():
2025-06-18 22:31:08 +00:00
codeflash_output = some_function() # 1.00s -> 500ms (100% faster)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
expected_source2 = '''def test_function2():
2025-07-02 22:59:50 +00:00
codeflash_output = some_function() # 2.00s -> 800ms (150% faster)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
assert len(result.generated_tests) == 2
assert result.generated_tests[0].generated_original_test_source == expected_source1
assert result.generated_tests[1].generated_original_test_source == expected_source2
def test_add_runtime_comments_performance_regression(self, test_config):
"""Test adding runtime comments when optimized version is slower (negative performance gain)."""
2025-07-03 20:11:39 +00:00
os.chdir(test_config.project_root_path)
2025-06-18 00:00:25 +00:00
test_source = '''def test_function():
codeflash_output = some_function()
assert codeflash_output == expected
2025-06-21 00:03:51 +00:00
codeflash_output = some_function()
assert codeflash_output == expected
2025-06-18 00:00:25 +00:00
'''
2025-07-02 22:59:50 +00:00
qualified_name = "some_function"
2025-06-06 02:55:32 +00:00
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
2025-07-03 20:11:39 +00:00
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
2025-06-06 02:55:32 +00:00
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
2025-06-21 00:03:51 +00:00
invocation_id1 = InvocationId(
2025-06-18 00:00:25 +00:00
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
2025-06-18 03:15:24 +00:00
function_getting_tested="some_function",
iteration_id="0",
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-21 00:03:51 +00:00
invocation_id2 = InvocationId(
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
function_getting_tested="some_function",
iteration_id="2",
)
original_runtimes = {invocation_id1: [1000000000], invocation_id2: [2]} # 1s
optimized_runtimes = {invocation_id1: [1500000000], invocation_id2: [1]} # 1.5s (slower!)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
result = add_runtime_comments_to_generated_tests(
2025-07-02 22:59:50 +00:00
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
2025-06-18 00:00:25 +00:00
)
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
expected_source = '''def test_function():
2025-06-18 22:31:08 +00:00
codeflash_output = some_function() # 1.00s -> 1.50s (33.3% slower)
2025-06-18 00:00:25 +00:00
assert codeflash_output == expected
2025-06-21 00:03:51 +00:00
codeflash_output = some_function() # 2ns -> 1ns (100% faster)
assert codeflash_output == expected
2025-06-18 00:00:25 +00:00
'''
2025-06-06 02:55:32 +00:00
2025-06-18 00:00:25 +00:00
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == expected_source
def test_basic_runtime_comment_addition_no_cfo(self, test_config):
"""Test basic functionality of adding runtime comments."""
# Create test source code
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
qualified_name = "bubble_sort"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py",
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
# Add test invocations with different runtimes
original_invocation = self.create_test_invocation("test_bubble_sort", 500_000, iteration_id='0') # 500μs
optimized_invocation = self.create_test_invocation("test_bubble_sort", 300_000, iteration_id='0') # 300μs
original_test_results.add(original_invocation)
optimized_test_results.add(optimized_invocation)
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that comments were added
modified_source = result.generated_tests[0].generated_original_test_source
assert "# 500μs -> 300μs" in modified_source
assert "result = bubble_sort([3, 1, 2]) # 500μs -> 300μs" in modified_source
def test_multiple_test_functions_no_cfo(self, test_config):
"""Test handling multiple test functions in the same file."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = quick_sort([3, 1, 2])
assert result == [1, 2, 3]
def test_quick_sort():
result = quick_sort([5, 2, 8]); assert result == [2, 5, 8]
def helper_function():
return "not a test"
"""
qualified_name = "quick_sort"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results for both functions
original_test_results = TestResults()
optimized_test_results = TestResults()
# Add test invocations for both test functions
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000, iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000, iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000, iteration_id='0'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
modified_source = result.generated_tests[0].generated_original_test_source
# Check that comments were added to both test functions
assert "# 500μs -> 300μs" in modified_source
assert "# 800μs -> 600μs" in modified_source
# Helper function should not have comments
assert (
"helper_function():" in modified_source
and "# " not in modified_source.split("helper_function():")[1].split("\n")[0]
)
def test_different_time_formats_no_cfo(self, test_config):
"""Test that different time ranges are formatted correctly with new precision rules."""
os.chdir(test_config.project_root_path)
test_cases = [
(999, 500, "999ns -> 500ns"), # nanoseconds
(25_000, 18_000, "25.0μs -> 18.0μs"), # microseconds with precision
(500_000, 300_000, "500μs -> 300μs"), # microseconds full integers
(1_500_000, 800_000, "1.50ms -> 800μs"), # milliseconds with precision
(365_000_000, 290_000_000, "365ms -> 290ms"), # milliseconds full integers
(2_000_000_000, 1_500_000_000, "2.00s -> 1.50s"), # seconds with precision
]
for original_time, optimized_time, expected_comment in test_cases:
test_source = """def test_function():
#this comment will be removed in ast form
result = some_function(); assert result is not None
"""
qualified_name = "some_function"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_function", original_time, iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_function", optimized_time, iteration_id='0'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
)
modified_source = result.generated_tests[0].generated_original_test_source
assert f"# {expected_comment}" in modified_source
def test_missing_test_results_no_cfo(self, test_config):
"""Test behavior when test results are missing for a test function."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
qualified_name = "bubble_sort"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create empty test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that no comments were added
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged
def test_partial_test_results_no_cfo(self, test_config):
"""Test behavior when only one set of test results is available."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
qualified_name = "bubble_sort"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results with only original data
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, iteration_id='0'))
# No optimized results
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that no comments were added
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged
def test_multiple_runtimes_uses_minimum_no_cfo(self, test_config):
"""Test that when multiple runtimes exist, the minimum is used."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
qualified_name = "bubble_sort"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results with multiple loop iterations
original_test_results = TestResults()
optimized_test_results = TestResults()
# Add multiple runs with different runtimes
original_test_results.add(self.create_test_invocation("test_bubble_sort", 600_000, loop_index=1,iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, loop_index=2,iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_bubble_sort", 550_000, loop_index=3,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 350_000, loop_index=1,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000, loop_index=2,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 320_000, loop_index=3,iteration_id='0'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that minimum times were used (500μs -> 300μs)
modified_source = result.generated_tests[0].generated_original_test_source
assert "# 500μs -> 300μs" in modified_source
def test_no_codeflash_output_assignment_invalid_iteration_id(self, test_config):
"""Test behavior when test doesn't have codeflash_output assignment."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
qualified_name = "bubble_sort"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='-1'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='-1'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that no comments were added (no codeflash_output assignment)
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged
def test_invalid_python_code_handling_no_cfo(self, test_config):
"""Test behavior when test source code is invalid Python."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort(:
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
""" # Invalid syntax: extra indentation
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
qualified_name = "bubble_sort"
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='0'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality - should handle parse error gracefully
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that original test is preserved when parsing fails
modified_source = result.generated_tests[0].generated_original_test_source
assert modified_source == test_source # Should be unchanged due to parse error
def test_multiple_generated_tests_no_cfo(self, test_config):
"""Test handling multiple generated test objects."""
os.chdir(test_config.project_root_path)
test_source_1 = """def test_bubble_sort():
codeflash_output = quick_sort([3, 1, 2]); assert codeflash_output == [1, 2, 3]
"""
test_source_2 = """def test_quick_sort():
a=1
b=2
c=3
result = quick_sort([5, 2, 8])
assert result == [2, 5, 8]
"""
qualified_name = "quick_sort"
generated_test_1 = GeneratedTests(
generated_original_test_source=test_source_1,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_test_2 = GeneratedTests(
generated_original_test_source=test_source_2,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test_1, generated_test_2])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='0'))
original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000,iteration_id='3'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000,iteration_id='3'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that comments were added to both test files
modified_source_1 = result.generated_tests[0].generated_original_test_source
modified_source_2 = result.generated_tests[1].generated_original_test_source
assert "# 500μs -> 300μs" in modified_source_1
assert "# 800μs -> 600μs" in modified_source_2
def test_preserved_test_attributes_no_cfo(self, test_config):
"""Test that other test attributes are preserved during modification."""
os.chdir(test_config.project_root_path)
test_source = """def test_bubble_sort():
result = bubble_sort([3, 1, 2])
assert result == [1, 2, 3]
"""
qualified_name = "bubble_sort"
original_behavior_source = "behavior test source"
original_perf_source = "perf test source"
original_behavior_path=test_config.tests_root / "test_module.py"
original_perf_path=test_config.tests_root / "test_perf.py"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source=original_behavior_source,
instrumented_perf_test_source=original_perf_source,
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000,iteration_id='0'))
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000,iteration_id='0'))
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that other attributes are preserved
modified_test = result.generated_tests[0]
assert modified_test.instrumented_behavior_test_source == original_behavior_source
assert modified_test.instrumented_perf_test_source == original_perf_source
assert modified_test.behavior_file_path == original_behavior_path
assert modified_test.perf_file_path == original_perf_path
# Check that only the generated_original_test_source was modified
assert "# 500μs -> 300μs" in modified_test.generated_original_test_source
def test_multistatement_line_handling_no_cfo(self, test_config):
"""Test that runtime comments work correctly with multiple statements on one line."""
os.chdir(test_config.project_root_path)
test_source = """def test_mutation_of_input():
# Test that the input list is mutated in-place and returned
arr = [3, 1, 2]
res1 = sorter(arr); result = res1
assert result == [1, 2, 3]
assert arr == [1, 2, 3] # Input should be mutated
"""
qualified_name = "sorter"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
# Create test results
original_test_results = TestResults()
optimized_test_results = TestResults()
original_test_results.add(self.create_test_invocation("test_mutation_of_input", 19_000,iteration_id='1')) # 19μs
optimized_test_results.add(self.create_test_invocation("test_mutation_of_input", 14_000,iteration_id='1')) # 14μs
original_runtimes = original_test_results.usable_runtime_data_by_test_case()
optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case()
# Test the functionality
result = add_runtime_comments_to_generated_tests(qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes)
# Check that comments were added to the correct line
modified_source = result.generated_tests[0].generated_original_test_source
assert "# 19.0μs -> 14.0μs" in modified_source
# Verify the comment is on the line with codeflash_output assignment
lines = modified_source.split("\n")
codeflash_line = None
for line in lines:
if "res1 = sorter(arr)" in line:
codeflash_line = line
break
assert codeflash_line is not None, "Could not find codeflash_output assignment line"
assert "# 19.0μs -> 14.0μs" in codeflash_line, f"Comment not found in the correct line: {codeflash_line}"
def test_add_runtime_comments_simple_function_no_cfo(self, test_config):
"""Test adding runtime comments to a simple test function."""
os.chdir(test_config.project_root_path)
test_source = '''def test_function():
result = some_function(); assert result == expected
'''
qualified_name = "some_function"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
invocation_id = InvocationId(
test_module_path="tests.test_module",
test_class_name=None,
test_function_name="test_function",
function_getting_tested="some_function",
iteration_id="0",
)
original_runtimes = {invocation_id: [1000000000, 1200000000]} # 1s, 1.2s in nanoseconds
optimized_runtimes = {invocation_id: [500000000, 600000000]} # 0.5s, 0.6s in nanoseconds
result = add_runtime_comments_to_generated_tests(
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
)
expected_source = '''def test_function():
result = some_function(); assert result == expected # 1.00s -> 500ms (100% faster)
'''
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == expected_source
def test_add_runtime_comments_class_method_no_cfo(self, test_config):
"""Test adding runtime comments to a test method within a class."""
os.chdir(test_config.project_root_path)
test_source = '''class TestClass:
def test_function(self):
result = some_function()
assert result == expected
'''
qualified_name = "some_function"
generated_test = GeneratedTests(
generated_original_test_source=test_source,
instrumented_behavior_test_source="",
instrumented_perf_test_source="",
behavior_file_path=test_config.tests_root / "test_module.py",
perf_file_path=test_config.tests_root / "test_perf.py"
)
generated_tests = GeneratedTestsList(generated_tests=[generated_test])
invocation_id = InvocationId(
test_module_path="tests.test_module",
test_class_name="TestClass",
test_function_name="test_function",
function_getting_tested="some_function",
iteration_id="0",
)
original_runtimes = {invocation_id: [2000000000]} # 2s in nanoseconds
optimized_runtimes = {invocation_id: [1000000000]} # 1s in nanoseconds
result = add_runtime_comments_to_generated_tests(
qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
)
expected_source = '''class TestClass:
def test_function(self):
result = some_function() # 2.00s -> 1.00s (100% faster)
assert result == expected
'''
assert len(result.generated_tests) == 1
assert result.generated_tests[0].generated_original_test_source == expected_source
    def test_add_runtime_comments_multiple_assignments_no_cfo(self, test_config):
        """Test adding runtime comments when there are multiple codeflash_output assignments.

        Two distinct invocations (iteration ids "1" and "5") each get their own
        comment; statements that do not call ``some_function`` (``prepare_test``,
        ``another_function``) are left untouched.
        """
        # NOTE(review): presumably needed for cwd-relative path resolution — confirm.
        os.chdir(test_config.project_root_path)
        test_source = '''def test_function():
    setup_data = prepare_test()
    codeflash_output = some_function(); assert codeflash_output == expected
    result = another_function(); assert result == expected2
    codeflash_output = some_function()
    assert codeflash_output == expected2
'''
        qualified_name = "some_function"
        generated_test = GeneratedTests(
            generated_original_test_source=test_source,
            instrumented_behavior_test_source="",
            instrumented_perf_test_source="",
            behavior_file_path=test_config.tests_root / "test_module.py",
            perf_file_path=test_config.tests_root / "test_perf.py"
        )
        generated_tests = GeneratedTestsList(generated_tests=[generated_test])
        # Same test function, two different iteration ids — one per call site.
        invocation_id1 = InvocationId(
            test_module_path="tests.test_module",
            test_class_name=None,
            test_function_name="test_function",
            function_getting_tested="some_function",
            iteration_id="1",
        )
        invocation_id2 = InvocationId(
            test_module_path="tests.test_module",
            test_class_name=None,
            test_function_name="test_function",
            function_getting_tested="some_function",
            iteration_id="5",
        )
        original_runtimes = {invocation_id1: [1500000000], invocation_id2: [10]} # id1: 1.5s, id2: 10ns
        optimized_runtimes = {invocation_id1: [750000000], invocation_id2: [5]} # id1: 0.75s, id2: 5ns
        result = add_runtime_comments_to_generated_tests(
            qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
        )
        # First some_function() call shows seconds-scale timing, second shows nanoseconds.
        expected_source = '''def test_function():
    setup_data = prepare_test()
    codeflash_output = some_function(); assert codeflash_output == expected # 1.50s -> 750ms (100% faster)
    result = another_function(); assert result == expected2
    codeflash_output = some_function() # 10ns -> 5ns (100% faster)
    assert codeflash_output == expected2
'''
        assert len(result.generated_tests) == 1
        assert result.generated_tests[0].generated_original_test_source == expected_source
    def test_add_runtime_comments_no_matching_runtimes_no_cfo(self, test_config):
        """Test that source remains unchanged when no matching runtimes are found.

        The recorded invocation points at a different module/function, so no
        statement in the generated test should receive a timing comment.
        """
        # NOTE(review): presumably needed for cwd-relative path resolution — confirm.
        os.chdir(test_config.project_root_path)
        test_source = '''def test_function():
    result = some_function()
    assert result == expected
'''
        qualified_name = "some_function"
        generated_test = GeneratedTests(
            generated_original_test_source=test_source,
            instrumented_behavior_test_source="",
            instrumented_perf_test_source="",
            behavior_file_path=test_config.tests_root / "test_module.py",
            perf_file_path=test_config.tests_root / "test_perf.py"
        )
        generated_tests = GeneratedTestsList(generated_tests=[generated_test])
        # Different invocation ID that won't match the generated test above.
        invocation_id = InvocationId(
            test_module_path="tests.other_module",
            test_class_name=None,
            test_function_name="other_function",
            function_getting_tested="some_other_function",
            iteration_id="0",
        )
        original_runtimes = {invocation_id: [1000000000]}
        optimized_runtimes = {invocation_id: [500000000]}
        result = add_runtime_comments_to_generated_tests(
            qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
        )
        # Source should remain unchanged — no comment was applicable.
        assert len(result.generated_tests) == 1
        assert result.generated_tests[0].generated_original_test_source == test_source
    def test_add_runtime_comments_multiple_tests_no_cfo(self, test_config):
        """Test adding runtime comments to multiple generated tests.

        Two generated tests live in two different behavior files; each
        invocation must be matched to its own file/function and annotated with
        its own timings (100% vs 150% faster).
        """
        # NOTE(review): presumably needed for cwd-relative path resolution — confirm.
        os.chdir(test_config.project_root_path)
        test_source1 = '''def test_function1():
    result = some_function()
    assert result == expected
'''
        test_source2 = '''def test_function2():
    result = some_function()
    assert result == expected
'''
        qualified_name = "some_function"
        generated_test1 = GeneratedTests(
            generated_original_test_source=test_source1,
            instrumented_behavior_test_source="",
            instrumented_perf_test_source="",
            behavior_file_path=test_config.tests_root / "test_module1.py",
            perf_file_path=test_config.tests_root / "test_perf1.py"
        )
        generated_test2 = GeneratedTests(
            generated_original_test_source=test_source2,
            instrumented_behavior_test_source="",
            instrumented_perf_test_source="",
            behavior_file_path=test_config.tests_root / "test_module2.py",
            perf_file_path=test_config.tests_root / "test_perf2.py"
        )
        generated_tests = GeneratedTestsList(generated_tests=[generated_test1, generated_test2])
        invocation_id1 = InvocationId(
            test_module_path="tests.test_module1",
            test_class_name=None,
            test_function_name="test_function1",
            function_getting_tested="some_function",
            iteration_id="0",
        )
        invocation_id2 = InvocationId(
            test_module_path="tests.test_module2",
            test_class_name=None,
            test_function_name="test_function2",
            function_getting_tested="some_function", # not used in this test throughout the entire test file
            iteration_id = "0",
        )
        original_runtimes = {
            invocation_id1: [1000000000], # 1s
            invocation_id2: [2000000000], # 2s
        }
        optimized_runtimes = {
            invocation_id1: [500000000], # 0.5s
            invocation_id2: [800000000], # 0.8s
        }
        result = add_runtime_comments_to_generated_tests(
            qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
        )
        # Each test file is annotated independently with its own speedup.
        expected_source1 = '''def test_function1():
    result = some_function() # 1.00s -> 500ms (100% faster)
    assert result == expected
'''
        expected_source2 = '''def test_function2():
    result = some_function() # 2.00s -> 800ms (150% faster)
    assert result == expected
'''
        assert len(result.generated_tests) == 2
        assert result.generated_tests[0].generated_original_test_source == expected_source1
        assert result.generated_tests[1].generated_original_test_source == expected_source2
    def test_add_runtime_comments_performance_regression_no_cfo(self, test_config):
        """Test adding runtime comments when optimized version is slower (negative performance gain).

        The first invocation regressed (1s -> 1.5s, "33.3% slower"); the second
        improved (2ns -> 1ns, "100% faster"). Both comments must be rendered.
        """
        # NOTE(review): presumably needed for cwd-relative path resolution — confirm.
        os.chdir(test_config.project_root_path)
        # NOTE(review): first line asserts `codeflash_output` despite assigning to
        # `result` — the string is never executed, so this is harmless as a fixture.
        test_source = '''def test_function():
    result = some_function(); assert codeflash_output == expected
    codeflash_output = some_function()
    assert codeflash_output == expected
'''
        qualified_name = "some_function"
        generated_test = GeneratedTests(
            generated_original_test_source=test_source,
            instrumented_behavior_test_source="",
            instrumented_perf_test_source="",
            behavior_file_path=test_config.tests_root / "test_module.py",
            perf_file_path=test_config.tests_root / "test_perf.py"
        )
        generated_tests = GeneratedTestsList(generated_tests=[generated_test])
        invocation_id1 = InvocationId(
            test_module_path="tests.test_module",
            test_class_name=None,
            test_function_name="test_function",
            function_getting_tested="some_function",
            iteration_id="0",
        )
        invocation_id2 = InvocationId(
            test_module_path="tests.test_module",
            test_class_name=None,
            test_function_name="test_function",
            function_getting_tested="some_function",
            iteration_id="2",
        )
        original_runtimes = {invocation_id1: [1000000000], invocation_id2: [2]} # id1: 1s, id2: 2ns
        optimized_runtimes = {invocation_id1: [1500000000], invocation_id2: [1]} # id1: 1.5s (slower!), id2: 1ns
        result = add_runtime_comments_to_generated_tests(
            qualified_name, test_config, generated_tests, original_runtimes, optimized_runtimes
        )
        # Regressions are reported as "% slower" rather than "% faster".
        expected_source = '''def test_function():
    result = some_function(); assert codeflash_output == expected # 1.00s -> 1.50s (33.3% slower)
    codeflash_output = some_function() # 2ns -> 1ns (100% faster)
    assert codeflash_output == expected
'''
        assert len(result.generated_tests) == 1
        assert result.generated_tests[0].generated_original_test_source == expected_source