mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
* chore: add gitignore entries for local eval repos, e2e fixtures, and env files * fix: restore clean bubble_sort_method.py test fixture The call-site ID commit re-contaminated this file with instrumentation decorators, causing tests to fail with missing CODEFLASH_LOOP_INDEX. * fix: resolve ruff and mypy errors in codeflash-python - Add import-not-found ignores for optional torch/jax imports - Extract magic column index to _STDOUT_COLUMN_INDEX constant - Fix unused variable in _instrument_sync.py - Cast cpu_time_ns to int for mypy arg-type * fix: add skip markers for optional deps and apply ruff formatting to tests Skip torch/jax/tensorflow tests when those packages are not installed. Move has_module helper to conftest.py for reuse across test files. Apply ruff format to all test files that drifted. * fix: resolve remaining ruff format and mypy errors - Add missing blank line in conftest.py (ruff format) - Remove unused import-untyped ignore on jax import (mypy unused-ignore) - Add type: ignore comments for object-typed SQLite row values * chore: bump codeflash-python to 0.1.1.dev0
451 lines
15 KiB
Python
451 lines
15 KiB
Python
from __future__ import annotations
|
|
|
|
import importlib
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from codeflash_python._model import (
|
|
FunctionParent,
|
|
FunctionToOptimize,
|
|
TestingMode,
|
|
VerificationType,
|
|
)
|
|
from codeflash_python.test_discovery.models import CodePosition, TestType
|
|
from codeflash_python.testing._instrument_async import write_async_helper_file
|
|
from codeflash_python.testing._instrument_capture import (
|
|
instrument_codeflash_capture,
|
|
)
|
|
from codeflash_python.testing._instrument_sync import (
|
|
add_sync_decorator_to_function,
|
|
)
|
|
from codeflash_python.testing._instrumentation import (
|
|
inject_profiling_into_existing_test,
|
|
)
|
|
from codeflash_python.testing._parse_results import parse_test_results
|
|
from codeflash_python.testing._test_runner import run_behavioral_tests
|
|
from codeflash_python.testing.models import TestConfig, TestFile, TestFiles
|
|
from codeflash_python.verification._verification import compare_test_results
|
|
|
|
# Directory containing this test module; the fixture paths used by the tests
# below (source under test, temporary test files) are built relative to it.
project_root = Path(__file__).parent.resolve()
|
|
|
|
|
|
def test_class_method_test_instrumentation_only() -> None:
    """Verifies instrumented test execution and result parsing without codeflash capture.

    The test writes a raw pytest file, instruments it, decorates
    ``BubbleSorter.sorter`` in the source under test, runs the behavioral
    tests and checks the parsed results. It then swaps in a variant of the
    source whose ``__init__`` default differs and asserts that the result
    comparison reports a mismatch.

    All files touched on disk and the process working directory are restored
    in the ``finally`` block, whether or not the assertions pass.
    """
    raw_test_code = """from code_to_optimize.bubble_sort_method import BubbleSorter


def test_single_element_list():
    obj = BubbleSorter()
    result = obj.sorter([42])
"""

    # Init paths
    test_path = (
        project_root
        / "code_to_optimize/tests/pytest/test_aiservice_behavior_results_temp.py"
    ).resolve()
    test_path_perf = (
        project_root
        / "code_to_optimize/tests/pytest/test_aiservice_behavior_results_perf_temp.py"
    ).resolve()
    tests_root = project_root / "code_to_optimize/tests/pytest/"
    project_root_path = project_root
    run_cwd = project_root
    fto_path = (
        project_root / "code_to_optimize/bubble_sort_method.py"
    ).resolve()
    # Snapshot the pristine source so it can be restored during cleanup.
    original_code = fto_path.read_text("utf-8")

    function_to_optimize = FunctionToOptimize(
        "sorter",
        fto_path,
        parents=(FunctionParent("BubbleSorter", "ClassDef"),),
    )

    old_cwd = os.getcwd()
    try:
        # Change directory only once the ``finally`` below is armed, so a
        # failure anywhere after this point cannot leak the changed cwd into
        # other tests. All paths used above are absolute.
        os.chdir(run_cwd)

        # Write raw test, instrument it, then add decorator to source
        test_path.write_text(raw_test_code, encoding="utf-8")

        success, new_test = inject_profiling_into_existing_test(
            test_path,
            [CodePosition(6, 13)],
            function_to_optimize,
            project_root_path,
            mode=TestingMode.BEHAVIOR,
        )
        assert success
        assert new_test is not None
        test_path.write_text(new_test, encoding="utf-8")

        # Write the async helper file and add sync decorator to source
        write_async_helper_file(project_root_path)
        add_sync_decorator_to_function(
            fto_path,
            function_to_optimize,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )

        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_env = os.environ.copy()
        test_env["CODEFLASH_TEST_ITERATION"] = "0"
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_type = TestType.EXISTING_UNIT_TEST
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        xml_path, run_result, _, _ = run_behavioral_tests(
            test_files=test_files,
            test_env=test_env,
            cwd=test_config.project_root_path,
            pytest_cmd=test_config.pytest_cmd,
        )
        test_results = parse_test_results(
            test_xml_path=xml_path,
            test_files=test_files,
            test_config=test_config,
            optimization_iteration=0,
            run_result=run_result,
        )
        assert test_results[0].id.function_getting_tested == "sorter"
        assert (
            test_results[0].stdout
            == "codeflash stdout : BubbleSorter.sorter() called\n"
        )
        assert (
            test_results[0].id.test_function_name == "test_single_element_list"
        )
        assert test_results[0].did_pass
        # return_value is ((args, kwargs, return_value),) in the new path
        assert test_results[0].return_value[0][2] == [42]

        # Replace with optimized code that mutated instance attribute
        optimized_code_mutated_attr = """
import sys


class BubbleSorter:

    def __init__(self, x=1):
        self.x = x

    def sorter(self, arr):
        print("BubbleSorter.sorter() called")
        for i in range(len(arr)):
            for j in range(len(arr) - 1):
                if arr[j] > arr[j + 1]:
                    temp = arr[j]
                    arr[j] = arr[j + 1]
                    arr[j + 1] = temp
        print("stderr test", file=sys.stderr)
        return arr
"""
        fto_path.write_text(optimized_code_mutated_attr, "utf-8")

        # Re-add sync decorator to the new source
        add_sync_decorator_to_function(
            fto_path,
            function_to_optimize,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )

        xml_path, run_result, _, _ = run_behavioral_tests(
            test_files=test_files,
            test_env=test_env,
            cwd=test_config.project_root_path,
            pytest_cmd=test_config.pytest_cmd,
        )
        test_results_mutated_attr = parse_test_results(
            test_xml_path=xml_path,
            test_files=test_files,
            test_config=test_config,
            optimization_iteration=0,
            run_result=run_result,
        )
        # In the new decorator-based path, args (including self) are captured,
        # so init state changes ARE detected even without explicit codeflash_capture
        match, _ = compare_test_results(
            test_results, test_results_mutated_attr
        )
        assert not match
        assert (
            test_results_mutated_attr[0].stdout
            == "BubbleSorter.sorter() called\n"
        )
    finally:
        # Restore the source under test and remove every temporary artifact,
        # then return to the directory the test was started from.
        fto_path.write_text(original_code, "utf-8")
        test_path.unlink(missing_ok=True)
        test_path_perf.unlink(missing_ok=True)
        (project_root / "codeflash_async_wrapper.py").unlink(missing_ok=True)
        os.chdir(old_cwd)
|
|
|
|
|
|
def test_class_method_full_instrumentation() -> None:
    """Verifies full instrumentation with codeflash capture for instance state verification.

    The test instruments a raw pytest file, decorates ``BubbleSorter.sorter``
    and adds codeflash capture to the class, then runs the behavioral tests
    three times: against the original source, against a variant whose
    ``__init__`` default changes ``x``, and against a variant that adds a new
    attribute ``y``. Both variants are expected to compare as a mismatch
    against the original results.

    The source under test and all temporary files are restored or removed in
    the ``finally`` block.
    """
    raw_test_code = """from code_to_optimize.bubble_sort_method import BubbleSorter


def test_single_element_list():
    obj = BubbleSorter()
    result = obj.sorter([3, 2, 1])
"""

    # Init paths
    test_path = (
        project_root
        / "code_to_optimize/tests/pytest/test_aiservice_behavior_results_temp.py"
    ).resolve()
    test_path_perf = (
        project_root
        / "code_to_optimize/tests/pytest/test_aiservice_behavior_results_perf_temp.py"
    ).resolve()
    tests_root = project_root / "code_to_optimize/tests/pytest/"
    project_root_path = project_root

    fto_path = (
        project_root / "code_to_optimize/bubble_sort_method.py"
    ).resolve()
    # Snapshot the pristine source so it can be restored during cleanup.
    original_code = fto_path.read_text("utf-8")
    function_to_optimize = FunctionToOptimize(
        "sorter",
        fto_path,
        parents=(FunctionParent("BubbleSorter", "ClassDef"),),
    )

    try:
        # Write raw test, instrument it, then add decorator to source
        test_path.write_text(raw_test_code, encoding="utf-8")

        original_cwd = Path.cwd()
        os.chdir(project_root_path)
        try:
            success, new_test = inject_profiling_into_existing_test(
                test_path,
                [CodePosition(6, 13)],
                function_to_optimize,
                project_root_path,
                mode=TestingMode.BEHAVIOR,
            )
        finally:
            # Always return to the starting directory, even when the
            # instrumentation call raises; the outer cleanup does not
            # touch the cwd.
            os.chdir(original_cwd)
        assert success
        assert new_test is not None
        test_path.write_text(new_test, encoding="utf-8")

        # Write the async helper file and add sync decorator to source
        write_async_helper_file(project_root_path)
        add_sync_decorator_to_function(
            fto_path,
            function_to_optimize,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )

        # Add codeflash capture decorator for __init__ state tracking
        instrument_codeflash_capture(function_to_optimize, {}, tests_root)

        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_env = os.environ.copy()
        test_env["CODEFLASH_TEST_ITERATION"] = "0"
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_type = TestType.EXISTING_UNIT_TEST
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        xml_path, run_result, _, _ = run_behavioral_tests(
            test_files=test_files,
            test_env=test_env,
            cwd=test_config.project_root_path,
            pytest_cmd=test_config.pytest_cmd,
        )
        test_results = parse_test_results(
            test_xml_path=xml_path,
            test_files=test_files,
            test_config=test_config,
            optimization_iteration=0,
            run_result=run_result,
        )
        # Verify instance_state result (from codeflash_capture)
        assert (
            test_results[0].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert (
            test_results[0].id.test_function_name == "test_single_element_list"
        )
        assert test_results[0].did_pass
        assert test_results[0].return_value[0] == {"x": 0}
        assert test_results[0].stdout == ""

        # Verify function_to_optimize result (from sync decorator)
        assert test_results[1].id.function_getting_tested == "sorter"
        assert (
            test_results[1].id.test_function_name == "test_single_element_list"
        )
        assert test_results[1].did_pass
        # return_value is ((args, kwargs, return_value),) in the new path
        assert test_results[1].return_value[0][2] == [1, 2, 3]
        assert (
            test_results[1].stdout
            == "codeflash stdout : BubbleSorter.sorter() called\n"
        )

        # Replace with optimized code that mutated instance attribute
        optimized_code_mutated_attr = """
import sys


class BubbleSorter:

    def __init__(self, x=1):
        self.x = x

    def sorter(self, arr):
        print("BubbleSorter.sorter() called")
        for i in range(len(arr)):
            for j in range(len(arr) - 1):
                if arr[j] > arr[j + 1]:
                    temp = arr[j]
                    arr[j] = arr[j + 1]
                    arr[j + 1] = temp
        print("stderr test", file=sys.stderr)
        return arr
"""
        fto_path.write_text(optimized_code_mutated_attr, "utf-8")
        # Force reload of module
        module_name = "code_to_optimize.bubble_sort_method"
        if module_name not in sys.modules:
            __import__(module_name)
        importlib.reload(sys.modules[module_name])

        # Re-add sync decorator and codeflash capture to the new source
        add_sync_decorator_to_function(
            fto_path,
            function_to_optimize,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )
        instrument_codeflash_capture(function_to_optimize, {}, tests_root)
        xml_path, run_result, _, _ = run_behavioral_tests(
            test_files=test_files,
            test_env=test_env,
            cwd=test_config.project_root_path,
            pytest_cmd=test_config.pytest_cmd,
        )
        test_results_mutated_attr = parse_test_results(
            test_xml_path=xml_path,
            test_files=test_files,
            test_config=test_config,
            optimization_iteration=0,
            run_result=run_result,
        )
        assert (
            test_results_mutated_attr[0].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert test_results_mutated_attr[0].return_value[0] == {"x": 1}
        assert (
            test_results_mutated_attr[0].verification_type
            == VerificationType.INIT_STATE_FTO
        )
        assert test_results_mutated_attr[0].stdout == ""
        # The test should fail because the instance attribute was mutated
        match, _ = compare_test_results(
            test_results, test_results_mutated_attr
        )
        assert not match

        # Replace with optimized code that did not mutate existing
        # instance attribute, but added a new one
        optimized_code_new_attr = """
import sys


class BubbleSorter:
    def __init__(self, x=0):
        self.x = x
        self.y = 2

    def sorter(self, arr):
        print("BubbleSorter.sorter() called")
        for i in range(len(arr)):
            for j in range(len(arr) - 1):
                if arr[j] > arr[j + 1]:
                    temp = arr[j]
                    arr[j] = arr[j + 1]
                    arr[j + 1] = temp
        print("stderr test", file=sys.stderr)
        return arr
"""
        fto_path.write_text(optimized_code_new_attr, "utf-8")
        importlib.reload(sys.modules[module_name])

        # Re-add sync decorator and codeflash capture
        add_sync_decorator_to_function(
            fto_path,
            function_to_optimize,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )
        instrument_codeflash_capture(function_to_optimize, {}, tests_root)
        xml_path, run_result, _, _ = run_behavioral_tests(
            test_files=test_files,
            test_env=test_env,
            cwd=test_config.project_root_path,
            pytest_cmd=test_config.pytest_cmd,
        )
        test_results_new_attr = parse_test_results(
            test_xml_path=xml_path,
            test_files=test_files,
            test_config=test_config,
            optimization_iteration=0,
            run_result=run_result,
        )
        assert (
            test_results_new_attr[0].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert test_results_new_attr[0].return_value[0] == {"x": 0, "y": 2}
        assert (
            test_results_new_attr[0].verification_type
            == VerificationType.INIT_STATE_FTO
        )
        assert test_results_new_attr[0].stdout == ""
        # In the new decorator-based path, args (including self) are captured.
        # Adding a new instance attribute changes self, so the comparison
        # detects a difference even though codeflash_capture considers it additive.
        match, _ = compare_test_results(test_results, test_results_new_attr)
        assert not match
    finally:
        # Restore the source under test and remove every temporary artifact.
        fto_path.write_text(original_code, "utf-8")
        test_path.unlink(missing_ok=True)
        test_path_perf.unlink(missing_ok=True)
        (project_root / "codeflash_async_wrapper.py").unlink(missing_ok=True)
|