codeflash-agent/packages/codeflash-python/tests/test_instrument_all_and_run.py
Kevin Turcios 919a673be2
Fix pre-existing CI lint and test failures (#40)
* chore: add gitignore entries for local eval repos, e2e fixtures, and env files

* fix: restore clean bubble_sort_method.py test fixture

The call-site ID commit re-contaminated this file with instrumentation
decorators, causing tests to fail with missing CODEFLASH_LOOP_INDEX.

* fix: resolve ruff and mypy errors in codeflash-python

- Add import-not-found ignores for optional torch/jax imports
- Extract magic column index to _STDOUT_COLUMN_INDEX constant
- Fix unused variable in _instrument_sync.py
- Cast cpu_time_ns to int for mypy arg-type

* fix: add skip markers for optional deps and apply ruff formatting to tests

Skip torch/jax/tensorflow tests when those packages are not installed.
Move has_module helper to conftest.py for reuse across test files.
Apply ruff format to all test files that drifted.

* fix: resolve remaining ruff format and mypy errors

- Add missing blank line in conftest.py (ruff format)
- Remove unused import-untyped ignore on jax import (mypy unused-ignore)
- Add type: ignore comments for object-typed SQLite row values

* chore: bump codeflash-python to 0.1.1.dev0
2026-04-28 18:39:46 -05:00

664 lines
24 KiB
Python

from __future__ import annotations
import importlib
import os
import sys
import tempfile
from pathlib import Path
from codeflash_python._model import (
FunctionParent,
FunctionToOptimize,
TestingMode,
)
from codeflash_python.test_discovery.models import CodePosition, TestType
from codeflash_python.testing._instrument_async import write_async_helper_file
from codeflash_python.testing._instrument_capture import (
instrument_codeflash_capture,
)
from codeflash_python.testing._instrument_sync import (
add_sync_decorator_to_function,
)
from codeflash_python.testing._instrumentation import (
inject_profiling_into_existing_test,
)
from codeflash_python.testing._parse_results import parse_test_results
from codeflash_python.testing._test_runner import run_behavioral_tests
from codeflash_python.testing.models import TestConfig, TestFile, TestFiles
from codeflash_python.verification._verification import compare_test_results
# Resolved directory containing this test module; every fixture path used by
# the tests below (``code_to_optimize/...``) is built relative to it.
project_root = Path(__file__).parent.resolve()
def _run_and_parse(
    test_files: TestFiles,
    test_env: dict[str, str],
    test_config: TestConfig,
) -> list[object]:
    """Execute the behavioral test run and return its parsed results.

    Stands in for the former ``Optimizer.run_and_parse_tests`` helper.

    Args:
        test_files: Instrumented test files to execute.
        test_env: Environment variables handed to the test run.
        test_config: Supplies the working directory
            (``project_root_path``) and the pytest command, and is also
            forwarded to the result parser.

    Returns:
        Whatever ``parse_test_results`` produces for iteration ``0``.
    """
    behavioral_run = run_behavioral_tests(
        test_files=test_files,
        test_env=test_env,
        cwd=test_config.project_root_path,
        pytest_cmd=test_config.pytest_cmd,
    )
    # Only the first two of the four returned items are needed here.
    junit_xml, completed_run, _, _ = behavioral_run

    parsed = parse_test_results(
        test_xml_path=junit_xml,
        test_files=test_files,
        test_config=test_config,
        optimization_iteration=0,
        run_result=completed_run,
    )
    return parsed
def test_bubble_sort_behavior_results() -> None:
    """Instrument a ``sorter`` test in behavior mode and check parsed results.

    The embedded test source is written to a temporary file, instrumented at
    its two ``sorter`` call sites, and executed twice; the per-call results
    are asserted and the two runs are compared with ``compare_test_results``.
    The instrumented source file is restored and the temporary files are
    removed in the ``finally`` block regardless of the outcome.
    """
    # The CodePosition (line, column) pairs used below index into this source,
    # so its blank lines and 4-space indentation are significant.
    code = """from code_to_optimize.bubble_sort import sorter


def test_sort():
    input = [5, 4, 3, 2, 1, 0]
    output = sorter(input)
    assert output == [0, 1, 2, 3, 4, 5]

    input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
    output = sorter(input)
    assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
    test_path = (
        project_root
        / "code_to_optimize/tests/pytest/test_perfinjector_bubble_sort_results_temp.py"
    ).resolve()
    test_path_perf = (
        project_root
        / "code_to_optimize/tests/pytest/test_perfinjector_bubble_sort_results_perf_temp.py"
    ).resolve()
    fto_path = (project_root / "code_to_optimize/bubble_sort.py").resolve()
    original_code = fto_path.read_text("utf-8")
    try:
        # Explicit encoding keeps the write consistent with the UTF-8 read
        # above and with the sibling tests.
        test_path.write_text(code, encoding="utf-8")
        tests_root = (
            project_root / "code_to_optimize/tests/pytest/"
        ).resolve()
        project_root_path = project_root
        func = FunctionToOptimize(
            function_name="sorter",
            parents=(),
            file_path=Path(fto_path),
        )

        # Instrumentation runs from the project root; always restore the
        # previous working directory, even if instrumentation raises, so a
        # failure here cannot leak a changed cwd into later tests.
        original_cwd = Path.cwd()
        os.chdir(project_root_path)
        try:
            success, new_test = inject_profiling_into_existing_test(
                test_path,
                [CodePosition(6, 13), CodePosition(10, 13)],
                func,
                project_root_path,
                mode=TestingMode.BEHAVIOR,
            )
        finally:
            os.chdir(original_cwd)
        assert success
        assert new_test is not None
        test_path.write_text(new_test, encoding="utf-8")

        # Write the async helper file (contains sync decorators too)
        write_async_helper_file(project_root_path)
        # Add sync decorator to the source function
        add_sync_decorator_to_function(
            fto_path,
            func,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )

        test_env = os.environ.copy()
        test_env["CODEFLASH_TEST_ITERATION"] = "0"
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_type = TestType.EXISTING_UNIT_TEST
        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        test_results = _run_and_parse(test_files, test_env, test_config)

        # New decorator captures stdout directly -- the function prints two lines
        assert test_results[0].id.function_getting_tested == "sorter"
        assert test_results[0].id.test_class_name is None
        assert test_results[0].id.test_function_name == "test_sort"
        assert (
            test_results[0].id.test_module_path
            == "code_to_optimize.tests.pytest.test_perfinjector_bubble_sort_results_temp"
        )
        assert test_results[0].runtime > 0
        assert test_results[0].did_pass
        # return_value is ((args, kwargs, return_value),) in the new path
        assert test_results[0].return_value[0][2] == [0, 1, 2, 3, 4, 5]
        out_str = (
            "codeflash stdout: Sorting list\nresult: [0, 1, 2, 3, 4, 5]\n"
        )
        assert test_results[0].stdout == out_str

        assert test_results[1].id.function_getting_tested == "sorter"
        assert test_results[1].id.test_class_name is None
        assert test_results[1].id.test_function_name == "test_sort"
        assert (
            test_results[1].id.test_module_path
            == "code_to_optimize.tests.pytest.test_perfinjector_bubble_sort_results_temp"
        )
        assert test_results[1].runtime > 0
        assert test_results[1].did_pass

        # A second run of the same instrumented test must compare equal.
        results2 = _run_and_parse(test_files, test_env, test_config)
        match, _ = compare_test_results(test_results, results2)
        assert match
    finally:
        # Undo the decorator added to the source and drop generated files.
        fto_path.write_text(original_code, "utf-8")
        test_path.unlink(missing_ok=True)
        test_path_perf.unlink(missing_ok=True)
        (project_root / "codeflash_async_wrapper.py").unlink(missing_ok=True)
def test_method_full_instrumentation() -> None:
    """Instrument an instance-method test and detect a changed ``__init__``.

    Phase one instruments ``BubbleSorter.sorter`` (test call sites, sync
    decorator, and codeflash capture), runs the test twice, and checks that
    the runs agree. Phase two overwrites the source with a variant whose
    ``__init__`` defaults ``x`` to 1 instead of the 0 asserted in phase one,
    re-instruments it, and checks that ``compare_test_results`` reports a
    mismatch against the phase-one results. The source file is restored and
    temporary files are removed in the ``finally`` block.
    """
    # The CodePosition (line, column) pairs used below index into this source,
    # so its blank lines and 4-space indentation are significant.
    code = """from code_to_optimize.bubble_sort_method import BubbleSorter


def test_sort():
    input = [5, 4, 3, 2, 1, 0]
    sort_class = BubbleSorter()
    output = sort_class.sorter(input)
    assert output == [0, 1, 2, 3, 4, 5]

    input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
    sort_class = BubbleSorter()
    output = sort_class.sorter(input)
    assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
    fto_path = (
        project_root / "code_to_optimize/bubble_sort_method.py"
    ).resolve()
    original_code = fto_path.read_text("utf-8")
    fto = FunctionToOptimize(
        function_name="sorter",
        parents=(FunctionParent(name="BubbleSorter", type="ClassDef"),),
        file_path=Path(fto_path),
    )
    tests_root = (project_root / "code_to_optimize/tests/pytest/").resolve()
    test_path = tests_root / "test_class_method_behavior_results_temp.py"
    test_path_perf = (
        tests_root / "test_class_method_behavior_results_perf_temp.py"
    )
    project_root_path = project_root
    try:
        # Write and instrument the test file
        test_path.write_text(code, encoding="utf-8")

        # Instrumentation runs from the project root; always restore the
        # previous working directory, even if instrumentation raises, so a
        # failure here cannot leak a changed cwd into later tests.
        original_cwd = Path.cwd()
        os.chdir(project_root_path)
        try:
            success, new_test = inject_profiling_into_existing_test(
                test_path,
                [CodePosition(7, 13), CodePosition(12, 13)],
                fto,
                project_root_path,
            )
        finally:
            os.chdir(original_cwd)
        assert success
        assert new_test is not None
        test_path.write_text(new_test, encoding="utf-8")

        # Write the async helper file and add sync decorator to source
        write_async_helper_file(project_root_path)
        add_sync_decorator_to_function(
            fto_path,
            fto,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )
        # Add codeflash capture for __init__ state
        instrument_codeflash_capture(fto, {}, tests_root)

        test_env = os.environ.copy()
        test_env["CODEFLASH_TEST_ITERATION"] = "0"
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_type = TestType.EXISTING_UNIT_TEST
        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        test_results = _run_and_parse(test_files, test_env, test_config)
        assert len(test_results) == 4

        # Order: init results (from codeflash_capture) then sorter results (from sync decorator)
        assert (
            test_results[0].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert test_results[0].id.test_function_name == "test_sort"
        assert test_results[0].did_pass
        assert test_results[0].return_value[0] == {"x": 0}

        assert (
            test_results[1].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert test_results[1].id.test_function_name == "test_sort"
        assert test_results[1].did_pass
        assert test_results[1].return_value[0] == {"x": 0}

        assert test_results[2].id.function_getting_tested == "sorter"
        assert test_results[2].id.test_class_name is None
        assert test_results[2].id.test_function_name == "test_sort"
        assert (
            test_results[2].id.test_module_path
            == "code_to_optimize.tests.pytest.test_class_method_behavior_results_temp"
        )
        assert test_results[2].runtime > 0
        assert test_results[2].did_pass
        # return_value is ((args, kwargs, return_value),) in the new path
        assert test_results[2].return_value[0][2] == [0, 1, 2, 3, 4, 5]
        assert (
            test_results[2].stdout
            == "codeflash stdout : BubbleSorter.sorter() called\n"
        )
        # Sanity check: a result set always compares equal to itself.
        match, _ = compare_test_results(test_results, test_results)
        assert match

        assert test_results[3].id.function_getting_tested == "sorter"
        assert test_results[3].id.test_class_name is None
        assert test_results[3].id.test_function_name == "test_sort"
        assert (
            test_results[3].id.test_module_path
            == "code_to_optimize.tests.pytest.test_class_method_behavior_results_temp"
        )
        assert test_results[3].runtime > 0
        assert test_results[3].did_pass
        assert (
            test_results[3].stdout
            == "codeflash stdout : BubbleSorter.sorter() called\n"
        )

        # A second run of the same instrumented test must compare equal.
        results2 = _run_and_parse(test_files, test_env, test_config)
        match, _ = compare_test_results(test_results, results2)
        assert match

        # Replace with optimized code that mutated instance attribute:
        # __init__ now defaults x to 1, whereas the runs above captured x == 0.
        optimized_code = """
class BubbleSorter:
    def __init__(self, x=1):
        self.x = x
    def sorter(self, arr):
        for i in range(len(arr)):
            for j in range(len(arr) - 1):
                if arr[j] > arr[j + 1]:
                    temp = arr[j]
                    arr[j] = arr[j + 1]
                    arr[j + 1] = temp
        return arr
"""
        fto_path.write_text(optimized_code, "utf-8")

        # Force reload of module. import_module returns the cached module
        # when it is already loaded and imports it first otherwise.
        module_name = "code_to_optimize.bubble_sort_method"
        importlib.reload(importlib.import_module(module_name))

        # Re-add sync decorator and codeflash capture to the new source
        add_sync_decorator_to_function(
            fto_path,
            fto,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )
        instrument_codeflash_capture(fto, {}, tests_root)

        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        new_test_results = _run_and_parse(test_files, test_env, test_config)
        assert len(new_test_results) == 4

        # Order: init results then sorter results
        assert (
            new_test_results[0].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert new_test_results[0].id.test_function_name == "test_sort"
        assert new_test_results[0].did_pass
        assert new_test_results[0].return_value[0] == {"x": 1}

        assert (
            new_test_results[1].id.function_getting_tested
            == "BubbleSorter.__init__"
        )
        assert new_test_results[1].id.test_function_name == "test_sort"
        assert new_test_results[1].did_pass
        assert new_test_results[1].return_value[0] == {"x": 1}

        assert new_test_results[2].id.function_getting_tested == "sorter"
        assert new_test_results[2].id.test_class_name is None
        assert new_test_results[2].id.test_function_name == "test_sort"
        assert (
            new_test_results[2].id.test_module_path
            == "code_to_optimize.tests.pytest.test_class_method_behavior_results_temp"
        )
        assert new_test_results[2].runtime > 0
        assert new_test_results[2].did_pass
        assert new_test_results[2].return_value[0][2] == [0, 1, 2, 3, 4, 5]

        assert new_test_results[3].id.function_getting_tested == "sorter"
        assert new_test_results[3].id.test_class_name is None
        assert new_test_results[3].id.test_function_name == "test_sort"
        assert (
            new_test_results[3].id.test_module_path
            == "code_to_optimize.tests.pytest.test_class_method_behavior_results_temp"
        )
        assert new_test_results[3].runtime > 0
        assert new_test_results[3].did_pass

        # The changed __init__ state must be reported as a behavior mismatch.
        match, _ = compare_test_results(test_results, new_test_results)
        assert not match
    finally:
        # Undo all edits to the source file and drop generated files.
        fto_path.write_text(original_code, "utf-8")
        test_path.unlink(missing_ok=True)
        test_path_perf.unlink(missing_ok=True)
        (project_root / "codeflash_async_wrapper.py").unlink(missing_ok=True)
def test_classmethod_full_instrumentation() -> None:
    """Instrument a test of ``BubbleSorter.sorter_classmethod`` and run it.

    The embedded test source is written to a temporary file, instrumented at
    its two call sites, and executed twice; both per-call results are
    asserted and the two runs are compared with ``compare_test_results``.
    The source file is restored and temporary files are removed in the
    ``finally`` block.
    """
    # The CodePosition (line, column) pairs used below index into this source,
    # so its blank lines and 4-space indentation are significant.
    code = """from code_to_optimize.bubble_sort_method import BubbleSorter


def test_sort():
    input = [5, 4, 3, 2, 1, 0]
    output = BubbleSorter.sorter_classmethod(input)
    assert output == [0, 1, 2, 3, 4, 5]

    input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
    output = BubbleSorter.sorter_classmethod(input)
    assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
    fto_path = (
        project_root / "code_to_optimize/bubble_sort_method.py"
    ).resolve()
    original_code = fto_path.read_text("utf-8")
    fto = FunctionToOptimize(
        function_name="sorter_classmethod",
        parents=(FunctionParent(name="BubbleSorter", type="ClassDef"),),
        file_path=Path(fto_path),
    )
    tests_root = (project_root / "code_to_optimize/tests/pytest/").resolve()
    test_path = tests_root / "test_classmethod_behavior_results_temp.py"
    test_path_perf = (
        tests_root / "test_classmethod_behavior_results_perf_temp.py"
    )
    project_root_path = project_root
    try:
        # Write and instrument the test file
        test_path.write_text(code, encoding="utf-8")

        # Instrumentation runs from the project root; always restore the
        # previous working directory, even if instrumentation raises, so a
        # failure here cannot leak a changed cwd into later tests.
        original_cwd = Path.cwd()
        os.chdir(project_root_path)
        try:
            success, new_test = inject_profiling_into_existing_test(
                test_path,
                [CodePosition(6, 13), CodePosition(10, 13)],
                fto,
                project_root_path,
            )
        finally:
            os.chdir(original_cwd)
        assert success
        assert new_test is not None
        test_path.write_text(new_test, encoding="utf-8")

        # Write the async helper file and add sync decorator to source
        write_async_helper_file(project_root_path)
        add_sync_decorator_to_function(
            fto_path,
            fto,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )
        # Add codeflash capture
        instrument_codeflash_capture(fto, {}, tests_root)

        test_env = os.environ.copy()
        test_env["CODEFLASH_TEST_ITERATION"] = "0"
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_type = TestType.EXISTING_UNIT_TEST
        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        test_results = _run_and_parse(test_files, test_env, test_config)
        assert len(test_results) == 2

        assert (
            test_results[0].id.function_getting_tested == "sorter_classmethod"
        )
        assert test_results[0].id.test_class_name is None
        assert test_results[0].id.test_function_name == "test_sort"
        assert (
            test_results[0].id.test_module_path
            == "code_to_optimize.tests.pytest.test_classmethod_behavior_results_temp"
        )
        assert test_results[0].runtime > 0
        assert test_results[0].did_pass
        # return_value[0][2] is the value returned by the instrumented call.
        assert test_results[0].return_value[0][2] == [0, 1, 2, 3, 4, 5]
        assert (
            test_results[0].stdout
            == "codeflash stdout : BubbleSorter.sorter_classmethod() called\n"
        )
        # Sanity check: a result set always compares equal to itself.
        match, _ = compare_test_results(test_results, test_results)
        assert match

        assert (
            test_results[1].id.function_getting_tested == "sorter_classmethod"
        )
        assert test_results[1].id.test_class_name is None
        assert test_results[1].id.test_function_name == "test_sort"
        assert (
            test_results[1].id.test_module_path
            == "code_to_optimize.tests.pytest.test_classmethod_behavior_results_temp"
        )
        assert test_results[1].runtime > 0
        assert test_results[1].did_pass
        assert (
            test_results[1].stdout
            == "codeflash stdout : BubbleSorter.sorter_classmethod() called\n"
        )

        # A second run of the same instrumented test must compare equal.
        results2 = _run_and_parse(test_files, test_env, test_config)
        match, _ = compare_test_results(test_results, results2)
        assert match
    finally:
        # Undo all edits to the source file and drop generated files.
        fto_path.write_text(original_code, "utf-8")
        test_path.unlink(missing_ok=True)
        test_path_perf.unlink(missing_ok=True)
        (project_root / "codeflash_async_wrapper.py").unlink(missing_ok=True)
def test_staticmethod_full_instrumentation() -> None:
    """Instrument a test of ``BubbleSorter.sorter_staticmethod`` and run it.

    The embedded test source is written to a temporary file, instrumented at
    its two call sites, and executed twice; both per-call results are
    asserted and the two runs are compared with ``compare_test_results``.
    The source file is restored and temporary files are removed in the
    ``finally`` block.
    """
    # The CodePosition (line, column) pairs used below index into this source,
    # so its blank lines and 4-space indentation are significant.
    code = """from code_to_optimize.bubble_sort_method import BubbleSorter


def test_sort():
    input = [5, 4, 3, 2, 1, 0]
    output = BubbleSorter.sorter_staticmethod(input)
    assert output == [0, 1, 2, 3, 4, 5]

    input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
    output = BubbleSorter.sorter_staticmethod(input)
    assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
    fto_path = (
        project_root / "code_to_optimize/bubble_sort_method.py"
    ).resolve()
    original_code = fto_path.read_text("utf-8")
    fto = FunctionToOptimize(
        function_name="sorter_staticmethod",
        parents=(FunctionParent(name="BubbleSorter", type="ClassDef"),),
        file_path=Path(fto_path),
    )
    tests_root = (project_root / "code_to_optimize/tests/pytest/").resolve()
    test_path = tests_root / "test_staticmethod_behavior_results_temp.py"
    test_path_perf = (
        tests_root / "test_staticmethod_behavior_results_perf_temp.py"
    )
    project_root_path = project_root
    try:
        # Write and instrument the test file
        test_path.write_text(code, encoding="utf-8")

        # Instrumentation runs from the project root; always restore the
        # previous working directory, even if instrumentation raises, so a
        # failure here cannot leak a changed cwd into later tests.
        original_cwd = Path.cwd()
        os.chdir(project_root_path)
        try:
            success, new_test = inject_profiling_into_existing_test(
                test_path,
                [CodePosition(6, 13), CodePosition(10, 13)],
                fto,
                project_root_path,
            )
        finally:
            os.chdir(original_cwd)
        assert success
        assert new_test is not None
        test_path.write_text(new_test, encoding="utf-8")

        # Write the async helper file and add sync decorator to source
        write_async_helper_file(project_root_path)
        add_sync_decorator_to_function(
            fto_path,
            fto,
            mode=TestingMode.BEHAVIOR,
            project_root=project_root_path,
        )
        # Add codeflash capture
        instrument_codeflash_capture(fto, {}, tests_root)

        test_env = os.environ.copy()
        test_env["CODEFLASH_TEST_ITERATION"] = "0"
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_type = TestType.EXISTING_UNIT_TEST
        test_config = TestConfig(
            tests_root=tests_root,
            tests_project_rootdir=project_root_path,
            project_root_path=project_root_path,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        test_files = TestFiles(
            test_files=[
                TestFile(
                    instrumented_behavior_file_path=test_path,
                    test_type=test_type,
                    original_file_path=test_path,
                    benchmarking_file_path=test_path_perf,
                )
            ]
        )
        test_results = _run_and_parse(test_files, test_env, test_config)
        assert len(test_results) == 2

        assert (
            test_results[0].id.function_getting_tested == "sorter_staticmethod"
        )
        assert test_results[0].id.test_class_name is None
        assert test_results[0].id.test_function_name == "test_sort"
        assert (
            test_results[0].id.test_module_path
            == "code_to_optimize.tests.pytest.test_staticmethod_behavior_results_temp"
        )
        assert test_results[0].runtime > 0
        assert test_results[0].did_pass
        # return_value[0][2] is the value returned by the instrumented call.
        assert test_results[0].return_value[0][2] == [0, 1, 2, 3, 4, 5]
        assert (
            test_results[0].stdout
            == "codeflash stdout : BubbleSorter.sorter_staticmethod() called\n"
        )
        # Sanity check: a result set always compares equal to itself.
        match, _ = compare_test_results(test_results, test_results)
        assert match

        assert (
            test_results[1].id.function_getting_tested == "sorter_staticmethod"
        )
        assert test_results[1].id.test_class_name is None
        assert test_results[1].id.test_function_name == "test_sort"
        assert (
            test_results[1].id.test_module_path
            == "code_to_optimize.tests.pytest.test_staticmethod_behavior_results_temp"
        )
        assert test_results[1].runtime > 0
        assert test_results[1].did_pass
        assert (
            test_results[1].stdout
            == "codeflash stdout : BubbleSorter.sorter_staticmethod() called\n"
        )

        # A second run of the same instrumented test must compare equal.
        results2 = _run_and_parse(test_files, test_env, test_config)
        match, _ = compare_test_results(test_results, results2)
        assert match
    finally:
        # Undo all edits to the source file and drop generated files.
        fto_path.write_text(original_code, "utf-8")
        test_path.unlink(missing_ok=True)
        test_path_perf.unlink(missing_ok=True)
        (project_root / "codeflash_async_wrapper.py").unlink(missing_ok=True)