Mirror of https://github.com/codeflash-ai/codeflash-agent.git (synced 2026-05-04 18:25:19 +00:00)
Measure both wall-clock time (perf_counter_ns) and CPU thread time (thread_time_ns) in instrumented test code. cpu_runtime is now a required int field on FunctionTestInvocation, stored in the SQLite test_results table as a 10th column. Also fix the sleeptime.py bug (10e9 → 1e9 divisor) and remove the binary pickle parser (parse_test_return_values_bin), since no writer exists in the current codebase; SQLite is the sole data capture path.
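The commit message describes the dual-clock instrumentation only in prose. As a rough illustration, the sketch below shows how a single timed call could capture both values with the standard-library clocks named above. The wrapper name run_with_timers and its shape are hypothetical, not the repository's actual instrumentation template; only time.perf_counter_ns, time.thread_time_ns, the runtime/cpu_runtime field names, and the 1e9 nanosecond-to-second divisor come from the commit message and the test module below.

import time


def run_with_timers(fn, *args, **kwargs):
    """Hypothetical helper: time one call with both clocks, in nanoseconds."""
    wall_start = time.perf_counter_ns()  # wall-clock time
    cpu_start = time.thread_time_ns()    # CPU time of the current thread only
    result = fn(*args, **kwargs)
    runtime = time.perf_counter_ns() - wall_start     # candidate value for FunctionTestInvocation.runtime
    cpu_runtime = time.thread_time_ns() - cpu_start   # candidate value for FunctionTestInvocation.cpu_runtime
    runtime_seconds = runtime / 1e9  # ns -> s; 1e9 is the corrected divisor referenced for sleeptime.py
    return result, runtime, cpu_runtime, runtime_seconds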
509 lines · 16 KiB · Python
from __future__ import annotations

from pathlib import Path

import pytest

from codeflash_python._model import VerificationType
from codeflash_python.benchmarking.models import BenchmarkKey
from codeflash_python.test_discovery.models import TestType
from codeflash_python.testing.models import (
    FunctionTestInvocation,
    InvocationId,
    TestConfig,
    TestFile,
    TestFiles,
    TestResults,
)


def make_invocation_id(
    *,
    module: str = "tests.test_foo",
    cls: str | None = "TestFoo",
    func: str | None = "test_bar",
    target: str = "bar",
    iteration: str | None = "0",
) -> InvocationId:
    """Create an InvocationId with sensible defaults."""
    return InvocationId(
        test_module_path=module,
        test_class_name=cls,
        test_function_name=func,
        function_getting_tested=target,
        iteration_id=iteration,
    )


def make_invocation(
    *,
    loop_index: int = 0,
    inv_id: InvocationId | None = None,
    did_pass: bool = True,
    runtime: int | None = 100,
) -> FunctionTestInvocation:
    """Create a FunctionTestInvocation with sensible defaults."""
    return FunctionTestInvocation(
        loop_index=loop_index,
        id=inv_id or make_invocation_id(),
        file_name=Path("tests/test_foo.py"),
        did_pass=did_pass,
        runtime=runtime,
        test_framework="pytest",
        test_type=TestType.EXISTING_UNIT_TEST,
        return_value=None,
        cpu_runtime=0,
        timed_out=False,
    )


class TestInvocationId:
    """InvocationId identity and parsing."""

    def test_id_with_class(self) -> None:
        """id() includes class prefix when test_class_name is set."""
        inv = make_invocation_id(cls="TestFoo", func="test_bar")
        assert "tests.test_foo:TestFoo.test_bar:bar:0" == inv.id()

    def test_id_without_class(self) -> None:
        """id() has no class prefix when test_class_name is None."""
        inv = make_invocation_id(cls=None, func="test_bar")
        assert "tests.test_foo:test_bar:bar:0" == inv.id()

    def test_fn_qualified_name_with_class(self) -> None:
        """Returns 'Class.function' when class is present."""
        inv = make_invocation_id(cls="TestFoo", func="test_bar")
        assert "TestFoo.test_bar" == inv.test_fn_qualified_name()

    def test_fn_qualified_name_without_class(self) -> None:
        """Returns just 'function' when class is None."""
        inv = make_invocation_id(cls=None, func="test_bar")
        assert "test_bar" == inv.test_fn_qualified_name()

    def test_from_str_id_with_class(self) -> None:
        """Parses 'module:Class.test:func:iter' correctly."""
        result = InvocationId.from_str_id(
            "tests.test_foo:TestFoo.test_bar:bar:0",
        )
        assert "tests.test_foo" == result.test_module_path
        assert "TestFoo" == result.test_class_name
        assert "test_bar" == result.test_function_name
        assert "bar" == result.function_getting_tested
        assert "0" == result.iteration_id

    def test_from_str_id_without_class(self) -> None:
        """Parses 'module:test:func:iter' when no class present."""
        result = InvocationId.from_str_id(
            "tests.test_foo:test_bar:bar:0",
        )
        assert result.test_class_name is None
        assert "test_bar" == result.test_function_name

    def test_from_str_id_with_iteration_override(self) -> None:
        """iteration_id parameter overrides the one in the string."""
        result = InvocationId.from_str_id(
            "tests.test_foo:test_bar:bar:0",
            iteration_id="5",
        )
        assert "5" == result.iteration_id

    def test_from_str_id_invalid(self) -> None:
        """Raises ValueError for malformed input."""
        with pytest.raises(ValueError, match="Expected 4"):
            InvocationId.from_str_id("bad:input")

    def test_frozen(self) -> None:
        """Cannot set attributes on frozen instance."""
        inv = make_invocation_id()
        with pytest.raises(AttributeError):
            inv.test_module_path = "other"  # type: ignore[misc]


class TestFunctionTestInvocation:
    """FunctionTestInvocation data and properties."""

    def test_unique_invocation_loop_id(self) -> None:
        """Combines loop_index and id string."""
        inv = make_invocation(loop_index=3)
        expected = f"3:{inv.id.id()}"
        assert expected == inv.unique_invocation_loop_id

    def test_default_verification_type(self) -> None:
        """Defaults to FUNCTION_CALL when not specified."""
        inv = make_invocation()
        assert VerificationType.FUNCTION_CALL == inv.verification_type

    def test_explicit_verification_type(self) -> None:
        """Accepts explicit verification type."""
        inv = FunctionTestInvocation(
            loop_index=0,
            id=make_invocation_id(),
            file_name=Path("tests/test_foo.py"),
            did_pass=True,
            runtime=100,
            test_framework="pytest",
            test_type=TestType.EXISTING_UNIT_TEST,
            return_value=None,
            cpu_runtime=0,
            timed_out=False,
            verification_type=VerificationType.INIT_STATE_FTO,
        )
        assert VerificationType.INIT_STATE_FTO == inv.verification_type

    def test_frozen(self) -> None:
        """Cannot modify attributes on frozen instance."""
        inv = make_invocation()
        with pytest.raises(AttributeError):
            inv.did_pass = False  # type: ignore[misc]


class TestTestResults:
    """TestResults collection behavior."""

    def test_add_and_len(self) -> None:
        """Adding an invocation increases length."""
        results = TestResults()
        results.add(make_invocation())
        assert 1 == len(results)

    def test_add_dedup(self) -> None:
        """Adding same uid twice only stores once."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        results.add(inv)
        assert 1 == len(results)

    def test_merge(self) -> None:
        """Merges two TestResults together."""
        r1 = TestResults()
        r1.add(make_invocation(loop_index=0))
        r2 = TestResults()
        r2.add(make_invocation(loop_index=1))
        r1.merge(r2)
        assert 2 == len(r1)

    def test_merge_duplicate_raises(self) -> None:
        """Duplicate uid in merge raises ValueError."""
        inv = make_invocation()
        r1 = TestResults()
        r1.add(inv)
        r2 = TestResults()
        r2.add(inv)
        with pytest.raises(ValueError, match="Duplicate"):
            r1.merge(r2)

    def test_get_by_uid(self) -> None:
        """Lookup by unique_invocation_loop_id returns the invocation."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        found = results.get_by_unique_invocation_loop_id(
            inv.unique_invocation_loop_id,
        )
        assert inv == found

    def test_get_by_uid_missing(self) -> None:
        """Returns None for unknown uid."""
        results = TestResults()
        assert results.get_by_unique_invocation_loop_id("x") is None

    def test_number_of_loops(self) -> None:
        """Returns max loop_index across all results."""
        results = TestResults()
        results.add(make_invocation(loop_index=0))
        results.add(make_invocation(loop_index=3))
        assert 3 == results.number_of_loops()

    def test_number_of_loops_empty(self) -> None:
        """Returns 0 for empty results."""
        assert 0 == TestResults().number_of_loops()

    def test_total_passed_runtime(self) -> None:
        """Sum of minimum runtimes across passing test cases."""
        inv_id = make_invocation_id()
        results = TestResults()
        results.add(
            make_invocation(loop_index=0, inv_id=inv_id, runtime=200),
        )
        results.add(
            make_invocation(loop_index=1, inv_id=inv_id, runtime=100),
        )
        assert 100 == results.total_passed_runtime()

    def test_total_passed_runtime_excludes_failed(self) -> None:
        """Failed invocations are excluded from the runtime sum."""
        results = TestResults()
        results.add(make_invocation(loop_index=0, runtime=200))
        results.add(
            make_invocation(
                loop_index=1,
                inv_id=make_invocation_id(func="test_fail"),
                did_pass=False,
                runtime=50,
            ),
        )
        assert 200 == results.total_passed_runtime()

    def test_iter_and_bool(self) -> None:
        """Iteration yields invocations; empty is falsy, non-empty truthy."""
        results = TestResults()
        assert not results
        inv = make_invocation()
        results.add(inv)
        assert results
        assert [inv] == list(results)

    def test_contains(self) -> None:
        """Invocation in results returns True."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        assert inv in results

    def test_getitem(self) -> None:
        """Index access returns the correct invocation."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        assert inv == results[0]


class TestTestFile:
    """TestFile and TestFiles collection behavior."""

    def test_get_test_type_by_instrumented_path(
        self,
        tmp_path: Path,
    ) -> None:
        """Finds matching test type by instrumented file path."""
        instrumented = tmp_path / "instrumented_test.py"
        instrumented.touch()
        tf = TestFile(
            original_file_path=tmp_path / "test_orig.py",
            instrumented_behavior_file_path=instrumented,
            test_type=TestType.GENERATED_REGRESSION,
        )
        files = TestFiles(test_files=[tf])
        result = files.get_test_type_by_instrumented_file_path(
            instrumented,
        )
        assert TestType.GENERATED_REGRESSION == result

    def test_get_test_type_by_original_path(
        self,
        tmp_path: Path,
    ) -> None:
        """Finds test type by original file path."""
        original = tmp_path / "test_orig.py"
        original.touch()
        tf = TestFile(original_file_path=original)
        files = TestFiles(test_files=[tf])
        result = files.get_test_type_by_original_file_path(original)
        assert TestType.EXISTING_UNIT_TEST == result

    def test_get_test_type_missing(self) -> None:
        """Returns None for unknown path."""
        files = TestFiles()
        result = files.get_test_type_by_instrumented_file_path(
            Path("/nonexistent.py"),
        )
        assert result is None


class TestTestConfig:
    """TestConfig defaults and construction."""

    def test_config_defaults(self) -> None:
        """test_framework defaults to 'pytest'."""
        config = TestConfig(tests_project_rootdir=Path("/project"))
        assert "pytest" == config.test_framework
        assert "pytest" == config.pytest_cmd

    def test_frozen(self) -> None:
        """Cannot modify attributes on frozen instance."""
        config = TestConfig(tests_project_rootdir=Path("/project"))
        with pytest.raises(AttributeError):
            config.test_framework = "unittest"  # type: ignore[misc]


def _make_replay_invocation(
    *,
    module: str,
    func: str = "test_replay",
    loop_index: int = 0,
    runtime: int = 100,
) -> FunctionTestInvocation:
    """Create a REPLAY_TEST invocation with a given module path."""
    return FunctionTestInvocation(
        loop_index=loop_index,
        id=InvocationId(
            test_module_path=module,
            test_class_name=None,
            test_function_name=func,
            function_getting_tested="target",
            iteration_id="0",
        ),
        file_name=Path("tests/test_replay.py"),
        did_pass=True,
        runtime=runtime,
        test_framework="pytest",
        test_type=TestType.REPLAY_TEST,
        return_value=None,
        cpu_runtime=0,
        timed_out=False,
    )


class TestGroupByBenchmarks:
    """TestResults.group_by_benchmarks grouping behaviour."""

    def test_groups_replay_results_by_benchmark_key(
        self,
        tmp_path: Path,
    ) -> None:
        """Replay results are grouped under matching benchmark keys."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()

        bk = BenchmarkKey(
            module_path="benchmarks.test_sort",
            function_name="sort_fn",
        )
        # module_name_from_file_path converts the replay dir path
        # into a dotted prefix: replay/test_benchmarks_test_sort__replay_test_
        # => replay.test_benchmarks_test_sort__replay_test_
        expected_prefix = "replay.test_benchmarks_test_sort__replay_test_"

        results = TestResults()
        matching = _make_replay_invocation(
            module=expected_prefix + "0",
            runtime=200,
        )
        non_matching = _make_replay_invocation(
            module="other.module",
            func="test_other",
            runtime=50,
        )
        results.add(matching)
        results.add(non_matching)

        grouped = results.group_by_benchmarks(
            [bk],
            replay_dir,
            project_root,
        )
        assert bk in grouped
        assert 1 == len(grouped[bk])
        assert matching in grouped[bk]

    def test_non_replay_results_are_excluded(
        self,
        tmp_path: Path,
    ) -> None:
        """Only REPLAY_TEST results are included in grouping."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()

        bk = BenchmarkKey(
            module_path="benchmarks.test_sort",
            function_name="sort_fn",
        )
        prefix = "replay.test_benchmarks_test_sort__replay_test_"

        results = TestResults()
        # An existing unit test whose module path happens to match.
        unit_inv = FunctionTestInvocation(
            loop_index=0,
            id=InvocationId(
                test_module_path=prefix + "0",
                test_class_name=None,
                test_function_name="test_unit",
                function_getting_tested="target",
                iteration_id="0",
            ),
            file_name=Path("tests/test_unit.py"),
            did_pass=True,
            runtime=100,
            test_framework="pytest",
            test_type=TestType.EXISTING_UNIT_TEST,
            return_value=None,
            cpu_runtime=0,
            timed_out=False,
        )
        results.add(unit_inv)

        grouped = results.group_by_benchmarks(
            [bk],
            replay_dir,
            project_root,
        )
        assert 0 == len(grouped[bk])

    def test_empty_results_returns_empty_groups(
        self,
        tmp_path: Path,
    ) -> None:
        """Empty TestResults produces empty groups."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()

        bk = BenchmarkKey(
            module_path="benchmarks.test_sort",
            function_name="sort_fn",
        )
        results = TestResults()
        grouped = results.group_by_benchmarks(
            [bk],
            replay_dir,
            project_root,
        )
        assert 0 == len(grouped[bk])

    def test_multiple_benchmark_keys(
        self,
        tmp_path: Path,
    ) -> None:
        """Results are correctly distributed across multiple keys."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()

        bk_a = BenchmarkKey(
            module_path="benchmarks.test_a",
            function_name="fn_a",
        )
        bk_b = BenchmarkKey(
            module_path="benchmarks.test_b",
            function_name="fn_b",
        )

        prefix_a = "replay.test_benchmarks_test_a__replay_test_"
        prefix_b = "replay.test_benchmarks_test_b__replay_test_"

        results = TestResults()
        inv_a = _make_replay_invocation(
            module=prefix_a + "0",
            func="test_a",
            runtime=100,
        )
        inv_b = _make_replay_invocation(
            module=prefix_b + "0",
            func="test_b",
            runtime=200,
        )
        results.add(inv_a)
        results.add(inv_b)

        grouped = results.group_by_benchmarks(
            [bk_a, bk_b],
            replay_dir,
            project_root,
        )
        assert 1 == len(grouped[bk_a])
        assert inv_a in grouped[bk_a]
        assert 1 == len(grouped[bk_b])
        assert inv_b in grouped[bk_b]