codeflash-agent/packages/codeflash-python/src/codeflash_python/testing/models.py
Kevin Turcios eb6a0be717 feat: add dual-clock instrumentation (wall + CPU time) and remove dead binary parser
Measure both wall-clock time (perf_counter_ns) and CPU thread time
(thread_time_ns) in instrumented test code. cpu_runtime is now a required
int field on FunctionTestInvocation, stored in the SQLite test_results
table as the 10th column.

Also fix the sleeptime.py bug (10e9 → 1e9 divisor) and remove the
binary pickle parser (parse_test_return_values_bin), since no writer
exists in the current codebase; SQLite is the sole data capture path.
2026-04-24 02:21:22 -05:00
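
A minimal sketch of the dual-clock pattern this commit describes, assuming the instrumentation brackets each call with paired clock reads (function_under_test and args are illustrative placeholders, not the actual template):

    import time

    wall_start = time.perf_counter_ns()
    cpu_start = time.thread_time_ns()
    result = function_under_test(*args)  # illustrative call under test
    runtime = time.perf_counter_ns() - wall_start    # wall-clock ns
    cpu_runtime = time.thread_time_ns() - cpu_start  # CPU thread-time ns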

"""Data models for test execution and results."""
from __future__ import annotations
import logging
from collections import Counter, defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
import attrs
import libcst as cst
from .._model import VerificationType
from ..test_discovery.models import TestType
if TYPE_CHECKING:
from collections.abc import Iterator
from ..benchmarking.models import BenchmarkKey
from ..test_discovery.models import TestsInFile
log = logging.getLogger(__name__)
@attrs.frozen
class InvocationId:
"""Identifies a specific test function invocation."""
test_module_path: str
test_class_name: str | None
test_function_name: str | None
function_getting_tested: str
iteration_id: str | None
def id(self) -> str:
"""Return a unique string identifier for this invocation."""
class_prefix = (
f"{self.test_class_name}." if self.test_class_name else ""
)
return (
f"{self.test_module_path}:{class_prefix}"
f"{self.test_function_name}:"
f"{self.function_getting_tested}:{self.iteration_id}"
)
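    # Illustrative id() output (hypothetical values): the id string joins
    # module path, qualified test name, target function, and iteration id:
    #   InvocationId("tests.test_calc", "TestCalc", "test_add", "add", "0").id()
    #   -> "tests.test_calc:TestCalc.test_add:add:0"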
def test_fn_qualified_name(self) -> str:
"""Return *ClassName.test_function* or just *test_function*."""
if self.test_class_name:
return f"{self.test_class_name}.{self.test_function_name}"
return str(self.test_function_name)
@staticmethod
def find_func_in_class(
class_node: cst.ClassDef,
func_name: str,
) -> cst.FunctionDef | None:
"""Find a function definition inside a class node."""
for stmt in class_node.body.body:
if (
isinstance(stmt, cst.FunctionDef)
and stmt.name.value == func_name
):
return stmt
return None
def get_src_code(self, test_path: Path) -> str | None:
"""Extract the source code of this test function from *test_path*."""
if not test_path.exists():
return None
try:
test_src = test_path.read_text(encoding="utf-8")
module_node = cst.parse_module(test_src)
except (cst.ParserSyntaxError, UnicodeDecodeError):
return (
f"# Test: {self.test_function_name}\n"
f"# File: {test_path.name}\n"
f"# Testing function: {self.function_getting_tested}"
)
if self.test_class_name:
for stmt in module_node.body:
if (
isinstance(stmt, cst.ClassDef)
and stmt.name.value == self.test_class_name
):
func_node = self.find_func_in_class(
stmt,
self.test_function_name or "",
)
if func_node:
return module_node.code_for_node(
func_node,
).strip()
return None
for stmt in module_node.body:
if (
isinstance(stmt, cst.FunctionDef)
and stmt.name.value == self.test_function_name
):
return module_node.code_for_node(stmt).strip()
return None
@staticmethod
def from_str_id(
string_id: str,
iteration_id: str | None = None,
) -> InvocationId:
"""Parse an invocation id from its string form."""
components = string_id.split(":")
if len(components) != 4: # noqa: PLR2004
msg = (
f"Expected 4 colon-separated components, "
f"got {len(components)}: {string_id!r}"
)
raise ValueError(msg)
second_components = components[1].split(".")
if len(second_components) == 1:
test_class_name = None
test_function_name = second_components[0]
else:
test_class_name = second_components[0]
test_function_name = second_components[1]
return InvocationId(
test_module_path=components[0],
test_class_name=test_class_name,
test_function_name=test_function_name,
function_getting_tested=components[2],
iteration_id=(iteration_id or components[3]),
)
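    # Illustrative round trip with from_str_id (hypothetical values):
    #   inv = InvocationId.from_str_id("tests.test_calc:TestCalc.test_add:add:0")
    #   assert inv.test_class_name == "TestCalc"
    #   assert inv.id() == "tests.test_calc:TestCalc.test_add:add:0"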
@attrs.frozen
class FunctionTestInvocation:
"""A single function invocation result from a test run."""
loop_index: int
id: InvocationId
file_name: Path = attrs.field(converter=Path)
did_pass: bool
runtime: int | None
test_framework: str
test_type: TestType
return_value: object | None
cpu_runtime: int
timed_out: bool | None
verification_type: str | None = VerificationType.FUNCTION_CALL
stdout: str | None = None
@property
def unique_invocation_loop_id(self) -> str:
"""Return a unique id incorporating the loop index."""
return f"{self.loop_index}:{self.id.id()}"
@attrs.define
class TestResults:
"""Collection of test invocation results."""
__test__ = False
test_results: list[FunctionTestInvocation] = attrs.Factory(list)
test_result_idx: dict[str, int] = attrs.Factory(dict)
perf_stdout: str | None = None
test_failures: dict[str, str] | None = None
def add(
self,
function_test_invocation: FunctionTestInvocation,
) -> None:
"""Add an invocation, skipping duplicates."""
uid = function_test_invocation.unique_invocation_loop_id
if uid in self.test_result_idx:
log.debug("Test result with id %s already exists, skipping", uid)
return
self.test_result_idx[uid] = len(self.test_results)
self.test_results.append(function_test_invocation)
    def merge(self, other: TestResults) -> None:
        """Merge another *TestResults* into this one.

        Duplicate ids are detected before any mutation, so a failed
        merge cannot leave this collection half-merged.
        """
        duplicate_ids = self.test_result_idx.keys() & other.test_result_idx.keys()
        if duplicate_ids:
            msg = f"Duplicate test result ids: {sorted(duplicate_ids)}"
            raise ValueError(msg)
        offset = len(self.test_results)
        self.test_results.extend(other.test_results)
        for key, idx in other.test_result_idx.items():
            self.test_result_idx[key] = idx + offset
def get_by_unique_invocation_loop_id(
self,
uid: str,
) -> FunctionTestInvocation | None:
"""Look up an invocation by its unique loop id."""
try:
return self.test_results[self.test_result_idx[uid]]
except (IndexError, KeyError):
return None
def number_of_loops(self) -> int:
"""Return the maximum loop index across all results."""
if not self.test_results:
return 0
return max(r.loop_index for r in self.test_results)
def usable_runtime_data_by_test_case(
self,
) -> dict[InvocationId, list[int]]:
"""Return runtimes grouped by invocation id (passing only)."""
by_id: dict[InvocationId, list[int]] = {}
for result in self.test_results:
if result.did_pass and result.runtime is not None:
by_id.setdefault(result.id, []).append(result.runtime)
return by_id
def total_passed_runtime(self) -> int:
"""Sum of minimum runtimes across all passing test cases.
Each test case's runtime is the minimum across all loop
iterations. Returns nanoseconds.
"""
return sum(
min(runtimes)
for runtimes in self.usable_runtime_data_by_test_case().values()
)
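    # Illustrative (hypothetical runtimes in ns): for two passing test cases
    # with per-loop runtimes [500, 450, 470] and [1000, 980],
    #   total_passed_runtime() == 450 + 980 == 1430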
def file_to_no_of_tests(
self,
test_functions_to_remove: list[str],
) -> Counter[Path]:
"""Count generated regression results per file, excluding *test_functions_to_remove*."""
counts: Counter[Path] = Counter()
for result in self.test_results:
if (
result.test_type == TestType.GENERATED_REGRESSION
and result.id.test_function_name
not in test_functions_to_remove
):
counts[result.file_name] += 1
return counts
def __iter__(self) -> Iterator[FunctionTestInvocation]:
"""Iterate over test invocation results."""
return iter(self.test_results)
def __len__(self) -> int:
"""Return the number of test invocation results."""
return len(self.test_results)
def __getitem__(self, index: int) -> FunctionTestInvocation:
"""Return the test invocation result at the given index."""
return self.test_results[index]
def __bool__(self) -> bool:
"""Return True if there are any test results."""
return bool(self.test_results)
def __contains__(
self,
value: object,
) -> bool:
"""Check if a test invocation result is in this collection."""
return value in self.test_results
def get_all_unique_invocation_loop_ids(self) -> set[str]:
"""Return the set of all unique invocation loop ids."""
return {
result.unique_invocation_loop_id for result in self.test_results
}
def get_test_pass_fail_report_by_type(
self,
) -> dict[TestType, dict[str, int]]:
"""Count passed/failed tests grouped by test type."""
report: dict[TestType, dict[str, int]] = {
tt: {"passed": 0, "failed": 0} for tt in TestType
}
for result in self.test_results:
if result.loop_index != 1:
continue
if result.did_pass:
report[result.test_type]["passed"] += 1
else:
report[result.test_type]["failed"] += 1
return report
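    # Illustrative report shape (hypothetical counts), counting only the
    # first loop iteration of each invocation:
    #   {TestType.EXISTING_UNIT_TEST: {"passed": 3, "failed": 1},
    #    TestType.GENERATED_REGRESSION: {"passed": 5, "failed": 0}, ...}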
def group_by_benchmarks(
self,
benchmark_keys: list[BenchmarkKey],
benchmark_replay_test_dir: Path,
project_root: Path,
) -> dict[BenchmarkKey, TestResults]:
"""Group replay test results by benchmark key.
Each benchmark key maps to the :class:`TestResults` whose
replay test module path starts with the expected prefix
derived from the benchmark's module path.
"""
from ..test_discovery.linking import ( # noqa: PLC0415
module_name_from_file_path,
)
test_results_by_benchmark: dict[BenchmarkKey, TestResults] = (
defaultdict(TestResults)
)
benchmark_module_path: dict[BenchmarkKey, str] = {}
for benchmark_key in benchmark_keys:
benchmark_module_path[benchmark_key] = module_name_from_file_path(
benchmark_replay_test_dir.resolve()
/ (
"test_"
+ benchmark_key.module_path.replace(".", "_")
+ "__replay_test_"
),
project_root,
)
for test_result in self.test_results:
if test_result.test_type == TestType.REPLAY_TEST:
for bk, mod_path in benchmark_module_path.items():
if test_result.id.test_module_path.startswith(
mod_path,
):
test_results_by_benchmark[bk].add(test_result)
return test_results_by_benchmark
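    # Illustrative (hypothetical values): a benchmark whose module_path is
    # "benchmarks.test_latency" is matched against the module name derived from
    #   <replay_dir>/test_benchmarks_test_latency__replay_test_
    # and collects every REPLAY_TEST result whose test_module_path starts
    # with that name.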
@attrs.frozen
class TestFile:
"""A test file ready for execution."""
__test__ = False
original_file_path: Path = attrs.field(converter=Path)
instrumented_behavior_file_path: Path | None = None
benchmarking_file_path: Path | None = None
test_type: TestType = TestType.EXISTING_UNIT_TEST
tests_in_file: tuple[TestsInFile, ...] = ()
@attrs.define
class TestFiles:
"""Collection of test files for a test run."""
__test__ = False
test_files: list[TestFile] = attrs.Factory(list)
def get_test_type_by_instrumented_file_path(
self,
path: Path,
) -> TestType | None:
"""Find the test type for an instrumented file path."""
resolved = path.resolve()
for tf in self.test_files:
if (
tf.instrumented_behavior_file_path
and tf.instrumented_behavior_file_path.resolve() == resolved
):
return tf.test_type
if (
tf.benchmarking_file_path
and tf.benchmarking_file_path.resolve() == resolved
):
return tf.test_type
return None
def get_test_type_by_original_file_path(
self,
path: Path,
) -> TestType | None:
"""Find the test type for an original file path."""
resolved = path.resolve()
for tf in self.test_files:
if tf.original_file_path.resolve() == resolved:
return tf.test_type
return None
@attrs.frozen
class TestConfig:
"""Configuration for test execution."""
__test__ = False
tests_project_rootdir: Path = attrs.field(converter=Path)
test_framework: str = "pytest"
pytest_cmd: str = "pytest"
tests_root: str | Path = "tests"
project_root_path: str | Path = "."
use_cache: bool = True
module_root: Path | None = None
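# Illustrative construction (hypothetical paths):
#   config = TestConfig(
#       tests_project_rootdir=Path("/repo/tests"),
#       tests_root=Path("/repo/tests"),
#       project_root_path=Path("/repo"),
#   )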