Mirror of https://github.com/codeflash-ai/codeflash-agent.git, synced 2026-05-04 18:25:19 +00:00
Measure both wall-clock time (perf_counter_ns) and CPU thread time (thread_time_ns) in instrumented test code. cpu_runtime is now a required int field on FunctionTestInvocation, stored in the SQLite test_results table as a 10th column. Also fixes the sleeptime.py bug (10e9 → 1e9 divisor) and removes the binary pickle parser (parse_test_return_values_bin) since no writer exists in the current codebase — SQLite is the sole data capture path.
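A minimal sketch of the dual timing capture described above, assuming a hypothetical timed_call wrapper around the call under test (the generated instrumentation in this repository may differ):

import time


def timed_call(func, *args, **kwargs):
    """Call *func*, returning its result plus wall-clock and CPU-thread durations in ns."""
    wall_start = time.perf_counter_ns()
    cpu_start = time.thread_time_ns()
    result = func(*args, **kwargs)
    cpu_runtime = time.thread_time_ns() - cpu_start  # CPU thread time, ns
    runtime = time.perf_counter_ns() - wall_start  # wall-clock time, ns
    return result, runtime, cpu_runtime


# Converting nanoseconds to seconds divides by 1e9 (the divisor the
# sleeptime.py fix restores), e.g. runtime / 1e9.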
390 lines
13 KiB
Python
"""Data models for test execution and results."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from collections import Counter, defaultdict
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
import attrs
|
|
import libcst as cst
|
|
|
|
from .._model import VerificationType
|
|
from ..test_discovery.models import TestType
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Iterator
|
|
|
|
from ..benchmarking.models import BenchmarkKey
|
|
from ..test_discovery.models import TestsInFile
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
@attrs.frozen
class InvocationId:
    """Identifies a specific test function invocation."""

    test_module_path: str
    test_class_name: str | None
    test_function_name: str | None
    function_getting_tested: str
    iteration_id: str | None

    def id(self) -> str:
        """Return a unique string identifier for this invocation."""
        class_prefix = (
            f"{self.test_class_name}." if self.test_class_name else ""
        )
        return (
            f"{self.test_module_path}:{class_prefix}"
            f"{self.test_function_name}:"
            f"{self.function_getting_tested}:{self.iteration_id}"
        )

    def test_fn_qualified_name(self) -> str:
        """Return *ClassName.test_function* or just *test_function*."""
        if self.test_class_name:
            return f"{self.test_class_name}.{self.test_function_name}"
        return str(self.test_function_name)

    @staticmethod
    def find_func_in_class(
        class_node: cst.ClassDef,
        func_name: str,
    ) -> cst.FunctionDef | None:
        """Find a function definition inside a class node."""
        for stmt in class_node.body.body:
            if (
                isinstance(stmt, cst.FunctionDef)
                and stmt.name.value == func_name
            ):
                return stmt
        return None

    def get_src_code(self, test_path: Path) -> str | None:
        """Extract the source code of this test function from *test_path*."""
        if not test_path.exists():
            return None
        try:
            test_src = test_path.read_text(encoding="utf-8")
            module_node = cst.parse_module(test_src)
        except (cst.ParserSyntaxError, UnicodeDecodeError):
            return (
                f"# Test: {self.test_function_name}\n"
                f"# File: {test_path.name}\n"
                f"# Testing function: {self.function_getting_tested}"
            )

        if self.test_class_name:
            for stmt in module_node.body:
                if (
                    isinstance(stmt, cst.ClassDef)
                    and stmt.name.value == self.test_class_name
                ):
                    func_node = self.find_func_in_class(
                        stmt,
                        self.test_function_name or "",
                    )
                    if func_node:
                        return module_node.code_for_node(
                            func_node,
                        ).strip()
            return None

        for stmt in module_node.body:
            if (
                isinstance(stmt, cst.FunctionDef)
                and stmt.name.value == self.test_function_name
            ):
                return module_node.code_for_node(stmt).strip()
        return None

    @staticmethod
    def from_str_id(
        string_id: str,
        iteration_id: str | None = None,
    ) -> InvocationId:
        """Parse an invocation id from its string form."""
        components = string_id.split(":")
        if len(components) != 4:  # noqa: PLR2004
            msg = (
                f"Expected 4 colon-separated components, "
                f"got {len(components)}: {string_id!r}"
            )
            raise ValueError(msg)
        second_components = components[1].split(".")
        if len(second_components) == 1:
            test_class_name = None
            test_function_name = second_components[0]
        else:
            test_class_name = second_components[0]
            test_function_name = second_components[1]
        return InvocationId(
            test_module_path=components[0],
            test_class_name=test_class_name,
            test_function_name=test_function_name,
            function_getting_tested=components[2],
            iteration_id=(iteration_id or components[3]),
        )


@attrs.frozen
class FunctionTestInvocation:
    """A single function invocation result from a test run."""

    loop_index: int
    id: InvocationId
    file_name: Path = attrs.field(converter=Path)
    did_pass: bool
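    # Wall-clock runtime of the invocation in nanoseconds (perf_counter_ns).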
    runtime: int | None
    test_framework: str
    test_type: TestType
    return_value: object | None
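    # CPU thread time in nanoseconds (thread_time_ns); persisted as the
    # tenth column of the SQLite test_results table.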
    cpu_runtime: int
    timed_out: bool | None
    verification_type: str | None = VerificationType.FUNCTION_CALL
    stdout: str | None = None

    @property
    def unique_invocation_loop_id(self) -> str:
        """Return a unique id incorporating the loop index."""
        return f"{self.loop_index}:{self.id.id()}"


@attrs.define
class TestResults:
    """Collection of test invocation results."""

    __test__ = False

    test_results: list[FunctionTestInvocation] = attrs.Factory(list)
    test_result_idx: dict[str, int] = attrs.Factory(dict)
    perf_stdout: str | None = None
    test_failures: dict[str, str] | None = None

    def add(
        self,
        function_test_invocation: FunctionTestInvocation,
    ) -> None:
        """Add an invocation, skipping duplicates."""
        uid = function_test_invocation.unique_invocation_loop_id
        if uid in self.test_result_idx:
            log.debug("Test result with id %s already exists, skipping", uid)
            return
        self.test_result_idx[uid] = len(self.test_results)
        self.test_results.append(function_test_invocation)

    def merge(self, other: TestResults) -> None:
        """Merge another *TestResults* into this one."""
        offset = len(self.test_results)
        self.test_results.extend(other.test_results)
        for key, idx in other.test_result_idx.items():
            if key in self.test_result_idx:
                msg = f"Duplicate test result id: {key}"
                raise ValueError(msg)
            self.test_result_idx[key] = idx + offset

    def get_by_unique_invocation_loop_id(
        self,
        uid: str,
    ) -> FunctionTestInvocation | None:
        """Look up an invocation by its unique loop id."""
        try:
            return self.test_results[self.test_result_idx[uid]]
        except (IndexError, KeyError):
            return None

    def number_of_loops(self) -> int:
        """Return the maximum loop index across all results."""
        if not self.test_results:
            return 0
        return max(r.loop_index for r in self.test_results)

    def usable_runtime_data_by_test_case(
        self,
    ) -> dict[InvocationId, list[int]]:
        """Return runtimes grouped by invocation id (passing only)."""
        by_id: dict[InvocationId, list[int]] = {}
        for result in self.test_results:
            if result.did_pass and result.runtime is not None:
                by_id.setdefault(result.id, []).append(result.runtime)
        return by_id

    def total_passed_runtime(self) -> int:
        """Sum of minimum runtimes across all passing test cases.

        Each test case's runtime is the minimum across all loop
        iterations. Returns nanoseconds.
        """
        return sum(
            min(runtimes)
            for runtimes in self.usable_runtime_data_by_test_case().values()
        )

    def file_to_no_of_tests(
        self,
        test_functions_to_remove: list[str],
    ) -> Counter[Path]:
        """Count generated regression results per file, excluding *test_functions_to_remove*."""
        counts: Counter[Path] = Counter()
        for result in self.test_results:
            if (
                result.test_type == TestType.GENERATED_REGRESSION
                and result.id.test_function_name
                not in test_functions_to_remove
            ):
                counts[result.file_name] += 1
        return counts

    def __iter__(self) -> Iterator[FunctionTestInvocation]:
        """Iterate over test invocation results."""
        return iter(self.test_results)

    def __len__(self) -> int:
        """Return the number of test invocation results."""
        return len(self.test_results)

    def __getitem__(self, index: int) -> FunctionTestInvocation:
        """Return the test invocation result at the given index."""
        return self.test_results[index]

    def __bool__(self) -> bool:
        """Return True if there are any test results."""
        return bool(self.test_results)

    def __contains__(
        self,
        value: object,
    ) -> bool:
        """Check if a test invocation result is in this collection."""
        return value in self.test_results

    def get_all_unique_invocation_loop_ids(self) -> set[str]:
        """Return the set of all unique invocation loop ids."""
        return {
            result.unique_invocation_loop_id for result in self.test_results
        }

    def get_test_pass_fail_report_by_type(
        self,
    ) -> dict[TestType, dict[str, int]]:
        """Count passed/failed tests grouped by test type."""
        report: dict[TestType, dict[str, int]] = {
            tt: {"passed": 0, "failed": 0} for tt in TestType
        }
        for result in self.test_results:
            if result.loop_index != 1:
                continue
            if result.did_pass:
                report[result.test_type]["passed"] += 1
            else:
                report[result.test_type]["failed"] += 1
        return report

    def group_by_benchmarks(
        self,
        benchmark_keys: list[BenchmarkKey],
        benchmark_replay_test_dir: Path,
        project_root: Path,
    ) -> dict[BenchmarkKey, TestResults]:
        """Group replay test results by benchmark key.

        Each benchmark key maps to the :class:`TestResults` whose
        replay test module path starts with the expected prefix
        derived from the benchmark's module path.
        """
        from ..test_discovery.linking import (  # noqa: PLC0415
            module_name_from_file_path,
        )

        test_results_by_benchmark: dict[BenchmarkKey, TestResults] = (
            defaultdict(TestResults)
        )
        benchmark_module_path: dict[BenchmarkKey, str] = {}
        for benchmark_key in benchmark_keys:
            benchmark_module_path[benchmark_key] = module_name_from_file_path(
                benchmark_replay_test_dir.resolve()
                / (
                    "test_"
                    + benchmark_key.module_path.replace(".", "_")
                    + "__replay_test_"
                ),
                project_root,
            )
        for test_result in self.test_results:
            if test_result.test_type == TestType.REPLAY_TEST:
                for bk, mod_path in benchmark_module_path.items():
                    if test_result.id.test_module_path.startswith(
                        mod_path,
                    ):
                        test_results_by_benchmark[bk].add(test_result)
        return test_results_by_benchmark


@attrs.frozen
class TestFile:
    """A test file ready for execution."""

    __test__ = False

    original_file_path: Path = attrs.field(converter=Path)
    instrumented_behavior_file_path: Path | None = None
    benchmarking_file_path: Path | None = None
    test_type: TestType = TestType.EXISTING_UNIT_TEST
    tests_in_file: tuple[TestsInFile, ...] = ()


@attrs.define
class TestFiles:
    """Collection of test files for a test run."""

    __test__ = False

    test_files: list[TestFile] = attrs.Factory(list)

    def get_test_type_by_instrumented_file_path(
        self,
        path: Path,
    ) -> TestType | None:
        """Find the test type for an instrumented file path."""
        resolved = path.resolve()
        for tf in self.test_files:
            if (
                tf.instrumented_behavior_file_path
                and tf.instrumented_behavior_file_path.resolve() == resolved
            ):
                return tf.test_type
            if (
                tf.benchmarking_file_path
                and tf.benchmarking_file_path.resolve() == resolved
            ):
                return tf.test_type
        return None

    def get_test_type_by_original_file_path(
        self,
        path: Path,
    ) -> TestType | None:
        """Find the test type for an original file path."""
        resolved = path.resolve()
        for tf in self.test_files:
            if tf.original_file_path.resolve() == resolved:
                return tf.test_type
        return None


@attrs.frozen
class TestConfig:
    """Configuration for test execution."""

    __test__ = False

    tests_project_rootdir: Path = attrs.field(converter=Path)
    test_framework: str = "pytest"
    pytest_cmd: str = "pytest"
    tests_root: str | Path = "tests"
    project_root_path: str | Path = "."
    use_cache: bool = True
    module_root: Path | None = None