Rename TestDiff/TestDiffScope to BehaviorDiff/BehaviorDiffScope

These classes represent behavioral verification diffs, not tests. Because
their names start with Test, pytest tried to collect them as test classes
and emitted collection warnings.
This commit is contained in:
Kevin Turcios 2026-04-23 04:37:24 -05:00
parent 9e893675c9
commit 4f98b5421f
8 changed files with 91 additions and 91 deletions

View file

@ -16,22 +16,22 @@ from codeflash_api.languages.python._markdown import (
split_markdown_code,
)
from codeflash_api.repair.schemas import (
TestDiff,
TestDiffScope,
BehaviorDiff,
BehaviorDiffScope,
)
log = logging.getLogger(__name__)
SCOPE_DESCRIPTIONS: dict[TestDiffScope, str] = {
TestDiffScope.RETURN_VALUE: (
SCOPE_DESCRIPTIONS: dict[BehaviorDiffScope, str] = {
BehaviorDiffScope.RETURN_VALUE: (
"The function returned a different value in the"
" optimized code compared to the original."
),
TestDiffScope.STDOUT: (
BehaviorDiffScope.STDOUT: (
"The output printed to stdout is different in the"
" optimized code compared to the original."
),
TestDiffScope.DID_PASS: (
BehaviorDiffScope.DID_PASS: (
"The test passed in one version but failed in the"
" other (a change in pass/fail behavior)."
),
@ -39,7 +39,7 @@ SCOPE_DESCRIPTIONS: dict[TestDiffScope, str] = {
def build_test_details(
test_diffs: list[TestDiff],
test_diffs: list[BehaviorDiff],
language: str = "python",
) -> str:
"""
@ -59,7 +59,7 @@ def build_test_details(
def _format_single_diff(
diff: TestDiff,
diff: BehaviorDiff,
sections: defaultdict[str, list[str]],
seen_headers: set[str],
test_error_label: str,
@ -92,7 +92,7 @@ def _format_single_diff(
scope_desc = SCOPE_DESCRIPTIONS.get(diff.scope, diff.scope.value)
detail_lines = [f"- {scope_desc}"]
if diff.scope != TestDiffScope.DID_PASS:
if diff.scope != BehaviorDiffScope.DID_PASS:
detail_lines.append(f" Expected: {diff.original_value!r}")
detail_lines.append(f" Got: {diff.candidate_value!r}")
else:
@ -110,7 +110,7 @@ def build_user_prompt(
template: str,
original_source_code: str,
modified_source_code: str,
test_diffs: list[TestDiff],
test_diffs: list[BehaviorDiff],
language: str = "python",
) -> str:
"""

View file

@ -6,7 +6,7 @@ from typing import Any
from pydantic import BaseModel
class TestDiffScope(str, enum.Enum):
class BehaviorDiffScope(str, enum.Enum):
"""
The dimension on which a test diff was observed.
"""
@ -17,12 +17,12 @@ class TestDiffScope(str, enum.Enum):
TIMED_OUT = "timed_out"
class TestDiff(BaseModel):
class BehaviorDiff(BaseModel):
"""
A single behavioural difference between original and optimised code.
"""
scope: TestDiffScope
scope: BehaviorDiffScope
original_value: (
bool | str | int | float | dict[str, Any] | list[Any] | None
) = None
@ -45,7 +45,7 @@ class CodeRepairRequest(BaseModel):
optimization_id: str
original_source_code: str
modified_source_code: str
test_diffs: list[TestDiff]
test_diffs: list[BehaviorDiff]
language: str = "python"
rerun_trace_id: str | None = None

View file

@ -12,8 +12,8 @@ from codeflash_api.repair._context import (
)
from codeflash_api.repair.schemas import (
CodeRepairRequest,
TestDiff,
TestDiffScope,
BehaviorDiff,
BehaviorDiffScope,
)
# -------------------------------------------------------------------
@ -42,8 +42,8 @@ class TestCodeRepairRequest:
"""
A request with all fields deserializes.
"""
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value="1",
candidate_value="2",
original_pass=True,
@ -60,18 +60,18 @@ class TestCodeRepairRequest:
rerun_trace_id=str(uuid.uuid4()),
)
assert 1 == len(req.test_diffs)
assert TestDiffScope.RETURN_VALUE == req.test_diffs[0].scope
assert BehaviorDiffScope.RETURN_VALUE == req.test_diffs[0].scope
class TestTestDiff:
"""Tests for TestDiff schema."""
class TestBehaviorDiff:
"""Tests for BehaviorDiff schema."""
def test_all_scopes(self):
"""
Every scope enum value can be used.
"""
for scope in TestDiffScope:
diff = TestDiff(
for scope in BehaviorDiffScope:
diff = BehaviorDiff(
scope=scope,
original_pass=True,
candidate_pass=False,
@ -82,8 +82,8 @@ class TestTestDiff:
"""
Optional fields default to None.
"""
diff = TestDiff(
scope=TestDiffScope.DID_PASS,
diff = BehaviorDiff(
scope=BehaviorDiffScope.DID_PASS,
original_pass=True,
candidate_pass=False,
)
@ -112,8 +112,8 @@ class TestBuildTestDetails:
"""
Return value diffs show Expected/Got lines.
"""
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value="hello",
candidate_value="world",
original_pass=True,
@ -129,8 +129,8 @@ class TestBuildTestDetails:
"""
DID_PASS diffs show pass/fail status.
"""
diff = TestDiff(
scope=TestDiffScope.DID_PASS,
diff = BehaviorDiff(
scope=BehaviorDiffScope.DID_PASS,
original_pass=True,
candidate_pass=False,
test_src_code="test_something",
@ -143,8 +143,8 @@ class TestBuildTestDetails:
"""
Pytest errors appear in the output when present.
"""
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=1,
candidate_value=2,
original_pass=True,
@ -161,16 +161,16 @@ class TestBuildTestDetails:
Multiple diffs for the same test source share a header.
"""
diffs = [
TestDiff(
scope=TestDiffScope.RETURN_VALUE,
BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=1,
candidate_value=2,
original_pass=True,
candidate_pass=False,
test_src_code="test_func",
),
TestDiff(
scope=TestDiffScope.STDOUT,
BehaviorDiff(
scope=BehaviorDiffScope.STDOUT,
original_value="out",
candidate_value="err",
original_pass=True,
@ -186,16 +186,16 @@ class TestBuildTestDetails:
Different test sources get separate sections.
"""
diffs = [
TestDiff(
scope=TestDiffScope.RETURN_VALUE,
BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=1,
candidate_value=2,
original_pass=True,
candidate_pass=False,
test_src_code="test_a",
),
TestDiff(
scope=TestDiffScope.RETURN_VALUE,
BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=3,
candidate_value=4,
original_pass=True,
@ -211,8 +211,8 @@ class TestBuildTestDetails:
"""
Non-python language uses 'Test error' label.
"""
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=1,
candidate_value=2,
original_pass=True,
@ -227,8 +227,8 @@ class TestBuildTestDetails:
"""
Missing test source shows 'Not available'.
"""
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=1,
candidate_value=2,
original_pass=True,
@ -255,8 +255,8 @@ class TestBuildUserPrompt:
"Modified: {modified_source_code}\n"
"Tests: {test_details}"
)
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=1,
candidate_value=2,
original_pass=True,

View file

@ -3,17 +3,17 @@
from ._baseline import establish_original_code_baseline
from ._verification import compare_test_results
from .models import (
BehaviorDiff,
BehaviorDiffScope,
OptimizedCandidateResult,
OriginalCodeBaseline,
TestDiff,
TestDiffScope,
)
__all__ = [
"BehaviorDiff",
"BehaviorDiffScope",
"OptimizedCandidateResult",
"OriginalCodeBaseline",
"TestDiff",
"TestDiffScope",
"compare_test_results",
"establish_original_code_baseline",
]

View file

@ -11,7 +11,7 @@ from typing import TYPE_CHECKING
from .._model import VerificationType
from ..test_discovery.models import TestType
from ._comparator import comparator
from .models import TestDiff, TestDiffScope
from .models import BehaviorDiff, BehaviorDiffScope
if TYPE_CHECKING:
from ..testing.models import TestResults
@ -22,7 +22,7 @@ INCREASED_RECURSION_LIMIT = 5000
_reprlib_repr = reprlib.Repr()
_reprlib_repr.maxstring = 1500
_test_diff_repr = _reprlib_repr.repr
_behavior_diff_repr = _reprlib_repr.repr
def safe_repr(obj: object) -> str:
@ -48,7 +48,7 @@ def compare_test_results( # noqa: C901, PLR0912
original_results: TestResults,
candidate_results: TestResults,
pass_fail_only: bool = False, # noqa: FBT001, FBT002
) -> tuple[bool, list[TestDiff]]:
) -> tuple[bool, list[BehaviorDiff]]:
"""Compare original and candidate test results for behavioral equivalence.
Returns a tuple of (all_match, diffs). When *pass_fail_only* is True,
@ -66,7 +66,7 @@ def compare_test_results( # noqa: C901, PLR0912
| candidate_results.get_all_unique_invocation_loop_ids()
)
test_diffs: list[TestDiff] = []
test_diffs: list[BehaviorDiff] = []
did_all_timeout = True
for test_id in test_ids_superset:
@ -143,8 +143,8 @@ def compare_test_results( # noqa: C901, PLR0912
TestType.REPLAY_TEST,
} and (cdd_test_result.did_pass != original_test_result.did_pass):
test_diffs.append(
TestDiff(
scope=TestDiffScope.DID_PASS,
BehaviorDiff(
scope=BehaviorDiffScope.DID_PASS,
original_value=str(original_test_result.did_pass),
candidate_value=str(cdd_test_result.did_pass),
test_src_code=(
@ -164,14 +164,14 @@ def compare_test_results( # noqa: C901, PLR0912
superset_obj=superset_obj,
):
test_diffs.append(
TestDiff(
scope=TestDiffScope.RETURN_VALUE,
original_value=_test_diff_repr(
BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_value=_behavior_diff_repr(
safe_repr(
original_test_result.return_value,
),
),
candidate_value=_test_diff_repr(
candidate_value=_behavior_diff_repr(
safe_repr(
cdd_test_result.return_value,
),
@ -214,8 +214,8 @@ def compare_test_results( # noqa: C901, PLR0912
)
):
test_diffs.append(
TestDiff(
scope=TestDiffScope.STDOUT,
BehaviorDiff(
scope=BehaviorDiffScope.STDOUT,
original_value=str(original_test_result.stdout),
candidate_value=str(cdd_test_result.stdout),
test_src_code=(

View file

@ -14,7 +14,7 @@ if TYPE_CHECKING:
from ..testing.models import TestResults
class TestDiffScope(str, enum.Enum):
class BehaviorDiffScope(str, enum.Enum):
"""Scope of a behavioral difference between original and candidate."""
RETURN_VALUE = "return_value"
@ -23,10 +23,10 @@ class TestDiffScope(str, enum.Enum):
@attrs.frozen
class TestDiff:
class BehaviorDiff:
"""A single behavioral difference between original and candidate."""
scope: TestDiffScope
scope: BehaviorDiffScope
original_pass: bool
candidate_pass: bool
original_value: str | None = None

View file

@ -501,16 +501,16 @@ class TestRunTestsAndBenchmark:
) -> None:
"""When behavioral tests fail with diffs, diffs are stored."""
from codeflash_python.verification.models import (
TestDiff,
TestDiffScope,
BehaviorDiff,
BehaviorDiffScope,
)
eval_ctx = EvaluationContext()
failed_diffs: dict[str, list[Any]] = {}
bench_results: dict[str, TestResults] = {}
diff_obj = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff_obj = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_pass=True,
candidate_pass=True,
original_value="42",

View file

@ -18,8 +18,8 @@ from codeflash_python.verification._verification import (
)
from codeflash_python.verification.models import (
OptimizedCandidateResult,
TestDiff,
TestDiffScope,
BehaviorDiff,
BehaviorDiffScope,
)
@ -125,7 +125,7 @@ class TestCompareTestResults:
assert match is False
assert 1 == len(diffs)
assert TestDiffScope.DID_PASS == diffs[0].scope
assert BehaviorDiffScope.DID_PASS == diffs[0].scope
assert diffs[0].original_pass is True
assert diffs[0].candidate_pass is False
@ -142,7 +142,7 @@ class TestCompareTestResults:
assert match is False
assert 1 == len(diffs)
assert TestDiffScope.RETURN_VALUE == diffs[0].scope
assert BehaviorDiffScope.RETURN_VALUE == diffs[0].scope
def test_stdout_mismatch(self) -> None:
"""Same return values but different stdout produces STDOUT diff."""
@ -165,7 +165,7 @@ class TestCompareTestResults:
assert match is False
assert 1 == len(diffs)
assert TestDiffScope.STDOUT == diffs[0].scope
assert BehaviorDiffScope.STDOUT == diffs[0].scope
def test_pass_fail_only_skips_return_values(self) -> None:
"""When pass_fail_only=True, return value diffs are ignored."""
@ -265,7 +265,7 @@ class TestCompareTestResults:
assert [] == diffs
def test_multiple_diffs_collected(self) -> None:
"""Multiple mismatches produce multiple TestDiff entries."""
"""Multiple mismatches produce multiple BehaviorDiff entries."""
original = make_results(
make_invocation(
test_function="test_a",
@ -296,8 +296,8 @@ class TestCompareTestResults:
assert match is False
assert 2 == len(diffs)
scopes = {d.scope for d in diffs}
assert TestDiffScope.DID_PASS in scopes
assert TestDiffScope.RETURN_VALUE in scopes
assert BehaviorDiffScope.DID_PASS in scopes
assert BehaviorDiffScope.RETURN_VALUE in scopes
class TestPerformanceGain:
@ -348,23 +348,23 @@ class TestPerformanceGain:
assert result < 0.01
class TestTestDiffScope:
"""TestDiffScope enum values."""
class TestBehaviorDiffScope:
"""BehaviorDiffScope enum values."""
def test_values(self) -> None:
"""The three enum values exist with expected string values."""
assert "return_value" == TestDiffScope.RETURN_VALUE.value
assert "stdout" == TestDiffScope.STDOUT.value
assert "did_pass" == TestDiffScope.DID_PASS.value
assert "return_value" == BehaviorDiffScope.RETURN_VALUE.value
assert "stdout" == BehaviorDiffScope.STDOUT.value
assert "did_pass" == BehaviorDiffScope.DID_PASS.value
class TestTestDiff:
"""TestDiff frozen data class."""
class TestBehaviorDiff:
"""BehaviorDiff frozen data class."""
def test_construction(self) -> None:
"""Can construct with all fields."""
diff = TestDiff(
scope=TestDiffScope.RETURN_VALUE,
diff = BehaviorDiff(
scope=BehaviorDiffScope.RETURN_VALUE,
original_pass=True,
candidate_pass=True,
original_value="42",
@ -374,7 +374,7 @@ class TestTestDiff:
original_pytest_error=None,
)
assert TestDiffScope.RETURN_VALUE == diff.scope
assert BehaviorDiffScope.RETURN_VALUE == diff.scope
assert diff.original_pass is True
assert diff.candidate_pass is True
assert "42" == diff.original_value
@ -385,19 +385,19 @@ class TestTestDiff:
def test_frozen(self) -> None:
"""Raises on attribute assignment."""
diff = TestDiff(
scope=TestDiffScope.DID_PASS,
diff = BehaviorDiff(
scope=BehaviorDiffScope.DID_PASS,
original_pass=True,
candidate_pass=False,
)
with pytest.raises(attrs.exceptions.FrozenInstanceError):
diff.scope = TestDiffScope.STDOUT # type: ignore[misc]
diff.scope = BehaviorDiffScope.STDOUT # type: ignore[misc]
def test_default_none_fields(self) -> None:
"""Optional fields default to None."""
diff = TestDiff(
scope=TestDiffScope.STDOUT,
diff = BehaviorDiff(
scope=BehaviorDiffScope.STDOUT,
original_pass=True,
candidate_pass=True,
)