mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
Rename TestDiff/TestDiffScope to BehaviorDiff/BehaviorDiffScope
These classes represent behavioral verification diffs, not tests. The Test* prefix caused pytest to attempt collection and emit warnings.
This commit is contained in:
parent
9e893675c9
commit
4f98b5421f
8 changed files with 91 additions and 91 deletions
|
|
@ -16,22 +16,22 @@ from codeflash_api.languages.python._markdown import (
|
||||||
split_markdown_code,
|
split_markdown_code,
|
||||||
)
|
)
|
||||||
from codeflash_api.repair.schemas import (
|
from codeflash_api.repair.schemas import (
|
||||||
TestDiff,
|
BehaviorDiff,
|
||||||
TestDiffScope,
|
BehaviorDiffScope,
|
||||||
)
|
)
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
SCOPE_DESCRIPTIONS: dict[TestDiffScope, str] = {
|
SCOPE_DESCRIPTIONS: dict[BehaviorDiffScope, str] = {
|
||||||
TestDiffScope.RETURN_VALUE: (
|
BehaviorDiffScope.RETURN_VALUE: (
|
||||||
"The function returned a different value in the"
|
"The function returned a different value in the"
|
||||||
" optimized code compared to the original."
|
" optimized code compared to the original."
|
||||||
),
|
),
|
||||||
TestDiffScope.STDOUT: (
|
BehaviorDiffScope.STDOUT: (
|
||||||
"The output printed to stdout is different in the"
|
"The output printed to stdout is different in the"
|
||||||
" optimized code compared to the original."
|
" optimized code compared to the original."
|
||||||
),
|
),
|
||||||
TestDiffScope.DID_PASS: (
|
BehaviorDiffScope.DID_PASS: (
|
||||||
"The test passed in one version but failed in the"
|
"The test passed in one version but failed in the"
|
||||||
" other (a change in pass/fail behavior)."
|
" other (a change in pass/fail behavior)."
|
||||||
),
|
),
|
||||||
|
|
@ -39,7 +39,7 @@ SCOPE_DESCRIPTIONS: dict[TestDiffScope, str] = {
|
||||||
|
|
||||||
|
|
||||||
def build_test_details(
|
def build_test_details(
|
||||||
test_diffs: list[TestDiff],
|
test_diffs: list[BehaviorDiff],
|
||||||
language: str = "python",
|
language: str = "python",
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -59,7 +59,7 @@ def build_test_details(
|
||||||
|
|
||||||
|
|
||||||
def _format_single_diff(
|
def _format_single_diff(
|
||||||
diff: TestDiff,
|
diff: BehaviorDiff,
|
||||||
sections: defaultdict[str, list[str]],
|
sections: defaultdict[str, list[str]],
|
||||||
seen_headers: set[str],
|
seen_headers: set[str],
|
||||||
test_error_label: str,
|
test_error_label: str,
|
||||||
|
|
@ -92,7 +92,7 @@ def _format_single_diff(
|
||||||
|
|
||||||
scope_desc = SCOPE_DESCRIPTIONS.get(diff.scope, diff.scope.value)
|
scope_desc = SCOPE_DESCRIPTIONS.get(diff.scope, diff.scope.value)
|
||||||
detail_lines = [f"- {scope_desc}"]
|
detail_lines = [f"- {scope_desc}"]
|
||||||
if diff.scope != TestDiffScope.DID_PASS:
|
if diff.scope != BehaviorDiffScope.DID_PASS:
|
||||||
detail_lines.append(f" Expected: {diff.original_value!r}")
|
detail_lines.append(f" Expected: {diff.original_value!r}")
|
||||||
detail_lines.append(f" Got: {diff.candidate_value!r}")
|
detail_lines.append(f" Got: {diff.candidate_value!r}")
|
||||||
else:
|
else:
|
||||||
|
|
@ -110,7 +110,7 @@ def build_user_prompt(
|
||||||
template: str,
|
template: str,
|
||||||
original_source_code: str,
|
original_source_code: str,
|
||||||
modified_source_code: str,
|
modified_source_code: str,
|
||||||
test_diffs: list[TestDiff],
|
test_diffs: list[BehaviorDiff],
|
||||||
language: str = "python",
|
language: str = "python",
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from typing import Any
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
class TestDiffScope(str, enum.Enum):
|
class BehaviorDiffScope(str, enum.Enum):
|
||||||
"""
|
"""
|
||||||
The dimension on which a test diff was observed.
|
The dimension on which a test diff was observed.
|
||||||
"""
|
"""
|
||||||
|
|
@ -17,12 +17,12 @@ class TestDiffScope(str, enum.Enum):
|
||||||
TIMED_OUT = "timed_out"
|
TIMED_OUT = "timed_out"
|
||||||
|
|
||||||
|
|
||||||
class TestDiff(BaseModel):
|
class BehaviorDiff(BaseModel):
|
||||||
"""
|
"""
|
||||||
A single behavioural difference between original and optimised code.
|
A single behavioural difference between original and optimised code.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
scope: TestDiffScope
|
scope: BehaviorDiffScope
|
||||||
original_value: (
|
original_value: (
|
||||||
bool | str | int | float | dict[str, Any] | list[Any] | None
|
bool | str | int | float | dict[str, Any] | list[Any] | None
|
||||||
) = None
|
) = None
|
||||||
|
|
@ -45,7 +45,7 @@ class CodeRepairRequest(BaseModel):
|
||||||
optimization_id: str
|
optimization_id: str
|
||||||
original_source_code: str
|
original_source_code: str
|
||||||
modified_source_code: str
|
modified_source_code: str
|
||||||
test_diffs: list[TestDiff]
|
test_diffs: list[BehaviorDiff]
|
||||||
language: str = "python"
|
language: str = "python"
|
||||||
rerun_trace_id: str | None = None
|
rerun_trace_id: str | None = None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,8 @@ from codeflash_api.repair._context import (
|
||||||
)
|
)
|
||||||
from codeflash_api.repair.schemas import (
|
from codeflash_api.repair.schemas import (
|
||||||
CodeRepairRequest,
|
CodeRepairRequest,
|
||||||
TestDiff,
|
BehaviorDiff,
|
||||||
TestDiffScope,
|
BehaviorDiffScope,
|
||||||
)
|
)
|
||||||
|
|
||||||
# -------------------------------------------------------------------
|
# -------------------------------------------------------------------
|
||||||
|
|
@ -42,8 +42,8 @@ class TestCodeRepairRequest:
|
||||||
"""
|
"""
|
||||||
A request with all fields deserializes.
|
A request with all fields deserializes.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value="1",
|
original_value="1",
|
||||||
candidate_value="2",
|
candidate_value="2",
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -60,18 +60,18 @@ class TestCodeRepairRequest:
|
||||||
rerun_trace_id=str(uuid.uuid4()),
|
rerun_trace_id=str(uuid.uuid4()),
|
||||||
)
|
)
|
||||||
assert 1 == len(req.test_diffs)
|
assert 1 == len(req.test_diffs)
|
||||||
assert TestDiffScope.RETURN_VALUE == req.test_diffs[0].scope
|
assert BehaviorDiffScope.RETURN_VALUE == req.test_diffs[0].scope
|
||||||
|
|
||||||
|
|
||||||
class TestTestDiff:
|
class TestBehaviorDiff:
|
||||||
"""Tests for TestDiff schema."""
|
"""Tests for BehaviorDiff schema."""
|
||||||
|
|
||||||
def test_all_scopes(self):
|
def test_all_scopes(self):
|
||||||
"""
|
"""
|
||||||
Every scope enum value can be used.
|
Every scope enum value can be used.
|
||||||
"""
|
"""
|
||||||
for scope in TestDiffScope:
|
for scope in BehaviorDiffScope:
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=scope,
|
scope=scope,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=False,
|
candidate_pass=False,
|
||||||
|
|
@ -82,8 +82,8 @@ class TestTestDiff:
|
||||||
"""
|
"""
|
||||||
Optional fields default to None.
|
Optional fields default to None.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.DID_PASS,
|
scope=BehaviorDiffScope.DID_PASS,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=False,
|
candidate_pass=False,
|
||||||
)
|
)
|
||||||
|
|
@ -112,8 +112,8 @@ class TestBuildTestDetails:
|
||||||
"""
|
"""
|
||||||
Return value diffs show Expected/Got lines.
|
Return value diffs show Expected/Got lines.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value="hello",
|
original_value="hello",
|
||||||
candidate_value="world",
|
candidate_value="world",
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -129,8 +129,8 @@ class TestBuildTestDetails:
|
||||||
"""
|
"""
|
||||||
DID_PASS diffs show pass/fail status.
|
DID_PASS diffs show pass/fail status.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.DID_PASS,
|
scope=BehaviorDiffScope.DID_PASS,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=False,
|
candidate_pass=False,
|
||||||
test_src_code="test_something",
|
test_src_code="test_something",
|
||||||
|
|
@ -143,8 +143,8 @@ class TestBuildTestDetails:
|
||||||
"""
|
"""
|
||||||
Pytest errors appear in the output when present.
|
Pytest errors appear in the output when present.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=1,
|
original_value=1,
|
||||||
candidate_value=2,
|
candidate_value=2,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -161,16 +161,16 @@ class TestBuildTestDetails:
|
||||||
Multiple diffs for the same test source share a header.
|
Multiple diffs for the same test source share a header.
|
||||||
"""
|
"""
|
||||||
diffs = [
|
diffs = [
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=1,
|
original_value=1,
|
||||||
candidate_value=2,
|
candidate_value=2,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=False,
|
candidate_pass=False,
|
||||||
test_src_code="test_func",
|
test_src_code="test_func",
|
||||||
),
|
),
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.STDOUT,
|
scope=BehaviorDiffScope.STDOUT,
|
||||||
original_value="out",
|
original_value="out",
|
||||||
candidate_value="err",
|
candidate_value="err",
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -186,16 +186,16 @@ class TestBuildTestDetails:
|
||||||
Different test sources get separate sections.
|
Different test sources get separate sections.
|
||||||
"""
|
"""
|
||||||
diffs = [
|
diffs = [
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=1,
|
original_value=1,
|
||||||
candidate_value=2,
|
candidate_value=2,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=False,
|
candidate_pass=False,
|
||||||
test_src_code="test_a",
|
test_src_code="test_a",
|
||||||
),
|
),
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=3,
|
original_value=3,
|
||||||
candidate_value=4,
|
candidate_value=4,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -211,8 +211,8 @@ class TestBuildTestDetails:
|
||||||
"""
|
"""
|
||||||
Non-python language uses 'Test error' label.
|
Non-python language uses 'Test error' label.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=1,
|
original_value=1,
|
||||||
candidate_value=2,
|
candidate_value=2,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -227,8 +227,8 @@ class TestBuildTestDetails:
|
||||||
"""
|
"""
|
||||||
Missing test source shows 'Not available'.
|
Missing test source shows 'Not available'.
|
||||||
"""
|
"""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=1,
|
original_value=1,
|
||||||
candidate_value=2,
|
candidate_value=2,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
@ -255,8 +255,8 @@ class TestBuildUserPrompt:
|
||||||
"Modified: {modified_source_code}\n"
|
"Modified: {modified_source_code}\n"
|
||||||
"Tests: {test_details}"
|
"Tests: {test_details}"
|
||||||
)
|
)
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=1,
|
original_value=1,
|
||||||
candidate_value=2,
|
candidate_value=2,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
|
|
|
||||||
|
|
@ -3,17 +3,17 @@
|
||||||
from ._baseline import establish_original_code_baseline
|
from ._baseline import establish_original_code_baseline
|
||||||
from ._verification import compare_test_results
|
from ._verification import compare_test_results
|
||||||
from .models import (
|
from .models import (
|
||||||
|
BehaviorDiff,
|
||||||
|
BehaviorDiffScope,
|
||||||
OptimizedCandidateResult,
|
OptimizedCandidateResult,
|
||||||
OriginalCodeBaseline,
|
OriginalCodeBaseline,
|
||||||
TestDiff,
|
|
||||||
TestDiffScope,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
"BehaviorDiff",
|
||||||
|
"BehaviorDiffScope",
|
||||||
"OptimizedCandidateResult",
|
"OptimizedCandidateResult",
|
||||||
"OriginalCodeBaseline",
|
"OriginalCodeBaseline",
|
||||||
"TestDiff",
|
|
||||||
"TestDiffScope",
|
|
||||||
"compare_test_results",
|
"compare_test_results",
|
||||||
"establish_original_code_baseline",
|
"establish_original_code_baseline",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from typing import TYPE_CHECKING
|
||||||
from .._model import VerificationType
|
from .._model import VerificationType
|
||||||
from ..test_discovery.models import TestType
|
from ..test_discovery.models import TestType
|
||||||
from ._comparator import comparator
|
from ._comparator import comparator
|
||||||
from .models import TestDiff, TestDiffScope
|
from .models import BehaviorDiff, BehaviorDiffScope
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from ..testing.models import TestResults
|
from ..testing.models import TestResults
|
||||||
|
|
@ -22,7 +22,7 @@ INCREASED_RECURSION_LIMIT = 5000
|
||||||
|
|
||||||
_reprlib_repr = reprlib.Repr()
|
_reprlib_repr = reprlib.Repr()
|
||||||
_reprlib_repr.maxstring = 1500
|
_reprlib_repr.maxstring = 1500
|
||||||
_test_diff_repr = _reprlib_repr.repr
|
_behavior_diff_repr = _reprlib_repr.repr
|
||||||
|
|
||||||
|
|
||||||
def safe_repr(obj: object) -> str:
|
def safe_repr(obj: object) -> str:
|
||||||
|
|
@ -48,7 +48,7 @@ def compare_test_results( # noqa: C901, PLR0912
|
||||||
original_results: TestResults,
|
original_results: TestResults,
|
||||||
candidate_results: TestResults,
|
candidate_results: TestResults,
|
||||||
pass_fail_only: bool = False, # noqa: FBT001, FBT002
|
pass_fail_only: bool = False, # noqa: FBT001, FBT002
|
||||||
) -> tuple[bool, list[TestDiff]]:
|
) -> tuple[bool, list[BehaviorDiff]]:
|
||||||
"""Compare original and candidate test results for behavioral equivalence.
|
"""Compare original and candidate test results for behavioral equivalence.
|
||||||
|
|
||||||
Returns a tuple of (all_match, diffs). When *pass_fail_only* is True,
|
Returns a tuple of (all_match, diffs). When *pass_fail_only* is True,
|
||||||
|
|
@ -66,7 +66,7 @@ def compare_test_results( # noqa: C901, PLR0912
|
||||||
| candidate_results.get_all_unique_invocation_loop_ids()
|
| candidate_results.get_all_unique_invocation_loop_ids()
|
||||||
)
|
)
|
||||||
|
|
||||||
test_diffs: list[TestDiff] = []
|
test_diffs: list[BehaviorDiff] = []
|
||||||
did_all_timeout = True
|
did_all_timeout = True
|
||||||
|
|
||||||
for test_id in test_ids_superset:
|
for test_id in test_ids_superset:
|
||||||
|
|
@ -143,8 +143,8 @@ def compare_test_results( # noqa: C901, PLR0912
|
||||||
TestType.REPLAY_TEST,
|
TestType.REPLAY_TEST,
|
||||||
} and (cdd_test_result.did_pass != original_test_result.did_pass):
|
} and (cdd_test_result.did_pass != original_test_result.did_pass):
|
||||||
test_diffs.append(
|
test_diffs.append(
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.DID_PASS,
|
scope=BehaviorDiffScope.DID_PASS,
|
||||||
original_value=str(original_test_result.did_pass),
|
original_value=str(original_test_result.did_pass),
|
||||||
candidate_value=str(cdd_test_result.did_pass),
|
candidate_value=str(cdd_test_result.did_pass),
|
||||||
test_src_code=(
|
test_src_code=(
|
||||||
|
|
@ -164,14 +164,14 @@ def compare_test_results( # noqa: C901, PLR0912
|
||||||
superset_obj=superset_obj,
|
superset_obj=superset_obj,
|
||||||
):
|
):
|
||||||
test_diffs.append(
|
test_diffs.append(
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_value=_test_diff_repr(
|
original_value=_behavior_diff_repr(
|
||||||
safe_repr(
|
safe_repr(
|
||||||
original_test_result.return_value,
|
original_test_result.return_value,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
candidate_value=_test_diff_repr(
|
candidate_value=_behavior_diff_repr(
|
||||||
safe_repr(
|
safe_repr(
|
||||||
cdd_test_result.return_value,
|
cdd_test_result.return_value,
|
||||||
),
|
),
|
||||||
|
|
@ -214,8 +214,8 @@ def compare_test_results( # noqa: C901, PLR0912
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
test_diffs.append(
|
test_diffs.append(
|
||||||
TestDiff(
|
BehaviorDiff(
|
||||||
scope=TestDiffScope.STDOUT,
|
scope=BehaviorDiffScope.STDOUT,
|
||||||
original_value=str(original_test_result.stdout),
|
original_value=str(original_test_result.stdout),
|
||||||
candidate_value=str(cdd_test_result.stdout),
|
candidate_value=str(cdd_test_result.stdout),
|
||||||
test_src_code=(
|
test_src_code=(
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ if TYPE_CHECKING:
|
||||||
from ..testing.models import TestResults
|
from ..testing.models import TestResults
|
||||||
|
|
||||||
|
|
||||||
class TestDiffScope(str, enum.Enum):
|
class BehaviorDiffScope(str, enum.Enum):
|
||||||
"""Scope of a behavioral difference between original and candidate."""
|
"""Scope of a behavioral difference between original and candidate."""
|
||||||
|
|
||||||
RETURN_VALUE = "return_value"
|
RETURN_VALUE = "return_value"
|
||||||
|
|
@ -23,10 +23,10 @@ class TestDiffScope(str, enum.Enum):
|
||||||
|
|
||||||
|
|
||||||
@attrs.frozen
|
@attrs.frozen
|
||||||
class TestDiff:
|
class BehaviorDiff:
|
||||||
"""A single behavioral difference between original and candidate."""
|
"""A single behavioral difference between original and candidate."""
|
||||||
|
|
||||||
scope: TestDiffScope
|
scope: BehaviorDiffScope
|
||||||
original_pass: bool
|
original_pass: bool
|
||||||
candidate_pass: bool
|
candidate_pass: bool
|
||||||
original_value: str | None = None
|
original_value: str | None = None
|
||||||
|
|
|
||||||
|
|
@ -501,16 +501,16 @@ class TestRunTestsAndBenchmark:
|
||||||
) -> None:
|
) -> None:
|
||||||
"""When behavioral tests fail with diffs, diffs are stored."""
|
"""When behavioral tests fail with diffs, diffs are stored."""
|
||||||
from codeflash_python.verification.models import (
|
from codeflash_python.verification.models import (
|
||||||
TestDiff,
|
BehaviorDiff,
|
||||||
TestDiffScope,
|
BehaviorDiffScope,
|
||||||
)
|
)
|
||||||
|
|
||||||
eval_ctx = EvaluationContext()
|
eval_ctx = EvaluationContext()
|
||||||
failed_diffs: dict[str, list[Any]] = {}
|
failed_diffs: dict[str, list[Any]] = {}
|
||||||
bench_results: dict[str, TestResults] = {}
|
bench_results: dict[str, TestResults] = {}
|
||||||
|
|
||||||
diff_obj = TestDiff(
|
diff_obj = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=True,
|
candidate_pass=True,
|
||||||
original_value="42",
|
original_value="42",
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,8 @@ from codeflash_python.verification._verification import (
|
||||||
)
|
)
|
||||||
from codeflash_python.verification.models import (
|
from codeflash_python.verification.models import (
|
||||||
OptimizedCandidateResult,
|
OptimizedCandidateResult,
|
||||||
TestDiff,
|
BehaviorDiff,
|
||||||
TestDiffScope,
|
BehaviorDiffScope,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -125,7 +125,7 @@ class TestCompareTestResults:
|
||||||
|
|
||||||
assert match is False
|
assert match is False
|
||||||
assert 1 == len(diffs)
|
assert 1 == len(diffs)
|
||||||
assert TestDiffScope.DID_PASS == diffs[0].scope
|
assert BehaviorDiffScope.DID_PASS == diffs[0].scope
|
||||||
assert diffs[0].original_pass is True
|
assert diffs[0].original_pass is True
|
||||||
assert diffs[0].candidate_pass is False
|
assert diffs[0].candidate_pass is False
|
||||||
|
|
||||||
|
|
@ -142,7 +142,7 @@ class TestCompareTestResults:
|
||||||
|
|
||||||
assert match is False
|
assert match is False
|
||||||
assert 1 == len(diffs)
|
assert 1 == len(diffs)
|
||||||
assert TestDiffScope.RETURN_VALUE == diffs[0].scope
|
assert BehaviorDiffScope.RETURN_VALUE == diffs[0].scope
|
||||||
|
|
||||||
def test_stdout_mismatch(self) -> None:
|
def test_stdout_mismatch(self) -> None:
|
||||||
"""Same return values but different stdout produces STDOUT diff."""
|
"""Same return values but different stdout produces STDOUT diff."""
|
||||||
|
|
@ -165,7 +165,7 @@ class TestCompareTestResults:
|
||||||
|
|
||||||
assert match is False
|
assert match is False
|
||||||
assert 1 == len(diffs)
|
assert 1 == len(diffs)
|
||||||
assert TestDiffScope.STDOUT == diffs[0].scope
|
assert BehaviorDiffScope.STDOUT == diffs[0].scope
|
||||||
|
|
||||||
def test_pass_fail_only_skips_return_values(self) -> None:
|
def test_pass_fail_only_skips_return_values(self) -> None:
|
||||||
"""When pass_fail_only=True, return value diffs are ignored."""
|
"""When pass_fail_only=True, return value diffs are ignored."""
|
||||||
|
|
@ -265,7 +265,7 @@ class TestCompareTestResults:
|
||||||
assert [] == diffs
|
assert [] == diffs
|
||||||
|
|
||||||
def test_multiple_diffs_collected(self) -> None:
|
def test_multiple_diffs_collected(self) -> None:
|
||||||
"""Multiple mismatches produce multiple TestDiff entries."""
|
"""Multiple mismatches produce multiple BehaviorDiff entries."""
|
||||||
original = make_results(
|
original = make_results(
|
||||||
make_invocation(
|
make_invocation(
|
||||||
test_function="test_a",
|
test_function="test_a",
|
||||||
|
|
@ -296,8 +296,8 @@ class TestCompareTestResults:
|
||||||
assert match is False
|
assert match is False
|
||||||
assert 2 == len(diffs)
|
assert 2 == len(diffs)
|
||||||
scopes = {d.scope for d in diffs}
|
scopes = {d.scope for d in diffs}
|
||||||
assert TestDiffScope.DID_PASS in scopes
|
assert BehaviorDiffScope.DID_PASS in scopes
|
||||||
assert TestDiffScope.RETURN_VALUE in scopes
|
assert BehaviorDiffScope.RETURN_VALUE in scopes
|
||||||
|
|
||||||
|
|
||||||
class TestPerformanceGain:
|
class TestPerformanceGain:
|
||||||
|
|
@ -348,23 +348,23 @@ class TestPerformanceGain:
|
||||||
assert result < 0.01
|
assert result < 0.01
|
||||||
|
|
||||||
|
|
||||||
class TestTestDiffScope:
|
class TestBehaviorDiffScope:
|
||||||
"""TestDiffScope enum values."""
|
"""BehaviorDiffScope enum values."""
|
||||||
|
|
||||||
def test_values(self) -> None:
|
def test_values(self) -> None:
|
||||||
"""The three enum values exist with expected string values."""
|
"""The three enum values exist with expected string values."""
|
||||||
assert "return_value" == TestDiffScope.RETURN_VALUE.value
|
assert "return_value" == BehaviorDiffScope.RETURN_VALUE.value
|
||||||
assert "stdout" == TestDiffScope.STDOUT.value
|
assert "stdout" == BehaviorDiffScope.STDOUT.value
|
||||||
assert "did_pass" == TestDiffScope.DID_PASS.value
|
assert "did_pass" == BehaviorDiffScope.DID_PASS.value
|
||||||
|
|
||||||
|
|
||||||
class TestTestDiff:
|
class TestBehaviorDiff:
|
||||||
"""TestDiff frozen data class."""
|
"""BehaviorDiff frozen data class."""
|
||||||
|
|
||||||
def test_construction(self) -> None:
|
def test_construction(self) -> None:
|
||||||
"""Can construct with all fields."""
|
"""Can construct with all fields."""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.RETURN_VALUE,
|
scope=BehaviorDiffScope.RETURN_VALUE,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=True,
|
candidate_pass=True,
|
||||||
original_value="42",
|
original_value="42",
|
||||||
|
|
@ -374,7 +374,7 @@ class TestTestDiff:
|
||||||
original_pytest_error=None,
|
original_pytest_error=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert TestDiffScope.RETURN_VALUE == diff.scope
|
assert BehaviorDiffScope.RETURN_VALUE == diff.scope
|
||||||
assert diff.original_pass is True
|
assert diff.original_pass is True
|
||||||
assert diff.candidate_pass is True
|
assert diff.candidate_pass is True
|
||||||
assert "42" == diff.original_value
|
assert "42" == diff.original_value
|
||||||
|
|
@ -385,19 +385,19 @@ class TestTestDiff:
|
||||||
|
|
||||||
def test_frozen(self) -> None:
|
def test_frozen(self) -> None:
|
||||||
"""Raises on attribute assignment."""
|
"""Raises on attribute assignment."""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.DID_PASS,
|
scope=BehaviorDiffScope.DID_PASS,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=False,
|
candidate_pass=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
with pytest.raises(attrs.exceptions.FrozenInstanceError):
|
with pytest.raises(attrs.exceptions.FrozenInstanceError):
|
||||||
diff.scope = TestDiffScope.STDOUT # type: ignore[misc]
|
diff.scope = BehaviorDiffScope.STDOUT # type: ignore[misc]
|
||||||
|
|
||||||
def test_default_none_fields(self) -> None:
|
def test_default_none_fields(self) -> None:
|
||||||
"""Optional fields default to None."""
|
"""Optional fields default to None."""
|
||||||
diff = TestDiff(
|
diff = BehaviorDiff(
|
||||||
scope=TestDiffScope.STDOUT,
|
scope=BehaviorDiffScope.STDOUT,
|
||||||
original_pass=True,
|
original_pass=True,
|
||||||
candidate_pass=True,
|
candidate_pass=True,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue