codeflash-agent/packages/codeflash-python/tests/test_model_test_results.py
Kevin Turcios eb6a0be717 feat: add dual-clock instrumentation (wall + CPU time) and remove dead binary parser
Measure both wall-clock time (perf_counter_ns) and per-thread CPU time
(thread_time_ns) in instrumented test code. cpu_runtime is now a required
int field on FunctionTestInvocation, stored in the SQLite test_results
table as the 10th column.

Also fix the sleeptime.py bug (10e9 → 1e9 divisor) and remove the
binary pickle parser (parse_test_return_values_bin), since no writer
exists in the current codebase; SQLite is the sole data-capture path.
2026-04-24 02:21:22 -05:00
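
A minimal sketch of the dual-clock capture described above; the wrapper name
and shape here are illustrative, not the actual instrumentation template:

    import time

    def timed_call(fn, *args, **kwargs):
        # Wall clock: monotonic, includes sleeps and I/O waits.
        wall_start = time.perf_counter_ns()
        # CPU clock: this thread's CPU time only; sleeping does not accrue.
        cpu_start = time.thread_time_ns()
        result = fn(*args, **kwargs)
        cpu_ns = time.thread_time_ns() - cpu_start
        wall_ns = time.perf_counter_ns() - wall_start
        return result, wall_ns, cpu_ns

The sleeptime.py fix is a unit correction: both counters report nanoseconds,
and one second is 1e9 ns, so converting to seconds divides by 1e9; the old
10e9 divisor understated durations tenfold.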


from __future__ import annotations

from pathlib import Path

import pytest

from codeflash_python._model import VerificationType
from codeflash_python.benchmarking.models import BenchmarkKey
from codeflash_python.test_discovery.models import TestType
from codeflash_python.testing.models import (
    FunctionTestInvocation,
    InvocationId,
    TestConfig,
    TestFile,
    TestFiles,
    TestResults,
)


def make_invocation_id(
    *,
    module: str = "tests.test_foo",
    cls: str | None = "TestFoo",
    func: str | None = "test_bar",
    target: str = "bar",
    iteration: str | None = "0",
) -> InvocationId:
    """Create an InvocationId with sensible defaults."""
    return InvocationId(
        test_module_path=module,
        test_class_name=cls,
        test_function_name=func,
        function_getting_tested=target,
        iteration_id=iteration,
    )


def make_invocation(
    *,
    loop_index: int = 0,
    inv_id: InvocationId | None = None,
    did_pass: bool = True,
    runtime: int | None = 100,
) -> FunctionTestInvocation:
    """Create a FunctionTestInvocation with sensible defaults."""
    return FunctionTestInvocation(
        loop_index=loop_index,
        id=inv_id or make_invocation_id(),
        file_name=Path("tests/test_foo.py"),
        did_pass=did_pass,
        runtime=runtime,
        test_framework="pytest",
        test_type=TestType.EXISTING_UNIT_TEST,
        return_value=None,
        # cpu_runtime is required since the dual-clock change; these tests
        # don't exercise it, so a zero placeholder suffices.
        cpu_runtime=0,
        timed_out=False,
    )


class TestInvocationId:
    """InvocationId identity and parsing."""

    def test_id_with_class(self) -> None:
        """id() includes class prefix when test_class_name is set."""
        inv = make_invocation_id(cls="TestFoo", func="test_bar")
        assert "tests.test_foo:TestFoo.test_bar:bar:0" == inv.id()

    def test_id_without_class(self) -> None:
        """id() has no class prefix when test_class_name is None."""
        inv = make_invocation_id(cls=None, func="test_bar")
        assert "tests.test_foo:test_bar:bar:0" == inv.id()

    def test_fn_qualified_name_with_class(self) -> None:
        """Returns 'Class.function' when class is present."""
        inv = make_invocation_id(cls="TestFoo", func="test_bar")
        assert "TestFoo.test_bar" == inv.test_fn_qualified_name()

    def test_fn_qualified_name_without_class(self) -> None:
        """Returns just 'function' when class is None."""
        inv = make_invocation_id(cls=None, func="test_bar")
        assert "test_bar" == inv.test_fn_qualified_name()

    def test_from_str_id_with_class(self) -> None:
        """Parses 'module:Class.test:func:iter' correctly."""
        result = InvocationId.from_str_id(
            "tests.test_foo:TestFoo.test_bar:bar:0",
        )
        assert "tests.test_foo" == result.test_module_path
        assert "TestFoo" == result.test_class_name
        assert "test_bar" == result.test_function_name
        assert "bar" == result.function_getting_tested
        assert "0" == result.iteration_id

    def test_from_str_id_without_class(self) -> None:
        """Parses 'module:test:func:iter' when no class present."""
        result = InvocationId.from_str_id(
            "tests.test_foo:test_bar:bar:0",
        )
        assert result.test_class_name is None
        assert "test_bar" == result.test_function_name

    def test_from_str_id_with_iteration_override(self) -> None:
        """iteration_id parameter overrides the one in the string."""
        result = InvocationId.from_str_id(
            "tests.test_foo:test_bar:bar:0",
            iteration_id="5",
        )
        assert "5" == result.iteration_id

    def test_from_str_id_invalid(self) -> None:
        """Raises ValueError for malformed input."""
        with pytest.raises(ValueError, match="Expected 4"):
            InvocationId.from_str_id("bad:input")

    def test_frozen(self) -> None:
        """Cannot set attributes on frozen instance."""
        inv = make_invocation_id()
        with pytest.raises(AttributeError):
            inv.test_module_path = "other"  # type: ignore[misc]


class TestFunctionTestInvocation:
    """FunctionTestInvocation data and properties."""

    def test_unique_invocation_loop_id(self) -> None:
        """Combines loop_index and id string."""
        inv = make_invocation(loop_index=3)
        expected = f"3:{inv.id.id()}"
        assert expected == inv.unique_invocation_loop_id

    def test_default_verification_type(self) -> None:
        """Defaults to FUNCTION_CALL when not specified."""
        inv = make_invocation()
        assert VerificationType.FUNCTION_CALL == inv.verification_type

    def test_explicit_verification_type(self) -> None:
        """Accepts explicit verification type."""
        inv = FunctionTestInvocation(
            loop_index=0,
            id=make_invocation_id(),
            file_name=Path("tests/test_foo.py"),
            did_pass=True,
            runtime=100,
            test_framework="pytest",
            test_type=TestType.EXISTING_UNIT_TEST,
            return_value=None,
            cpu_runtime=0,
            timed_out=False,
            verification_type=VerificationType.INIT_STATE_FTO,
        )
        assert VerificationType.INIT_STATE_FTO == inv.verification_type

    def test_frozen(self) -> None:
        """Cannot modify attributes on frozen instance."""
        inv = make_invocation()
        with pytest.raises(AttributeError):
            inv.did_pass = False  # type: ignore[misc]


class TestTestResults:
    """TestResults collection behavior."""

    def test_add_and_len(self) -> None:
        """Adding an invocation increases length."""
        results = TestResults()
        results.add(make_invocation())
        assert 1 == len(results)

    def test_add_dedup(self) -> None:
        """Adding same uid twice only stores once."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        results.add(inv)
        assert 1 == len(results)

    def test_merge(self) -> None:
        """Merges two TestResults together."""
        r1 = TestResults()
        r1.add(make_invocation(loop_index=0))
        r2 = TestResults()
        r2.add(make_invocation(loop_index=1))
        r1.merge(r2)
        assert 2 == len(r1)

    def test_merge_duplicate_raises(self) -> None:
        """Duplicate uid in merge raises ValueError."""
        inv = make_invocation()
        r1 = TestResults()
        r1.add(inv)
        r2 = TestResults()
        r2.add(inv)
        with pytest.raises(ValueError, match="Duplicate"):
            r1.merge(r2)

    def test_get_by_uid(self) -> None:
        """Lookup by unique_invocation_loop_id returns the invocation."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        found = results.get_by_unique_invocation_loop_id(
            inv.unique_invocation_loop_id,
        )
        assert inv == found

    def test_get_by_uid_missing(self) -> None:
        """Returns None for unknown uid."""
        results = TestResults()
        assert results.get_by_unique_invocation_loop_id("x") is None

    def test_number_of_loops(self) -> None:
        """Returns max loop_index across all results."""
        results = TestResults()
        results.add(make_invocation(loop_index=0))
        results.add(make_invocation(loop_index=3))
        assert 3 == results.number_of_loops()

    def test_number_of_loops_empty(self) -> None:
        """Returns 0 for empty results."""
        assert 0 == TestResults().number_of_loops()
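
    # Assumed aggregation behind total_passed_runtime (inferred from the two
    # tests below; the model implementation is not shown in this file): group
    # passing invocations by invocation id, take the minimum runtime across
    # loop iterations within each group, then sum those minima, roughly
    #   sum(min(inv.runtime for inv in group) for group in passed_by_id)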
    def test_total_passed_runtime(self) -> None:
        """Sum of minimum runtimes across passing test cases."""
        inv_id = make_invocation_id()
        results = TestResults()
        results.add(
            make_invocation(loop_index=0, inv_id=inv_id, runtime=200),
        )
        results.add(
            make_invocation(loop_index=1, inv_id=inv_id, runtime=100),
        )
        assert 100 == results.total_passed_runtime()

    def test_total_passed_runtime_excludes_failed(self) -> None:
        """Failed invocations are excluded from runtime sum."""
        results = TestResults()
        results.add(make_invocation(loop_index=0, runtime=200))
        results.add(
            make_invocation(
                loop_index=1,
                inv_id=make_invocation_id(func="test_fail"),
                did_pass=False,
                runtime=50,
            ),
        )
        assert 200 == results.total_passed_runtime()

    def test_iter_and_bool(self) -> None:
        """Iteration yields invocations; empty is falsy, non-empty truthy."""
        results = TestResults()
        assert not results
        inv = make_invocation()
        results.add(inv)
        assert results
        assert [inv] == list(results)

    def test_contains(self) -> None:
        """Invocation in results returns True."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        assert inv in results

    def test_getitem(self) -> None:
        """Index access returns the correct invocation."""
        inv = make_invocation()
        results = TestResults()
        results.add(inv)
        assert inv == results[0]


class TestTestFile:
    """TestFile and TestFiles collection behavior."""

    def test_get_test_type_by_instrumented_path(
        self,
        tmp_path: Path,
    ) -> None:
        """Finds matching test type by instrumented file path."""
        instrumented = tmp_path / "instrumented_test.py"
        instrumented.touch()
        tf = TestFile(
            original_file_path=tmp_path / "test_orig.py",
            instrumented_behavior_file_path=instrumented,
            test_type=TestType.GENERATED_REGRESSION,
        )
        files = TestFiles(test_files=[tf])
        result = files.get_test_type_by_instrumented_file_path(
            instrumented,
        )
        assert TestType.GENERATED_REGRESSION == result

    def test_get_test_type_by_original_path(
        self,
        tmp_path: Path,
    ) -> None:
        """Finds test type by original file path."""
        original = tmp_path / "test_orig.py"
        original.touch()
        tf = TestFile(original_file_path=original)
        files = TestFiles(test_files=[tf])
        result = files.get_test_type_by_original_file_path(original)
        assert TestType.EXISTING_UNIT_TEST == result

    def test_get_test_type_missing(self) -> None:
        """Returns None for unknown path."""
        files = TestFiles()
        result = files.get_test_type_by_instrumented_file_path(
            Path("/nonexistent.py"),
        )
        assert result is None


class TestTestConfig:
    """TestConfig defaults and construction."""

    def test_config_defaults(self) -> None:
        """test_framework defaults to 'pytest'."""
        config = TestConfig(tests_project_rootdir=Path("/project"))
        assert "pytest" == config.test_framework
        assert "pytest" == config.pytest_cmd

    def test_frozen(self) -> None:
        """Cannot modify attributes on frozen instance."""
        config = TestConfig(tests_project_rootdir=Path("/project"))
        with pytest.raises(AttributeError):
            config.test_framework = "unittest"  # type: ignore[misc]


def _make_replay_invocation(
    *,
    module: str,
    func: str = "test_replay",
    loop_index: int = 0,
    runtime: int = 100,
) -> FunctionTestInvocation:
    """Create a REPLAY_TEST invocation with a given module path."""
    return FunctionTestInvocation(
        loop_index=loop_index,
        id=InvocationId(
            test_module_path=module,
            test_class_name=None,
            test_function_name=func,
            function_getting_tested="target",
            iteration_id="0",
        ),
        file_name=Path("tests/test_replay.py"),
        did_pass=True,
        runtime=runtime,
        test_framework="pytest",
        test_type=TestType.REPLAY_TEST,
        return_value=None,
        cpu_runtime=0,
        timed_out=False,
    )


class TestGroupByBenchmarks:
    """TestResults.group_by_benchmarks grouping behavior."""

    def test_groups_replay_results_by_benchmark_key(
        self,
        tmp_path: Path,
    ) -> None:
        """Replay results are grouped under matching benchmark keys."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()
        bk = BenchmarkKey(
            module_path="benchmarks.test_sort",
            function_name="sort_fn",
        )
        # module_name_from_file_path converts the replay dir path
        # into a dotted prefix: replay/test_benchmarks_test_sort__replay_test_
        # => replay.test_benchmarks_test_sort__replay_test_
        expected_prefix = "replay.test_benchmarks_test_sort__replay_test_"
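        # A sketch of the assumed conversion (module_name_from_file_path is
        # not defined in this file): relativize against project_root, drop
        # the ".py" suffix, and dot-join the path parts, roughly
        #   rel = file_path.relative_to(project_root).with_suffix("")
        #   module = ".".join(rel.parts)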
        results = TestResults()
        matching = _make_replay_invocation(
            module=expected_prefix + "0",
            runtime=200,
        )
        non_matching = _make_replay_invocation(
            module="other.module",
            func="test_other",
            runtime=50,
        )
        results.add(matching)
        results.add(non_matching)
        grouped = results.group_by_benchmarks(
            [bk],
            replay_dir,
            project_root,
        )
        assert bk in grouped
        assert 1 == len(grouped[bk])
        assert matching in grouped[bk]

    def test_non_replay_results_are_excluded(
        self,
        tmp_path: Path,
    ) -> None:
        """Only REPLAY_TEST results are included in grouping."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()
        bk = BenchmarkKey(
            module_path="benchmarks.test_sort",
            function_name="sort_fn",
        )
        prefix = "replay.test_benchmarks_test_sort__replay_test_"
        results = TestResults()
        # An existing unit test whose module path happens to match.
        unit_inv = FunctionTestInvocation(
            loop_index=0,
            id=InvocationId(
                test_module_path=prefix + "0",
                test_class_name=None,
                test_function_name="test_unit",
                function_getting_tested="target",
                iteration_id="0",
            ),
            file_name=Path("tests/test_unit.py"),
            did_pass=True,
            runtime=100,
            test_framework="pytest",
            test_type=TestType.EXISTING_UNIT_TEST,
            return_value=None,
            cpu_runtime=0,
            timed_out=False,
        )
        results.add(unit_inv)
        grouped = results.group_by_benchmarks(
            [bk],
            replay_dir,
            project_root,
        )
        assert 0 == len(grouped[bk])

    def test_empty_results_returns_empty_groups(
        self,
        tmp_path: Path,
    ) -> None:
        """Empty TestResults produces empty groups."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()
        bk = BenchmarkKey(
            module_path="benchmarks.test_sort",
            function_name="sort_fn",
        )
        results = TestResults()
        grouped = results.group_by_benchmarks(
            [bk],
            replay_dir,
            project_root,
        )
        assert 0 == len(grouped[bk])

    def test_multiple_benchmark_keys(
        self,
        tmp_path: Path,
    ) -> None:
        """Results are correctly distributed across multiple keys."""
        project_root = tmp_path
        replay_dir = tmp_path / "replay"
        replay_dir.mkdir()
        bk_a = BenchmarkKey(
            module_path="benchmarks.test_a",
            function_name="fn_a",
        )
        bk_b = BenchmarkKey(
            module_path="benchmarks.test_b",
            function_name="fn_b",
        )
        prefix_a = "replay.test_benchmarks_test_a__replay_test_"
        prefix_b = "replay.test_benchmarks_test_b__replay_test_"
        results = TestResults()
        inv_a = _make_replay_invocation(
            module=prefix_a + "0",
            func="test_a",
            runtime=100,
        )
        inv_b = _make_replay_invocation(
            module=prefix_b + "0",
            func="test_b",
            runtime=200,
        )
        results.add(inv_a)
        results.add(inv_b)
        grouped = results.group_by_benchmarks(
            [bk_a, bk_b],
            replay_dir,
            project_root,
        )
        assert 1 == len(grouped[bk_a])
        assert inv_a in grouped[bk_a]
        assert 1 == len(grouped[bk_b])
        assert inv_b in grouped[bk_b]