"""Test subprocess execution and pytest command building."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import shlex
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
from ..runtime._codeflash_wrap_decorator import get_run_tmp_file
|
|
from ..test_discovery.models import TestType
|
|
|
|
if TYPE_CHECKING:
|
|
from .models import TestFiles
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
_BASE_TIMEOUT = 120
|
|
_PER_FILE_TIMEOUT = 60
|
|
_MAX_TIMEOUT = 600
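# Timeout budget grows linearly with the number of test files and is capped,
# e.g. 3 test files -> min(120 + 60 * 3, 600) = 300 seconds.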


def _base_pytest_args(rootdir: Path | None, cwd: Path) -> list[str]:
    """Common pytest args shared across all test runner functions."""
    return [
        "--capture=tee-sys",
        "-q",
        f"--rootdir={rootdir or cwd}",
        "-o",
        "addopts=",
    ]


def _subprocess_timeout(num_test_files: int) -> int:
    """Compute subprocess timeout from the number of test files."""
    return min(
        _BASE_TIMEOUT + _PER_FILE_TIMEOUT * num_test_files, _MAX_TIMEOUT
    )


def execute_test_subprocess(
    cmd_list: list[str],
    cwd: Path,
    env: dict[str, str] | None,
    timeout: int = 600,
) -> subprocess.CompletedProcess[str]:
    """Execute a subprocess with the given command list."""
    log.debug(
        "executing test run with command: %s",
        " ".join(cmd_list),
    )
    try:
        return subprocess.run(  # noqa: S603
            cmd_list,
            cwd=cwd,
            env=env,
            timeout=timeout,
            check=False,
            text=True,
            capture_output=True,
        )
    except subprocess.TimeoutExpired as exc:
        log.warning(
            "Test subprocess timed out after %ds: %s",
            timeout,
            " ".join(cmd_list),
        )
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=-1,
            stdout=str(exc.stdout) if exc.stdout else "",
            stderr=str(exc.stderr) if exc.stderr else "",
        )


def run_behavioral_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    enable_coverage: bool = False,  # noqa: FBT001, FBT002
    rootdir: Path | None = None,
) -> tuple[
    Path,
    subprocess.CompletedProcess[str],
    Path | None,
    Path | None,
]:
    """Run behavioral tests to capture return values."""
    blocklisted_plugins = [
        "benchmark",
        "codspeed",
        "xdist",
        "sugar",
    ]

    test_file_paths: list[str] = []
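    # Replay tests are selected by pytest node id ("file.py::test_fn") so only the
    # recorded test functions run; other instrumented tests run as whole files.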
    for tf in test_files.test_files:
        if tf.test_type == TestType.REPLAY_TEST:
            test_file_paths.extend(
                str(tf.instrumented_behavior_file_path)
                + "::"
                + test.test_function
                for test in tf.tests_in_file
            )
        elif tf.instrumented_behavior_file_path:
            test_file_paths.append(
                str(tf.instrumented_behavior_file_path),
            )
    test_file_paths = list(set(test_file_paths))

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    common_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        common_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path("pytest_results.xml"),
    )
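    # junit_logging=all embeds captured stdout/stderr/log output in the JUnit XML
    # report so it can be parsed alongside the test outcomes.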
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
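    # PYTEST_PLUGINS makes the spawned pytest process load the codeflash
    # instrumentation plugin.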
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )

    coverage_database_file: Path | None = None
    coverage_config_file: Path | None = None

    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]
    subprocess_timeout = _subprocess_timeout(len(test_file_paths))

    if enable_coverage:
        from ..analysis._coverage import (  # noqa: PLC0415
            prepare_coverage_files,
        )
        from ..verification._baseline import (  # noqa: PLC0415
            jit_disabled_env,
        )

        coverage_database_file, coverage_config_file = prepare_coverage_files()
        pytest_test_env.update(jit_disabled_env())
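
        # Under coverage, pytest is launched through "coverage run" so line data
        # is written to the prepared coverage database.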
        coverage_cmd = [
            sys.executable,
            "-m",
            "coverage",
            "run",
            f"--rcfile={coverage_config_file.as_posix()}",
            "-m",
            *shlex.split(pytest_cmd),
        ]
        # Don't block the cov plugin when running under coverage.
        cov_blocklist = [
            f"-p no:{p}" for p in blocklisted_plugins if p != "cov"
        ]
        results = execute_test_subprocess(
            coverage_cmd
            + common_args
            + cov_blocklist
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )
    else:
        results = execute_test_subprocess(
            pytest_cmd_list
            + common_args
            + blocklist_args
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )

    return (
        result_file_path,
        results,
        coverage_database_file,
        coverage_config_file,
    )


def run_benchmarking_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    min_loops: int = 5,
    max_loops: int = 100_000,
    target_duration_seconds: float = 10.0,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Run benchmarking tests to measure performance."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )
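
    # Loop bounds and the target duration are forwarded to the codeflash pytest
    # plugin, which (per the flag names) repeats each test within these limits.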
    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        f"--codeflash_min_loops={min_loops}",
        f"--codeflash_max_loops={max_loops}",
        f"--codeflash_seconds={target_duration_seconds}",
        "--codeflash_stability_check=true",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=pytest_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results


def run_line_profile_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Run tests with line profiling enabled."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )
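
    # Loops are pinned to a single run; line profiling measures per-line cost
    # within that run rather than relying on repetition.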
    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    lp_test_env = test_env.copy()
    lp_test_env["PYTEST_PLUGINS"] = "codeflash_python.testing._pytest_plugin"
    lp_test_env["LINE_PROFILE"] = "1"
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=lp_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results


# -- Async variants for concurrent candidate evaluation --------


async def async_execute_test_subprocess(
    cmd_list: list[str],
    cwd: Path,
    env: dict[str, str] | None,
    timeout: int = 600,
) -> subprocess.CompletedProcess[str]:
    """Execute a subprocess asynchronously."""
    log.debug(
        "executing async test run with command: %s",
        " ".join(cmd_list),
    )
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd_list,
            cwd=cwd,
            env=env,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout_bytes, stderr_bytes = await asyncio.wait_for(
            proc.communicate(),
            timeout=timeout,
        )
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=proc.returncode or 0,
            stdout=stdout_bytes.decode() if stdout_bytes else "",
            stderr=stderr_bytes.decode() if stderr_bytes else "",
        )
    except asyncio.TimeoutError:
        log.warning(
            "Async test subprocess timed out after %ds: %s",
            timeout,
            " ".join(cmd_list),
        )
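        # Kill the timed-out child and mirror the sync path: returncode -1, empty output.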
        proc.kill()
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=-1,
            stdout="",
            stderr="",
        )


async def async_run_behavioral_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    enable_coverage: bool = False,  # noqa: FBT001, FBT002
    rootdir: Path | None = None,
    result_file_name: str = "pytest_results.xml",
) -> tuple[
    Path,
    subprocess.CompletedProcess[str],
    Path | None,
    Path | None,
]:
    """Async version of :func:`run_behavioral_tests` with coverage support."""
    blocklisted_plugins = [
        "benchmark",
        "codspeed",
        "xdist",
        "sugar",
    ]

    test_file_paths: list[str] = []
    for tf in test_files.test_files:
        if tf.test_type == TestType.REPLAY_TEST:
            test_file_paths.extend(
                str(tf.instrumented_behavior_file_path)
                + "::"
                + test.test_function
                for test in tf.tests_in_file
            )
        elif tf.instrumented_behavior_file_path:
            test_file_paths.append(
                str(tf.instrumented_behavior_file_path),
            )
    test_file_paths = list(set(test_file_paths))

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    common_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        common_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )

    coverage_database_file: Path | None = None
    coverage_config_file: Path | None = None

    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]
    subprocess_timeout = _subprocess_timeout(len(test_file_paths))

    if enable_coverage:
        from ..analysis._coverage import (  # noqa: PLC0415
            prepare_coverage_files,
        )
        from ..verification._baseline import (  # noqa: PLC0415
            jit_disabled_env,
        )

        coverage_database_file, coverage_config_file = prepare_coverage_files()
        pytest_test_env.update(jit_disabled_env())

        coverage_cmd = [
            sys.executable,
            "-m",
            "coverage",
            "run",
            f"--rcfile={coverage_config_file.as_posix()}",
            "-m",
            *shlex.split(pytest_cmd),
        ]
        cov_blocklist = [
            f"-p no:{p}" for p in blocklisted_plugins if p != "cov"
        ]
        results = await async_execute_test_subprocess(
            coverage_cmd
            + common_args
            + cov_blocklist
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )
    else:
        results = await async_execute_test_subprocess(
            pytest_cmd_list
            + common_args
            + blocklist_args
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )

    return (
        result_file_path,
        results,
        coverage_database_file,
        coverage_config_file,
    )


async def async_run_benchmarking_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    min_loops: int = 5,
    max_loops: int = 100_000,
    target_duration_seconds: float = 10.0,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Async version of :func:`run_benchmarking_tests`."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )

    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        f"--codeflash_min_loops={min_loops}",
        f"--codeflash_max_loops={max_loops}",
        f"--codeflash_seconds={target_duration_seconds}",
        "--codeflash_stability_check=true",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = await async_execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=pytest_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results


async def async_run_line_profile_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Async version of :func:`run_line_profile_tests`."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )

    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    lp_test_env = test_env.copy()
    lp_test_env["PYTEST_PLUGINS"] = "codeflash_python.testing._pytest_plugin"
    lp_test_env["LINE_PROFILE"] = "1"
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = await async_execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=lp_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results