"""Test subprocess execution and pytest command building."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import shlex
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
from ..runtime._codeflash_wrap_decorator import get_run_tmp_file
|
|
from ..test_discovery.models import TestType
|
|
|
|
if TYPE_CHECKING:
|
|
from .models import TestFiles
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
_BASE_TIMEOUT = 120
|
|
_PER_FILE_TIMEOUT = 60
|
|
_MAX_TIMEOUT = 600
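# Timeout budget grows linearly with the number of test files and is capped,
# e.g. 3 test files -> min(120 + 60 * 3, 600) = 300 seconds.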


def _base_pytest_args(rootdir: Path | None, cwd: Path) -> list[str]:
    """Common pytest args shared across all test runner functions."""
    return [
        "--capture=tee-sys",
        "-q",
        f"--rootdir={rootdir or cwd}",
        "-o",
        "addopts=",
    ]


def _subprocess_timeout(num_test_files: int) -> int:
    """Compute subprocess timeout from the number of test files."""
    return min(
        _BASE_TIMEOUT + _PER_FILE_TIMEOUT * num_test_files, _MAX_TIMEOUT
    )


def execute_test_subprocess(
    cmd_list: list[str],
    cwd: Path,
    env: dict[str, str] | None,
    timeout: int = 600,
) -> subprocess.CompletedProcess[str]:
    """Execute a subprocess with the given command list."""
    log.debug(
        "executing test run with command: %s",
        " ".join(cmd_list),
    )
    try:
        return subprocess.run(  # noqa: S603
            cmd_list,
            cwd=cwd,
            env=env,
            timeout=timeout,
            check=False,
            text=True,
            capture_output=True,
        )
    except subprocess.TimeoutExpired as exc:
        log.warning(
            "Test subprocess timed out after %ds: %s",
            timeout,
            " ".join(cmd_list),
        )
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=-1,
            stdout=str(exc.stdout) if exc.stdout else "",
            stderr=str(exc.stderr) if exc.stderr else "",
        )


def run_behavioral_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    enable_coverage: bool = False,  # noqa: FBT001, FBT002
    rootdir: Path | None = None,
) -> tuple[
    Path,
    subprocess.CompletedProcess[str],
    Path | None,
    Path | None,
]:
    """Run behavioral tests to capture return values."""
    blocklisted_plugins = [
        "benchmark",
        "codspeed",
        "xdist",
        "sugar",
    ]

    test_file_paths: list[str] = []
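    # Replay tests are selected by pytest node id ("file.py::test_fn") so only the
    # recorded test functions run; other instrumented tests run as whole files.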
    for tf in test_files.test_files:
        if tf.test_type == TestType.REPLAY_TEST:
            test_file_paths.extend(
                str(tf.instrumented_behavior_file_path)
                + "::"
                + test.test_function
                for test in tf.tests_in_file
            )
        elif tf.instrumented_behavior_file_path:
            test_file_paths.append(
                str(tf.instrumented_behavior_file_path),
            )
    test_file_paths = list(set(test_file_paths))

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    common_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        common_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path("pytest_results.xml"),
    )
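    # junit_logging=all embeds captured stdout/stderr/log output in the JUnit XML
    # report so it can be parsed alongside the test outcomes.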
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
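    # PYTEST_PLUGINS makes the spawned pytest process load the codeflash
    # instrumentation plugin.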
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )

    coverage_database_file: Path | None = None
    coverage_config_file: Path | None = None

    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]
    subprocess_timeout = _subprocess_timeout(len(test_file_paths))

    if enable_coverage:
        from ..analysis._coverage import (  # noqa: PLC0415
            prepare_coverage_files,
        )
        from ..verification._baseline import (  # noqa: PLC0415
            jit_disabled_env,
        )

        coverage_database_file, coverage_config_file = prepare_coverage_files()
        pytest_test_env.update(jit_disabled_env())
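
        # Under coverage, pytest is launched through "coverage run" so line data
        # is written to the prepared coverage database.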
        coverage_cmd = [
            sys.executable,
            "-m",
            "coverage",
            "run",
            f"--rcfile={coverage_config_file.as_posix()}",
            "-m",
            *shlex.split(pytest_cmd),
        ]
        # Don't block the cov plugin when running under coverage.
        cov_blocklist = [
            f"-p no:{p}" for p in blocklisted_plugins if p != "cov"
        ]
        results = execute_test_subprocess(
            coverage_cmd
            + common_args
            + cov_blocklist
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )
    else:
        results = execute_test_subprocess(
            pytest_cmd_list
            + common_args
            + blocklist_args
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )

    return (
        result_file_path,
        results,
        coverage_database_file,
        coverage_config_file,
    )


def run_benchmarking_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    min_loops: int = 5,
    max_loops: int = 100_000,
    target_duration_seconds: float = 10.0,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Run benchmarking tests to measure performance."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )
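
    # Loop bounds and the target duration are forwarded to the codeflash pytest
    # plugin, which (per the flag names) repeats each test within these limits.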
    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        f"--codeflash_min_loops={min_loops}",
        f"--codeflash_max_loops={max_loops}",
        f"--codeflash_seconds={target_duration_seconds}",
        "--codeflash_stability_check=true",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=pytest_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results


def run_line_profile_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Run tests with line profiling enabled."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )
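
    # Loops are pinned to a single run; line profiling measures per-line cost
    # within that run rather than relying on repetition.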
    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    lp_test_env = test_env.copy()
    lp_test_env["PYTEST_PLUGINS"] = "codeflash_python.testing._pytest_plugin"
    lp_test_env["LINE_PROFILE"] = "1"
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=lp_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results


# -- Async variants for concurrent candidate evaluation --------


async def async_execute_test_subprocess(
    cmd_list: list[str],
    cwd: Path,
    env: dict[str, str] | None,
    timeout: int = 600,
) -> subprocess.CompletedProcess[str]:
    """Execute a subprocess asynchronously."""
    log.debug(
        "executing async test run with command: %s",
        " ".join(cmd_list),
    )
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd_list,
            cwd=cwd,
            env=env,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout_bytes, stderr_bytes = await asyncio.wait_for(
            proc.communicate(),
            timeout=timeout,
        )
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=proc.returncode or 0,
            stdout=stdout_bytes.decode() if stdout_bytes else "",
            stderr=stderr_bytes.decode() if stderr_bytes else "",
        )
    except asyncio.TimeoutError:
        log.warning(
            "Async test subprocess timed out after %ds: %s",
            timeout,
            " ".join(cmd_list),
        )
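        # Kill the timed-out child and mirror the sync path: returncode -1, empty output.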
        proc.kill()
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=-1,
            stdout="",
            stderr="",
        )


async def async_run_behavioral_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    enable_coverage: bool = False,  # noqa: FBT001, FBT002
    rootdir: Path | None = None,
    result_file_name: str = "pytest_results.xml",
) -> tuple[
    Path,
    subprocess.CompletedProcess[str],
    Path | None,
    Path | None,
]:
    """Async version of :func:`run_behavioral_tests` with coverage support."""
    blocklisted_plugins = [
        "benchmark",
        "codspeed",
        "xdist",
        "sugar",
    ]

    test_file_paths: list[str] = []
    for tf in test_files.test_files:
        if tf.test_type == TestType.REPLAY_TEST:
            test_file_paths.extend(
                str(tf.instrumented_behavior_file_path)
                + "::"
                + test.test_function
                for test in tf.tests_in_file
            )
        elif tf.instrumented_behavior_file_path:
            test_file_paths.append(
                str(tf.instrumented_behavior_file_path),
            )
    test_file_paths = list(set(test_file_paths))

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    common_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        common_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )

    coverage_database_file: Path | None = None
    coverage_config_file: Path | None = None

    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]
    subprocess_timeout = _subprocess_timeout(len(test_file_paths))

    if enable_coverage:
        from ..analysis._coverage import (  # noqa: PLC0415
            prepare_coverage_files,
        )
        from ..verification._baseline import (  # noqa: PLC0415
            jit_disabled_env,
        )

        coverage_database_file, coverage_config_file = prepare_coverage_files()
        pytest_test_env.update(jit_disabled_env())

        coverage_cmd = [
            sys.executable,
            "-m",
            "coverage",
            "run",
            f"--rcfile={coverage_config_file.as_posix()}",
            "-m",
            *shlex.split(pytest_cmd),
        ]
        cov_blocklist = [
            f"-p no:{p}" for p in blocklisted_plugins if p != "cov"
        ]
        results = await async_execute_test_subprocess(
            coverage_cmd
            + common_args
            + cov_blocklist
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )
    else:
        results = await async_execute_test_subprocess(
            pytest_cmd_list
            + common_args
            + blocklist_args
            + result_args
            + test_file_paths,
            cwd=cwd,
            env=pytest_test_env,
            timeout=subprocess_timeout,
        )

    return (
        result_file_path,
        results,
        coverage_database_file,
        coverage_config_file,
    )


async def async_run_benchmarking_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    min_loops: int = 5,
    max_loops: int = 100_000,
    target_duration_seconds: float = 10.0,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Async version of :func:`run_benchmarking_tests`."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )

    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        f"--codeflash_min_loops={min_loops}",
        f"--codeflash_max_loops={max_loops}",
        f"--codeflash_seconds={target_duration_seconds}",
        "--codeflash_stability_check=true",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    pytest_test_env = test_env.copy()
    pytest_test_env["PYTEST_PLUGINS"] = (
        "codeflash_python.testing._pytest_plugin"
    )
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = await async_execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=pytest_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results


async def async_run_line_profile_tests(  # noqa: PLR0913
    test_files: TestFiles,
    test_env: dict[str, str],
    cwd: Path,
    pytest_cmd: str = "pytest",
    timeout: int | None = None,
    result_file_name: str = "pytest_results.xml",
    rootdir: Path | None = None,
) -> tuple[Path, subprocess.CompletedProcess[str]]:
    """Async version of :func:`run_line_profile_tests`."""
    blocklisted_plugins = [
        "codspeed",
        "cov",
        "benchmark",
        "profiling",
        "xdist",
        "sugar",
    ]

    pytest_cmd_list = [
        sys.executable,
        "-m",
        *shlex.split(pytest_cmd),
    ]
    test_file_paths = list(
        {
            str(tf.benchmarking_file_path)
            for tf in test_files.test_files
            if tf.benchmarking_file_path
        }
    )

    pytest_args = [
        *_base_pytest_args(rootdir, cwd),
        "--codeflash_loops_scope=session",
        "--codeflash_min_loops=1",
        "--codeflash_max_loops=1",
        "--codeflash_seconds=10.0",
    ]
    if timeout is not None:
        pytest_args.append(f"--timeout={timeout}")

    result_file_path = get_run_tmp_file(
        Path(result_file_name),
    )
    result_args = [
        f"--junitxml={result_file_path.as_posix()}",
        "-o",
        "junit_logging=all",
    ]

    lp_test_env = test_env.copy()
    lp_test_env["PYTEST_PLUGINS"] = "codeflash_python.testing._pytest_plugin"
    lp_test_env["LINE_PROFILE"] = "1"
    blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]

    results = await async_execute_test_subprocess(
        pytest_cmd_list
        + pytest_args
        + blocklist_args
        + result_args
        + test_file_paths,
        cwd=cwd,
        env=lp_test_env,
        timeout=_subprocess_timeout(len(test_file_paths)),
    )
    return result_file_path, results