mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
Fix pytest 9 compat, addopts conflicts, XML recovery, and diagnostics
- Use getattr for rootdir/rootpath in discovery worker (pytest 9 compat)
- Add -o addopts= to all pytest invocations to override project config
- Extract _base_pytest_args helper to eliminate duplication across runners
- Support [tool.pytest] config section (not just [tool.pytest.ini_options])
- Add --dist, --no-flaky-report, --failed-first to BLACKLIST_ADDOPTS
- Add recover=True to XMLParser for malformed JUnit XML tolerance
- Log subprocess stdout/stderr on baseline and candidate test failures
- Friendly warning when GitHub App not installed instead of raw error
- Upgrade repair failure logging from debug to warning
This commit is contained in:
parent: 17de71251f
commit: cefd625d35
8 changed files with 61 additions and 24 deletions
|
|
@ -57,7 +57,7 @@ class PytestCollectionPlugin:
|
|||
global pytest_rootdir, collected_tests
|
||||
|
||||
collected_tests.extend(session.items)
|
||||
pytest_rootdir = session.config.rootdir
|
||||
pytest_rootdir = getattr(session.config, "rootdir", None) or getattr(session.config, "rootpath", None)
|
||||
|
||||
# Write results immediately since pytest.main() will exit after
|
||||
# this callback, not always with a success code.
|
||||
|
|
@ -87,6 +87,7 @@ if __name__ == "__main__":
|
|||
tests_root,
|
||||
"-p",
|
||||
"no:logging",
|
||||
"-o", "addopts=",
|
||||
"--collect-only",
|
||||
"-m",
|
||||
"not skip",
|
||||
|
|
|
|||
|
|
@ -149,6 +149,13 @@ def check_create_pr( # noqa: PLR0913
|
|||
pr_id,
|
||||
pr_url,
|
||||
)
|
||||
elif response.status_code == 401 and "not installed" in response.text: # noqa: PLR2004
|
||||
log.warning(
|
||||
"Could not create PR: the Codeflash GitHub App is not installed on %s/%s. "
|
||||
"Install it at https://github.com/apps/codeflash-ai to enable automatic PRs.",
|
||||
owner,
|
||||
repo,
|
||||
)
|
||||
else:
|
||||
log.error(
|
||||
"Failed to create PR: %s",
|
||||
|
|
|
|||
|
|
@ -172,6 +172,14 @@ def run_tests_and_benchmark( # noqa: PLR0913
|
|||
cid,
|
||||
len(diffs),
|
||||
)
|
||||
if not diffs and run_result is not None and run_result.returncode != 0:
|
||||
log.warning(
|
||||
"Candidate %s test subprocess crashed (exit code %s).\nSTDOUT: %.2000s\nSTDERR: %.2000s",
|
||||
cid,
|
||||
run_result.returncode,
|
||||
run_result.stdout,
|
||||
run_result.stderr,
|
||||
)
|
||||
eval_ctx.record_failed(cid)
|
||||
# Store diffs for potential code repair.
|
||||
if diffs:
|
||||
|
|
|
|||
|
|
@ -332,7 +332,7 @@ def repair_failed_candidates(
|
|||
try:
|
||||
result = code_repair(ai_client, request)
|
||||
except Exception: # noqa: BLE001
|
||||
log.debug(
|
||||
log.warning(
|
||||
"Repair failed for candidate %s",
|
||||
cid,
|
||||
exc_info=True,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@ BLACKLIST_ADDOPTS: tuple[str, ...] = (
|
|||
"--cov",
|
||||
"--profile",
|
||||
"--junitxml",
|
||||
"--dist",
|
||||
"--no-flaky-report",
|
||||
"--failed-first",
|
||||
"-n",
|
||||
)
|
||||
|
||||
|
|
@ -96,14 +99,14 @@ def modify_addopts(
|
|||
try:
|
||||
if filename == "pyproject.toml":
|
||||
data = tomlkit.parse(content)
|
||||
pytest_section = data.get("tool", {}).get("pytest", {})
|
||||
original_addopts = (
|
||||
data.get("tool", {})
|
||||
.get("pytest", {})
|
||||
.get("ini_options", {})
|
||||
.get("addopts", "")
|
||||
pytest_section.get("ini_options", {}).get("addopts", "")
|
||||
or pytest_section.get("addopts", "")
|
||||
)
|
||||
if original_addopts == "":
|
||||
return content, False
|
||||
uses_ini_options = "ini_options" in pytest_section and "addopts" in pytest_section.get("ini_options", {})
|
||||
if isinstance(original_addopts, list):
|
||||
original_addopts = " ".join(original_addopts)
|
||||
original_addopts = original_addopts.replace("=", " ")
|
||||
|
|
@ -134,9 +137,14 @@ def modify_addopts(
|
|||
if new_addopts_args == addopts_args:
|
||||
return content, False
|
||||
if file_type == ".toml":
|
||||
data["tool"]["pytest"]["ini_options"]["addopts"] = ( # type: ignore[index]
|
||||
" ".join(new_addopts_args)
|
||||
)
|
||||
if uses_ini_options:
|
||||
data["tool"]["pytest"]["ini_options"]["addopts"] = ( # type: ignore[index]
|
||||
" ".join(new_addopts_args)
|
||||
)
|
||||
else:
|
||||
data["tool"]["pytest"]["addopts"] = ( # type: ignore[index]
|
||||
" ".join(new_addopts_args)
|
||||
)
|
||||
with config_file.open("w", encoding="utf-8") as f:
|
||||
f.write(tomlkit.dumps(data))
|
||||
return content, True
|
||||
|
|
|
|||
|
|
@ -22,6 +22,16 @@ _PER_FILE_TIMEOUT = 60
|
|||
_MAX_TIMEOUT = 600
|
||||
|
||||
|
||||
def _base_pytest_args(rootdir: Path | None, cwd: Path) -> list[str]:
|
||||
"""Common pytest args shared across all test runner functions."""
|
||||
return [
|
||||
"--capture=tee-sys",
|
||||
"-q",
|
||||
f"--rootdir={rootdir or cwd}",
|
||||
"-o", "addopts=",
|
||||
]
|
||||
|
||||
|
||||
def _subprocess_timeout(num_test_files: int) -> int:
|
||||
"""Compute subprocess timeout from the number of test files."""
|
||||
return min(_BASE_TIMEOUT + _PER_FILE_TIMEOUT * num_test_files, _MAX_TIMEOUT)
|
||||
|
|
@ -105,9 +115,7 @@ def run_behavioral_tests( # noqa: PLR0913
|
|||
*shlex.split(pytest_cmd),
|
||||
]
|
||||
common_args = [
|
||||
"--capture=tee-sys",
|
||||
"-q",
|
||||
f"--rootdir={rootdir or cwd}",
|
||||
*_base_pytest_args(rootdir, cwd),
|
||||
"--codeflash_loops_scope=session",
|
||||
"--codeflash_min_loops=1",
|
||||
"--codeflash_max_loops=1",
|
||||
|
|
@ -226,9 +234,7 @@ def run_benchmarking_tests( # noqa: PLR0913
|
|||
)
|
||||
|
||||
pytest_args = [
|
||||
"--capture=tee-sys",
|
||||
"-q",
|
||||
f"--rootdir={rootdir or cwd}",
|
||||
*_base_pytest_args(rootdir, cwd),
|
||||
"--codeflash_loops_scope=session",
|
||||
f"--codeflash_min_loops={min_loops}",
|
||||
f"--codeflash_max_loops={max_loops}",
|
||||
|
|
@ -299,9 +305,7 @@ def run_line_profile_tests( # noqa: PLR0913
|
|||
)
|
||||
|
||||
pytest_args = [
|
||||
"--capture=tee-sys",
|
||||
"-q",
|
||||
f"--rootdir={rootdir or cwd}",
|
||||
*_base_pytest_args(rootdir, cwd),
|
||||
"--codeflash_loops_scope=session",
|
||||
"--codeflash_min_loops=1",
|
||||
"--codeflash_max_loops=1",
|
||||
|
|
|
|||
|
|
@ -48,13 +48,13 @@ matches_re_end = re.compile(
|
|||
|
||||
|
||||
def _parse_func(file_path): # type: ignore[no-untyped-def]
|
||||
"""XML parser with huge_tree=True to handle large JUnit XML files."""
|
||||
"""XML parser with huge_tree and recover to handle large/malformed JUnit XML."""
|
||||
from lxml.etree import ( # type: ignore[import-untyped] # noqa: PLC0415
|
||||
XMLParser,
|
||||
parse,
|
||||
)
|
||||
|
||||
xml_parser = XMLParser(huge_tree=True)
|
||||
xml_parser = XMLParser(huge_tree=True, recover=True)
|
||||
return parse(file_path, xml_parser)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -236,10 +236,19 @@ def establish_original_code_baseline( # noqa: PLR0913
|
|||
)
|
||||
|
||||
if not behavioral_results:
|
||||
log.warning(
|
||||
"No behavioral test results for original code. "
|
||||
"Skipping baseline establishment.",
|
||||
)
|
||||
if run_result is not None:
|
||||
log.warning(
|
||||
"No behavioral test results for original code (exit code %s). "
|
||||
"Skipping baseline establishment.\nSTDOUT: %.2000s\nSTDERR: %.2000s",
|
||||
run_result.returncode,
|
||||
run_result.stdout,
|
||||
run_result.stderr,
|
||||
)
|
||||
else:
|
||||
log.warning(
|
||||
"No behavioral test results for original code. "
|
||||
"Skipping baseline establishment.",
|
||||
)
|
||||
return None
|
||||
|
||||
# Steps 2 & 3 run concurrently: line profiling and benchmarking
|
||||
|
|
|
|||
Loading…
Reference in a new issue