Fix pytest 9 compat, addopts conflicts, XML recovery, and diagnostics

- Use getattr for rootdir/rootpath in discovery worker (pytest 9 compat)
- Add -o addopts= to all pytest invocations to override project config
- Extract _base_pytest_args helper to eliminate duplication across runners
- Support [tool.pytest] config section (not just [tool.pytest.ini_options])
- Add --dist, --no-flaky-report, --failed-first to BLACKLIST_ADDOPTS
- Add recover=True to XMLParser for malformed JUnit XML tolerance
- Log subprocess stdout/stderr on baseline and candidate test failures
- Friendly warning when GitHub App not installed instead of raw error
- Upgrade repair failure logging from debug to warning
This commit is contained in:
Kevin Turcios 2026-04-21 20:41:43 -05:00
parent 17de71251f
commit cefd625d35
8 changed files with 61 additions and 24 deletions

View file

@ -57,7 +57,7 @@ class PytestCollectionPlugin:
global pytest_rootdir, collected_tests
collected_tests.extend(session.items)
pytest_rootdir = session.config.rootdir
pytest_rootdir = getattr(session.config, "rootdir", None) or getattr(session.config, "rootpath", None)
# Write results immediately since pytest.main() will exit after
# this callback, not always with a success code.
@ -87,6 +87,7 @@ if __name__ == "__main__":
tests_root,
"-p",
"no:logging",
"-o", "addopts=",
"--collect-only",
"-m",
"not skip",

View file

@ -149,6 +149,13 @@ def check_create_pr( # noqa: PLR0913
pr_id,
pr_url,
)
elif response.status_code == 401 and "not installed" in response.text: # noqa: PLR2004
log.warning(
"Could not create PR: the Codeflash GitHub App is not installed on %s/%s. "
"Install it at https://github.com/apps/codeflash-ai to enable automatic PRs.",
owner,
repo,
)
else:
log.error(
"Failed to create PR: %s",

View file

@ -172,6 +172,14 @@ def run_tests_and_benchmark( # noqa: PLR0913
cid,
len(diffs),
)
if not diffs and run_result is not None and run_result.returncode != 0:
log.warning(
"Candidate %s test subprocess crashed (exit code %s).\nSTDOUT: %.2000s\nSTDERR: %.2000s",
cid,
run_result.returncode,
run_result.stdout,
run_result.stderr,
)
eval_ctx.record_failed(cid)
# Store diffs for potential code repair.
if diffs:

View file

@ -332,7 +332,7 @@ def repair_failed_candidates(
try:
result = code_repair(ai_client, request)
except Exception: # noqa: BLE001
log.debug(
log.warning(
"Repair failed for candidate %s",
cid,
exc_info=True,

View file

@ -22,6 +22,9 @@ BLACKLIST_ADDOPTS: tuple[str, ...] = (
"--cov",
"--profile",
"--junitxml",
"--dist",
"--no-flaky-report",
"--failed-first",
"-n",
)
@ -96,14 +99,14 @@ def modify_addopts(
try:
if filename == "pyproject.toml":
data = tomlkit.parse(content)
pytest_section = data.get("tool", {}).get("pytest", {})
original_addopts = (
data.get("tool", {})
.get("pytest", {})
.get("ini_options", {})
.get("addopts", "")
pytest_section.get("ini_options", {}).get("addopts", "")
or pytest_section.get("addopts", "")
)
if original_addopts == "":
return content, False
uses_ini_options = "ini_options" in pytest_section and "addopts" in pytest_section.get("ini_options", {})
if isinstance(original_addopts, list):
original_addopts = " ".join(original_addopts)
original_addopts = original_addopts.replace("=", " ")
@ -134,9 +137,14 @@ def modify_addopts(
if new_addopts_args == addopts_args:
return content, False
if file_type == ".toml":
if uses_ini_options:
data["tool"]["pytest"]["ini_options"]["addopts"] = ( # type: ignore[index]
" ".join(new_addopts_args)
)
else:
data["tool"]["pytest"]["addopts"] = ( # type: ignore[index]
" ".join(new_addopts_args)
)
with config_file.open("w", encoding="utf-8") as f:
f.write(tomlkit.dumps(data))
return content, True

View file

@ -22,6 +22,16 @@ _PER_FILE_TIMEOUT = 60
_MAX_TIMEOUT = 600
def _base_pytest_args(rootdir: Path | None, cwd: Path) -> list[str]:
"""Common pytest args shared across all test runner functions."""
return [
"--capture=tee-sys",
"-q",
f"--rootdir={rootdir or cwd}",
"-o", "addopts=",
]
def _subprocess_timeout(num_test_files: int) -> int:
    """Compute the subprocess timeout from the number of test files.

    Scales linearly from a base timeout, capped at ``_MAX_TIMEOUT`` so a
    huge test suite cannot make the runner wait indefinitely.

    Args:
        num_test_files: Number of test files the subprocess will execute.

    Returns:
        Timeout in seconds, never exceeding ``_MAX_TIMEOUT``.
    """
    scaled = _BASE_TIMEOUT + num_test_files * _PER_FILE_TIMEOUT
    return scaled if scaled < _MAX_TIMEOUT else _MAX_TIMEOUT
@ -105,9 +115,7 @@ def run_behavioral_tests( # noqa: PLR0913
*shlex.split(pytest_cmd),
]
common_args = [
"--capture=tee-sys",
"-q",
f"--rootdir={rootdir or cwd}",
*_base_pytest_args(rootdir, cwd),
"--codeflash_loops_scope=session",
"--codeflash_min_loops=1",
"--codeflash_max_loops=1",
@ -226,9 +234,7 @@ def run_benchmarking_tests( # noqa: PLR0913
)
pytest_args = [
"--capture=tee-sys",
"-q",
f"--rootdir={rootdir or cwd}",
*_base_pytest_args(rootdir, cwd),
"--codeflash_loops_scope=session",
f"--codeflash_min_loops={min_loops}",
f"--codeflash_max_loops={max_loops}",
@ -299,9 +305,7 @@ def run_line_profile_tests( # noqa: PLR0913
)
pytest_args = [
"--capture=tee-sys",
"-q",
f"--rootdir={rootdir or cwd}",
*_base_pytest_args(rootdir, cwd),
"--codeflash_loops_scope=session",
"--codeflash_min_loops=1",
"--codeflash_max_loops=1",

View file

@ -48,13 +48,13 @@ matches_re_end = re.compile(
def _parse_func(file_path):  # type: ignore[no-untyped-def]
    """XML parser with huge_tree and recover to handle large/malformed JUnit XML.

    The diff rendering interleaved the removed and added lines (two
    docstrings, two ``XMLParser`` constructions); this is the coherent
    post-change version: ``huge_tree=True`` lifts lxml's default size
    limits for very large JUnit files, and ``recover=True`` tolerates
    malformed XML (e.g. truncated output from a crashed test subprocess).

    Args:
        file_path: Path (or file-like object) of the JUnit XML report.

    Returns:
        The parsed lxml ElementTree for ``file_path``.
    """
    from lxml.etree import (  # type: ignore[import-untyped]  # noqa: PLC0415
        XMLParser,
        parse,
    )

    xml_parser = XMLParser(huge_tree=True, recover=True)
    return parse(file_path, xml_parser)

View file

@ -236,6 +236,15 @@ def establish_original_code_baseline( # noqa: PLR0913
)
if not behavioral_results:
if run_result is not None:
log.warning(
"No behavioral test results for original code (exit code %s). "
"Skipping baseline establishment.\nSTDOUT: %.2000s\nSTDERR: %.2000s",
run_result.returncode,
run_result.stdout,
run_result.stderr,
)
else:
log.warning(
"No behavioral test results for original code. "
"Skipping baseline establishment.",