codeflash/codeflash/verification/parse_test_output.py
2026-02-13 09:48:22 -05:00

1052 lines
47 KiB
Python

from __future__ import annotations
import os
import re
import sqlite3
import subprocess
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
import dill as pickle
from junitparser.xunit2 import JUnitXml
from lxml.etree import XMLParser, parse
from codeflash.cli_cmds.console import DEBUG_MODE, console, logger
from codeflash.code_utils.code_utils import (
file_name_from_test_module_name,
file_path_from_module_name,
get_run_tmp_file,
module_name_from_file_path,
)
from codeflash.discovery.discover_unit_tests import discover_parameters_unittest
from codeflash.languages import is_javascript
# Import Jest-specific parsing from the JavaScript language module
from codeflash.languages.javascript.parse import parse_jest_test_xml as _parse_jest_test_xml
from codeflash.models.models import (
ConcurrencyMetrics,
FunctionTestInvocation,
InvocationId,
TestResults,
TestType,
VerificationType,
)
from codeflash.verification.coverage_utils import CoverageUtils, JestCoverageUtils
if TYPE_CHECKING:
import subprocess
from codeflash.models.models import CodeOptimizationContext, CoverageData, TestFiles
from codeflash.verification.verification_utils import TestConfig
def parse_func(file_path: Path) -> XMLParser:
    """Parse ``file_path`` with lxml using a parser created with ``huge_tree=True``.

    This is handed to ``JUnitXml.fromfile`` as its ``parse_func`` backend.
    """
    # NOTE(review): the value returned is whatever lxml's ``parse`` produces, not the
    # parser object itself, so the annotation looks inaccurate -- confirm before relying on it.
    return parse(file_path, XMLParser(huge_tree=True))
# Start/end markers that parse_test_xml searches for in each testcase's captured stdout.
# Capture groups, as consumed by parse_test_xml:
#   0 = test module path
#   1 = test class name followed by a trailing "." (empty when there is no class;
#       the caller strips the trailing character)
#   2 = test function name
#   3 = name of the function being tested
#   4 = loop index
#   5 = iteration id (in the end marker this group may also carry ":<runtime>",
#       which the caller splits off)
matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n")
matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!")
end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+):([^:]+)######!")
# Jest timing marker patterns are imported from codeflash.languages.javascript.parse
# and re-exported here for backwards compatibility


def calculate_function_throughput_from_test_results(test_results: TestResults, function_name: str) -> int:
    """Count completed executions of ``function_name`` recorded in the performance stdout.

    An execution counts as completed when a start tag has an end tag carrying the same
    first five fields:

    Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$!
    End:   !######test_module:test_function:function_name:loop_index:iteration_id:duration######!

    Args:
        test_results: Object whose ``perf_stdout`` holds the captured output (may be empty/None).
        function_name: Only start tags whose third field equals this name are counted.

    Returns:
        The number of start tags for ``function_name`` that have a matching end tag.

    """
    perf_output = test_results.perf_stdout or ""
    # Drop the trailing duration field so end tags can be compared with start tags.
    completed = {end_fields[:5] for end_fields in end_pattern.findall(perf_output)}
    return sum(
        1
        for start_fields in start_pattern.findall(perf_output)
        if start_fields in completed and len(start_fields) > 2 and start_fields[2] == function_name
    )
# Pattern for concurrency benchmark output:
# !@######CONC:module:class:test:function:loop_index:seq_time:conc_time:factor######@!
_concurrency_pattern = re.compile(r"!@######CONC:([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):(\d+):(\d+):(\d+)######@!")


def parse_concurrency_metrics(test_results: TestResults, function_name: str) -> ConcurrencyMetrics | None:
    """Average the concurrency benchmark markers emitted for ``function_name``.

    Marker format:
    !@######CONC:module:class:test:function:loop_index:seq_time:conc_time:factor######@!

    Returns:
        ``None`` when there is no performance stdout or no marker for ``function_name``.
        Otherwise a ConcurrencyMetrics with:
        - sequential_time_ns: mean of the seq_time fields (truncated to int)
        - concurrent_time_ns: mean of the conc_time fields (truncated to int)
        - concurrency_factor: the factor field of the last matching marker
        - concurrency_ratio: mean sequential / mean concurrent, or 1.0 when the
          mean concurrent time is not positive

    """
    perf_output = test_results.perf_stdout
    if not perf_output:
        return None

    # Field 3 of each marker is the benchmarked function's name.
    relevant = [
        fields
        for fields in _concurrency_pattern.findall(perf_output)
        if len(fields) >= 8 and fields[3] == function_name
    ]
    if not relevant:
        return None

    sample_count = len(relevant)
    mean_sequential = sum(int(fields[5]) for fields in relevant) / sample_count
    mean_concurrent = sum(int(fields[6]) for fields in relevant) / sample_count
    if mean_concurrent > 0:
        ratio = mean_sequential / mean_concurrent
    else:
        ratio = 1.0

    return ConcurrencyMetrics(
        sequential_time_ns=int(mean_sequential),
        concurrent_time_ns=int(mean_concurrent),
        concurrency_factor=int(relevant[-1][7]),
        concurrency_ratio=ratio,
    )
def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> Path | None:
"""Resolve test file path from pytest's test class path.
This function handles various cases where pytest's classname in JUnit XML
includes parent directories that may already be part of base_dir.
Args:
test_class_path: The full class path from pytest (e.g., "project.tests.test_file.TestClass")
or a file path from Jest (e.g., "tests/test_file.test.js")
base_dir: The base directory for tests (tests project root)
Returns:
Path to the test file if found, None otherwise
Examples:
>>> # base_dir = "/path/to/tests"
>>> # test_class_path = "code_to_optimize.tests.unittest.test_file.TestClass"
>>> # Should find: /path/to/tests/unittest/test_file.py
"""
# Handle file paths (contain slashes and extensions like .js/.ts)
if "/" in test_class_path or "\\" in test_class_path:
# This is a file path, not a Python module path
# Try the path as-is if it's absolute
potential_path = Path(test_class_path)
if potential_path.is_absolute() and potential_path.exists():
return potential_path
# Try to resolve relative to base_dir's parent (project root)
project_root = base_dir.parent
potential_path = project_root / test_class_path
# Normalize to resolve .. and . components
try:
potential_path = potential_path.resolve()
if potential_path.exists():
return potential_path
except (OSError, RuntimeError):
pass
# Also try relative to base_dir itself
potential_path = base_dir / test_class_path
try:
potential_path = potential_path.resolve()
if potential_path.exists():
return potential_path
except (OSError, RuntimeError):
pass
return None
# First try the full path (Python module path)
test_file_path = file_name_from_test_module_name(test_class_path, base_dir)
# If we couldn't find the file, try stripping the last component (likely a class name)
# This handles cases like "module.TestClass" where TestClass is a class, not a module
if test_file_path is None and "." in test_class_path:
module_without_class = ".".join(test_class_path.split(".")[:-1])
test_file_path = file_name_from_test_module_name(module_without_class, base_dir)
# If still not found, progressively strip prefix components
# This handles cases where pytest's classname includes parent directories that are
# already part of base_dir (e.g., "project.tests.unittest.test_file.TestClass"
# when base_dir is "/.../tests")
if test_file_path is None:
parts = test_class_path.split(".")
# Try stripping 1, 2, 3, ... prefix components
for num_to_strip in range(1, len(parts)):
remaining = ".".join(parts[num_to_strip:])
test_file_path = file_name_from_test_module_name(remaining, base_dir)
if test_file_path:
break
# Also try without the last component (class name)
if "." in remaining:
remaining_no_class = ".".join(remaining.split(".")[:-1])
test_file_path = file_name_from_test_module_name(remaining_no_class, base_dir)
if test_file_path:
break
return test_file_path
def parse_jest_json_results(
    file_location: Path, test_files: TestFiles, test_config: TestConfig, function_name: str | None = None
) -> TestResults:
    """Build TestResults from the JSON results file written by codeflash-jest-helper.

    Args:
        file_location: Path to the JSON results file.
        test_files: TestFiles object containing test file information.
        test_config: Test configuration.
        function_name: Name of the function being tested; used when a result entry
            carries no ``funcName``.

    Returns:
        TestResults containing parsed test invocations. Empty when the file is missing;
        any parsing error is logged as a warning and whatever was collected so far is returned.

    """
    import json

    test_results = TestResults()
    if not file_location.exists():
        logger.debug(f"No Jest JSON results at {file_location}")
        return test_results

    def _relative_label(path: Path) -> str:
        # Path relative to the tests root, or just the file name when it lies outside it.
        try:
            return str(path.relative_to(test_config.tests_project_rootdir))
        except ValueError:
            return path.name

    try:
        with file_location.open("r") as f:
            data = json.load(f)

        for result in data.get("results", []):
            test_name = result.get("testName", "") or result.get("testFunctionName", "")
            func_name = result.get("funcName", "")
            duration_ns = result.get("durationNs", 0)
            loop_index = result.get("loopIndex", 1)
            invocation_id = result.get("invocationId", 0)
            error = result.get("error")
            result_module_path = result.get("testModulePath", "")

            test_file_path = None
            test_type = TestType.GENERATED_REGRESSION  # Default for Jest generated tests

            # Preferred: match testModulePath against the registered test files.
            if result_module_path:
                # e.g. "tests.test_foo.test" -> "tests/test_foo/test.js"
                expected_path = result_module_path.replace(".", "/")
                if not expected_path.endswith(".js"):
                    expected_path += ".js"

                for test_file in test_files.test_files:
                    # Behavior path is checked before the benchmarking path.
                    for candidate in (test_file.instrumented_behavior_file_path, test_file.benchmarking_file_path):
                        if not candidate:
                            continue
                        rel_path = _relative_label(candidate)
                        if (
                            rel_path == expected_path
                            or rel_path.replace("/", ".").replace(".js", "") == result_module_path
                        ):
                            test_file_path = candidate
                            test_type = test_file.test_type
                            break
                    if test_file_path is not None:
                        break

            # Fallback: find the first test file that exists (legacy behavior);
            # here the benchmarking path is checked before the behavior path.
            if test_file_path is None:
                for test_file in test_files.test_files:
                    for candidate in (test_file.benchmarking_file_path, test_file.instrumented_behavior_file_path):
                        if candidate and candidate.exists():
                            test_file_path = candidate
                            test_type = test_file.test_type
                            break
                    if test_file_path is not None:
                        break

            if test_file_path is None:
                logger.debug(f"Could not find test file for Jest result: {test_name} (module: {result_module_path})")
                continue

            # For Jest tests the relative file path keeps its extension
            # (Python uses module_name_from_file_path which strips extensions).
            invocation_id_obj = InvocationId(
                test_module_path=_relative_label(test_file_path),
                test_class_name=None,
                test_function_name=test_name or func_name,
                # Prefer funcName from the result, then the passed function_name.
                function_getting_tested=func_name or function_name or "unknown",
                iteration_id=str(invocation_id),
            )
            test_results.add(
                function_test_invocation=FunctionTestInvocation(
                    loop_index=loop_index,
                    id=invocation_id_obj,
                    file_name=test_file_path,
                    did_pass=error is None,
                    runtime=duration_ns,
                    test_framework=test_config.test_framework,
                    test_type=test_type,
                    return_value=result.get("returnValue"),
                    timed_out=False,
                    verification_type=VerificationType.FUNCTION_CALL,
                )
            )
    except Exception as e:
        logger.warning(f"Failed to parse Jest JSON results from {file_location}: {e}")

    return test_results
def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
    """Read invocation records from the legacy binary results file.

    Each record is laid out as (all integers big-endian):
    4-byte length + ASCII test name, 8-byte duration, 4-byte length + pickled return
    value, 8-byte loop index, 4-byte length + ASCII invocation id.

    Returns:
        The TestResults gathered so far. Reading stops at end of file, or on the first
        error (which is logged as a warning). Records whose pickle cannot be loaded are skipped.

    """
    test_results = TestResults()
    if not file_location.exists():
        logger.debug(f"No test results for {file_location} found.")
        console.rule()
        return test_results

    with file_location.open("rb") as file:

        def read_uint(width: int) -> int:
            return int.from_bytes(file.read(width), byteorder="big")

        def read_sized_blob() -> bytes:
            return file.read(read_uint(4))

        try:
            while True:
                name_length_bytes = file.read(4)
                if not name_length_bytes:
                    # Clean end of file.
                    return test_results
                name_length = int.from_bytes(name_length_bytes, byteorder="big")
                encoded_test_name = file.read(name_length).decode("ascii")
                duration = read_uint(8)
                test_pickle_bin = read_sized_blob()
                loop_index = read_uint(8)
                invocation_id = read_sized_blob().decode("ascii")

                invocation_id_object = InvocationId.from_str_id(encoded_test_name, invocation_id)
                test_file_path = file_path_from_module_name(
                    invocation_id_object.test_module_path, test_config.tests_project_rootdir
                )
                test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)

                # The return value is only unpickled for the first loop iteration.
                try:
                    if loop_index == 1:
                        test_pickle = pickle.loads(test_pickle_bin)
                    else:
                        test_pickle = None
                except Exception as e:
                    if DEBUG_MODE:
                        logger.exception(f"Failed to load pickle file for {encoded_test_name} Exception: {e}")
                    continue

                assert test_type is not None, f"Test type not found for {test_file_path}"
                test_results.add(
                    function_test_invocation=FunctionTestInvocation(
                        loop_index=loop_index,
                        id=invocation_id_object,
                        file_name=test_file_path,
                        did_pass=True,
                        runtime=duration,
                        test_framework=test_config.test_framework,
                        test_type=test_type,
                        return_value=test_pickle,
                        timed_out=False,
                        verification_type=VerificationType.FUNCTION_CALL,
                    )
                )
        except Exception as e:
            logger.warning(f"Failed to parse test results from {file_location}. Exception: {e}")
            return test_results
    return test_results
def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
    """Load per-invocation results from the ``test_results`` table of a SQLite file.

    Args:
        sqlite_file_path: SQLite database written during the test run.
        test_files: Registered test files, used to look up each row's test type.
        test_config: Test configuration (tests root directory and framework name).

    Returns:
        TestResults with one invocation per usable row. Empty when the file is missing
        or the query fails. Rows are skipped when their test type cannot be determined
        (Python only), when their return value cannot be deserialized, or when any other
        error occurs while processing them (logged).

    """
    test_results = TestResults()
    if not sqlite_file_path.exists():
        logger.warning(f"No test results for {sqlite_file_path} found.")
        console.rule()
        return test_results

    db = None
    try:
        db = sqlite3.connect(sqlite_file_path)
        cur = db.cursor()
        data = cur.execute(
            "SELECT test_module_path, test_class_name, test_function_name, "
            "function_getting_tested, loop_index, iteration_id, runtime, return_value,verification_type FROM test_results"
        ).fetchall()
    except Exception as e:
        logger.warning(f"Failed to parse test results from {sqlite_file_path}. Exception: {e}")
        return test_results
    finally:
        # Close exactly once, and only if the connection was actually opened;
        # connect() itself may be the call that failed.
        if db is not None:
            db.close()

    # Check if this is a JavaScript test (use JSON) or Python test (use pickle)
    is_jest = is_javascript()

    # Jest test file extensions (including .test.ts, .spec.ts patterns).
    # Longer suffixes come first so that e.g. ".test.ts" wins over ".ts".
    jest_test_extensions = (
        ".test.ts",
        ".test.js",
        ".test.tsx",
        ".test.jsx",
        ".spec.ts",
        ".spec.js",
        ".spec.tsx",
        ".spec.jsx",
        ".ts",
        ".js",
        ".tsx",
        ".jsx",
        ".mjs",
        ".mts",
    )

    for val in data:
        try:
            test_module_path = val[0]
            test_class_name = val[1] if val[1] else None
            test_function_name = val[2] if val[2] else None
            function_getting_tested = val[3]

            # For Jest tests, test_module_path could be:
            # - A module-style path: "tests.fibonacci.test.ts" (dots as separators)
            # - A file path: "tests/fibonacci.test.ts" (slashes as separators)
            # For Python, it's a module path (e.g., "tests.test_foo") that needs conversion
            if is_jest:
                # Check if it's a module-style path (no slashes, has dots beyond extension)
                if "/" not in test_module_path and "\\" not in test_module_path:
                    # Find the appropriate extension to preserve
                    extension = ""
                    for ext in jest_test_extensions:
                        if test_module_path.endswith(ext):
                            extension = ext
                            break
                    if extension:
                        # Convert module-style path to file path
                        # "tests.fibonacci__perfinstrumented.test.ts" -> "tests/fibonacci__perfinstrumented.test.ts"
                        base_path = test_module_path[: -len(extension)]
                        file_path = base_path.replace(".", os.sep) + extension
                        # Check if the module path includes the tests directory name
                        tests_dir_name = test_config.tests_project_rootdir.name
                        if file_path.startswith((tests_dir_name + os.sep, tests_dir_name + "/")):
                            # Module path includes "tests." - use project root parent
                            test_file_path = test_config.tests_project_rootdir.parent / file_path
                        else:
                            # Module path doesn't include tests dir - use tests root directly
                            test_file_path = test_config.tests_project_rootdir / file_path
                    else:
                        # No recognized extension, treat as-is
                        test_file_path = test_config.tests_project_rootdir / test_module_path
                else:
                    # Already a file path
                    test_file_path = test_config.tests_project_rootdir / test_module_path
            else:
                # Python: convert module path to file path
                test_file_path = file_path_from_module_name(test_module_path, test_config.tests_project_rootdir)

            loop_index = val[4]
            iteration_id = val[5]
            runtime = val[6]
            verification_type = val[8]

            if verification_type in {VerificationType.INIT_STATE_FTO, VerificationType.INIT_STATE_HELPER}:
                test_type = TestType.INIT_STATE_TEST
            else:
                # Try original_file_path first (for existing tests that were instrumented)
                test_type = test_files.get_test_type_by_original_file_path(test_file_path)
                logger.debug(f"[PARSE-DEBUG] test_module={test_module_path}, test_file_path={test_file_path}")
                logger.debug(f"[PARSE-DEBUG] by_original_file_path: {test_type}")
                # If not found, try instrumented_behavior_file_path (for generated tests)
                if test_type is None:
                    test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
                    logger.debug(f"[PARSE-DEBUG] by_instrumented_file_path: {test_type}")
                # Default to GENERATED_REGRESSION for Jest tests when test type can't be determined
                if test_type is None and is_jest:
                    test_type = TestType.GENERATED_REGRESSION
                    logger.debug("[PARSE-DEBUG] defaulting to GENERATED_REGRESSION (Jest)")
                elif test_type is None:
                    # Skip results where test type cannot be determined
                    logger.debug(f"Skipping result for {test_function_name}: could not determine test type")
                    continue
                logger.debug(f"[PARSE-DEBUG] FINAL test_type={test_type}")

            # Deserialize return value (first loop iteration only)
            # For Jest: Skip deserialization - comparison happens via language-specific comparator
            # For Python: Use pickle to deserialize
            ret_val = None
            if loop_index == 1 and val[7]:
                try:
                    if is_jest:
                        # Jest comparison happens via Node.js script (language_support.compare_test_results)
                        # Store a marker indicating data exists but is not deserialized in Python
                        ret_val = ("__serialized__", val[7])
                    else:
                        # Python uses pickle serialization.
                        # NOTE(review): pickle.loads executes arbitrary code; this is only
                        # safe as long as the file is produced by our own test run.
                        ret_val = (pickle.loads(val[7]),)
                except Exception as e:
                    # If deserialization fails, skip this result
                    logger.debug(f"Failed to deserialize return value for {test_function_name}: {e}")
                    continue

            test_results.add(
                function_test_invocation=FunctionTestInvocation(
                    loop_index=loop_index,
                    id=InvocationId(
                        test_module_path=test_module_path,
                        test_class_name=test_class_name,
                        test_function_name=test_function_name,
                        function_getting_tested=function_getting_tested,
                        iteration_id=iteration_id,
                    ),
                    file_name=test_file_path,
                    # Hardcoding the test result to True because the test did execute and we are
                    # only interested in the return values, the did_pass comes from the xml results file
                    did_pass=True,
                    runtime=runtime,
                    test_framework=test_config.test_framework,
                    test_type=test_type,
                    return_value=ret_val,
                    timed_out=False,
                    verification_type=VerificationType(verification_type) if verification_type else None,
                )
            )
        except Exception:
            logger.exception(f"Failed to parse sqlite test results for {sqlite_file_path}")

    return test_results
def _stream_to_text(stream: str | bytes | None) -> str:
    """Return a captured subprocess stream as text, whether it was captured as str, bytes or not at all."""
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        return stream.decode(errors="replace")
    return stream


def parse_test_xml(
    test_xml_file_path: Path,
    test_files: TestFiles,
    test_config: TestConfig,
    run_result: subprocess.CompletedProcess | None = None,
) -> TestResults:
    """Parse a JUnit XML report into TestResults.

    JavaScript/TypeScript runs are delegated to the Jest-specific parser. For Python,
    every testcase yields either one invocation per start marker found in its captured
    stdout, or a single invocation without runtime when no markers are present.

    Args:
        test_xml_file_path: JUnit XML file produced by the test run.
        test_files: Registered test files, used to look up each testcase's test type.
        test_config: Test configuration (tests root directory and framework name).
        run_result: Completed test process; only used to log its output when tests
            failed to load or produced no results.

    Returns:
        The parsed TestResults. Empty when the XML file is missing or unparsable, or
        when a unittest load failure is detected. Testcases whose file or test type
        cannot be resolved are skipped with a warning.

    """
    # Route to Jest-specific parser for JavaScript/TypeScript tests
    if is_javascript():
        return _parse_jest_test_xml(
            test_xml_file_path,
            test_files,
            test_config,
            run_result,
            parse_func=parse_func,
            resolve_test_file_from_class_path=resolve_test_file_from_class_path,
        )

    test_results = TestResults()
    # Parse unittest output
    if not test_xml_file_path.exists():
        logger.warning(f"No test results for {test_xml_file_path} found.")
        console.rule()
        return test_results
    try:
        xml = JUnitXml.fromfile(str(test_xml_file_path), parse_func=parse_func)
    except Exception as e:
        logger.warning(f"Failed to parse {test_xml_file_path} as JUnitXml. Exception: {e}")
        return test_results

    # Always use tests_project_rootdir since pytest is now the test runner for all frameworks
    base_dir = test_config.tests_project_rootdir
    for suite in xml:
        for testcase in suite:
            class_name = testcase.classname
            test_file_name = suite._elem.attrib.get("file")  # noqa: SLF001
            if (
                test_file_name == f"unittest{os.sep}loader.py"
                and class_name == "unittest.loader._FailedTest"
                and suite.errors == 1
                and suite.tests == 1
            ):
                # This means that the test failed to load, so we don't want to crash on it
                logger.info("Test failed to load, skipping it.")
                if run_result is not None:
                    # Streams may be str, bytes or None depending on how the process was run.
                    stdout = _stream_to_text(run_result.stdout)
                    stderr = _stream_to_text(run_result.stderr)
                    logger.info(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
                return test_results

            test_class_path = testcase.classname
            try:
                if testcase.name is None:
                    logger.debug(
                        f"testcase.name is None for testcase {testcase!r} in file {test_xml_file_path}, skipping"
                    )
                    continue
                # Drop any "[...]" suffix from the testcase name.
                test_function = testcase.name.split("[", 1)[0] if "[" in testcase.name else testcase.name
            except (AttributeError, TypeError) as e:
                msg = (
                    f"Accessing testcase.name in parse_test_xml for testcase {testcase!r} in file"
                    f" {test_xml_file_path} has exception: {e}"
                )
                logger.exception(msg)
                continue

            if test_file_name is None:
                if test_class_path:
                    # TODO : This might not be true if the test is organized under a class
                    test_file_path = resolve_test_file_from_class_path(test_class_path, base_dir)
                    if test_file_path is None:
                        logger.warning(f"Could not find the test for file name - {test_class_path} ")
                        continue
                else:
                    test_file_path = file_path_from_module_name(test_function, base_dir)
            else:
                test_file_path = base_dir / test_file_name
            assert test_file_path, f"Test file path not found for {test_file_name}"

            if not test_file_path.exists():
                logger.warning(f"Could not find the test for file name - {test_file_path} ")
                continue
            test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
            if test_type is None:
                # Log registered paths for debugging
                registered_paths = [str(tf.instrumented_behavior_file_path) for tf in test_files.test_files]
                logger.warning(
                    f"Test type not found for '{test_file_path}'. "
                    f"Registered test files: {registered_paths}. Skipping test case."
                )
                continue

            test_module_path = module_name_from_file_path(test_file_path, test_config.tests_project_rootdir)
            result = testcase.is_passed  # TODO: See for the cases of ERROR and SKIPPED
            test_class = None
            if class_name is not None and class_name.startswith(test_module_path):
                test_class = class_name[len(test_module_path) + 1 :]  # +1 for the dot, gets Unittest class name

            # The loop index is read from a "[ N ]" suffix on the testcase name; defaults to 1.
            loop_index = int(testcase.name.split("[ ")[-1][:-2]) if testcase.name and "[" in testcase.name else 1

            timed_out = False
            if len(testcase.result) > 1:
                logger.debug(f"!!!!!Multiple results for {testcase.name or '<None>'} in {test_xml_file_path}!!!")
            if len(testcase.result) == 1:
                # The message attribute may be absent from the XML, so guard against None.
                message = (testcase.result[0].message or "").lower()
                if "failed: timeout >" in message or "timed out" in message:
                    timed_out = True

            sys_stdout = testcase.system_out or ""
            begin_matches = list(matches_re_start.finditer(sys_stdout))
            # Index end markers by the same six fields a start marker carries, so the two
            # can be paired; the end marker's last field may be "<iteration_id>:<runtime>".
            end_matches = {}
            for match in matches_re_end.finditer(sys_stdout):
                groups = match.groups()
                if len(groups[5].split(":")) > 1:
                    iteration_id = groups[5].split(":")[0]
                    groups = (*groups[:5], iteration_id)
                end_matches[groups] = match

            if not begin_matches:
                # No markers in stdout: record the testcase itself, without runtime.
                test_results.add(
                    FunctionTestInvocation(
                        loop_index=loop_index,
                        id=InvocationId(
                            test_module_path=test_module_path,
                            test_class_name=test_class,
                            test_function_name=test_function,
                            function_getting_tested="",  # TODO: Fix this
                            iteration_id="",
                        ),
                        file_name=test_file_path,
                        runtime=None,
                        test_framework=test_config.test_framework,
                        did_pass=result,
                        test_type=test_type,
                        return_value=None,
                        timed_out=timed_out,
                        stdout="",
                    )
                )
            else:
                for match_index, match in enumerate(begin_matches):
                    groups = match.groups()
                    end_match = end_matches.get(groups)
                    iteration_id, runtime = groups[5], None
                    if end_match:
                        stdout = sys_stdout[match.end() : end_match.start()]
                        split_val = end_match.groups()[5].split(":")
                        if len(split_val) > 1:
                            iteration_id = split_val[0]
                            runtime = int(split_val[1])
                        else:
                            iteration_id, runtime = split_val[0], None
                    elif match_index == len(begin_matches) - 1:
                        # Unterminated last marker: everything after it is its stdout.
                        stdout = sys_stdout[match.end() :]
                    else:
                        # Unterminated marker: its stdout runs up to the next start marker.
                        stdout = sys_stdout[match.end() : begin_matches[match_index + 1].start()]

                    test_results.add(
                        FunctionTestInvocation(
                            loop_index=int(groups[4]),
                            id=InvocationId(
                                test_module_path=groups[0],
                                # groups[1] carries a trailing "." that is stripped here.
                                test_class_name=None if groups[1] == "" else groups[1][:-1],
                                test_function_name=groups[2],
                                function_getting_tested=groups[3],
                                iteration_id=iteration_id,
                            ),
                            file_name=test_file_path,
                            runtime=runtime,
                            test_framework=test_config.test_framework,
                            did_pass=result,
                            test_type=test_type,
                            return_value=None,
                            timed_out=timed_out,
                            stdout=stdout,
                        )
                    )

    if not test_results:
        logger.info(
            f"Tests '{[test_file.original_file_path for test_file in test_files.test_files]}' failed to run, skipping"
        )
        if run_result is not None:
            stdout = _stream_to_text(run_result.stdout)
            stderr = _stream_to_text(run_result.stderr)
            logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
    return test_results
def merge_test_results(
    xml_test_results: TestResults, bin_test_results: TestResults, test_framework: str
) -> TestResults:
    """Combine XML results with the results read from SQLite/binary data.

    Results from both sources are grouped by
    ``module:class:function:loop_index``; the grouping is needed to match the right
    iteration_id, which might not be available in the xml. XML groups that have no
    data counterpart are carried over unchanged.
    """

    def _group_key(invocation, function_name) -> str:
        return ":".join(
            (
                invocation.id.test_module_path or "",
                invocation.id.test_class_name or "",
                function_name or "",
                str(invocation.loop_index),
            )
        )

    def _as_verification_type(raw):
        return VerificationType(raw) if raw else None

    def _pick_runtime(data_result, xml_result):
        # Prefer XML runtime (from stdout markers) if bin runtime is None/0
        # This is important for Jest perf tests which output timing to stdout, not SQLite
        return data_result.runtime or xml_result.runtime

    merged_test_results = TestResults()
    grouped_xml_results: defaultdict[str, TestResults] = defaultdict(TestResults)
    grouped_bin_results: defaultdict[str, TestResults] = defaultdict(TestResults)

    for result in xml_test_results:
        raw_name = result.id.test_function_name
        if test_framework == "pytest":
            # Parameterized pytest tests end in "[...]"; group them by the bare name.
            if raw_name.endswith("]") and "[" in raw_name:
                test_function_name = raw_name[: raw_name.index("[")]
            else:
                test_function_name = raw_name
        elif test_framework == "unittest":
            is_parameterized, new_test_function_name, _ = discover_parameters_unittest(raw_name)
            test_function_name = new_test_function_name if is_parameterized else raw_name
        else:
            # Jest and other frameworks - use test function name as-is
            test_function_name = raw_name
        grouped_xml_results[_group_key(result, test_function_name)].add(result)

    for result in bin_test_results:
        grouped_bin_results[_group_key(result, result.id.test_function_name)].add(result)

    for result_id, xml_results in grouped_xml_results.items():
        bin_results = grouped_bin_results.get(result_id)
        if not bin_results:
            merged_test_results.merge(xml_results)
            continue

        if len(xml_results) == 1:
            # Only one FunctionTestInvocation for this test in the xml: a whole test
            # function either fails or passes, so apply it to every data result.
            xml_result = xml_results[0]
            for result_bin in bin_results:
                merged_test_results.add(
                    FunctionTestInvocation(
                        loop_index=xml_result.loop_index,
                        id=result_bin.id,
                        file_name=xml_result.file_name,
                        runtime=_pick_runtime(result_bin, xml_result),
                        test_framework=xml_result.test_framework,
                        did_pass=xml_result.did_pass,
                        test_type=xml_result.test_type,
                        return_value=result_bin.return_value,
                        timed_out=xml_result.timed_out,
                        verification_type=_as_verification_type(result_bin.verification_type),
                        stdout=xml_result.stdout,
                    )
                )
        elif xml_results.test_results[0].id.iteration_id is not None:
            # Multiple iterations of the same test function: pair each xml result
            # with the data result that has the same unique invocation/loop id.
            for xml_result in xml_results.test_results:
                try:
                    bin_result = bin_results.get_by_unique_invocation_loop_id(xml_result.unique_invocation_loop_id)
                except AttributeError:
                    bin_result = None
                if bin_result is None:
                    merged_test_results.add(xml_result)
                    continue
                merged_runtime = _pick_runtime(bin_result, xml_result)
                merged_test_results.add(
                    FunctionTestInvocation(
                        loop_index=xml_result.loop_index,
                        id=xml_result.id,
                        file_name=xml_result.file_name,
                        runtime=merged_runtime,
                        test_framework=xml_result.test_framework,
                        did_pass=bin_result.did_pass,
                        test_type=xml_result.test_type,
                        return_value=bin_result.return_value,
                        # If runtime was measured, then the testcase did not time out
                        timed_out=xml_result.timed_out if merged_runtime is None else False,
                        verification_type=_as_verification_type(bin_result.verification_type),
                        stdout=xml_result.stdout,
                    )
                )
        else:
            # Should happen only if the xml did not have any test invocation id info:
            # pair the two lists positionally.
            for position, bin_result in enumerate(bin_results.test_results):
                try:
                    xml_result = xml_results.test_results[position]
                except IndexError:
                    xml_result = None
                if xml_result is None:
                    merged_test_results.add(bin_result)
                    continue
                merged_test_results.add(
                    FunctionTestInvocation(
                        loop_index=bin_result.loop_index,
                        id=bin_result.id,
                        file_name=bin_result.file_name,
                        runtime=_pick_runtime(bin_result, xml_result),
                        test_framework=bin_result.test_framework,
                        did_pass=bin_result.did_pass,
                        test_type=bin_result.test_type,
                        return_value=bin_result.return_value,
                        timed_out=xml_result.timed_out,  # only the xml gets the timed_out flag
                        verification_type=_as_verification_type(bin_result.verification_type),
                        stdout=xml_result.stdout,
                    )
                )

    return merged_test_results
FAILURES_HEADER_RE = re.compile(r"=+ FAILURES =+")
TEST_HEADER_RE = re.compile(r"_{3,}\s*(.*?)\s*_{3,}$")


def parse_test_failures_from_stdout(stdout: str) -> dict[str, str]:
    """Extract individual pytest failures from stdout, keyed by the name in each failure header.

    The FAILURES section runs from the ``=== FAILURES ===`` banner up to the
    "short test summary info" line or the next ``====`` banner, or to the end of the
    output if neither appears. Inside it, each ``___ name ___`` header starts a new
    entry whose value is the following lines, each terminated by a newline.

    Returns:
        Mapping of header name to failure text; empty when there is no FAILURES section.

    """
    lines = stdout.splitlines()

    start = next(
        (index for index, text in enumerate(lines) if FAILURES_HEADER_RE.search(text.strip())),
        None,
    )
    if start is None:
        return {}

    # If no clear end is found, take the rest of the output.
    end = len(lines)
    for index in range(start + 1, len(lines)):
        candidate = lines[index].strip()
        is_summary = "short test summary info" in candidate
        # any new === section === block
        is_new_section = candidate.startswith("=") and candidate.count("=") > 3
        if is_summary or is_new_section:
            end = index
            break

    failures: dict[str, str] = {}
    current_name = None
    collected: list[str] = []
    for raw_line in lines[start:end]:
        header = TEST_HEADER_RE.match(raw_line.strip())
        if header is None:
            if current_name:
                collected.append(raw_line + "\n")
            continue
        # A new header closes the entry that was being collected.
        if current_name is not None:
            failures[current_name] = "".join(collected)
        current_name = header.group(1)
        collected = []

    if current_name:
        failures[current_name] = "".join(collected)
    return failures
def parse_test_results(
    test_xml_path: Path,
    test_files: TestFiles,
    test_config: TestConfig,
    optimization_iteration: int,
    function_name: str | None,
    source_file: Path | None,
    coverage_database_file: Path | None,
    coverage_config_file: Path | None,
    code_context: CodeOptimizationContext | None = None,
    run_result: subprocess.CompletedProcess | None = None,
    skip_sqlite_cleanup: bool = False,
) -> tuple[TestResults, CoverageData | None]:
    """Parse all artefacts of one test run and merge them into a single TestResults.

    Reads the JUnit XML, the SQLite results and (for Python) the legacy binary
    results, removes the temporary result files, merges everything, optionally loads
    coverage, and attaches the failures parsed from the run's stdout.

    Args:
        test_xml_path: JUnit XML file of the run.
        test_files: Registered test files.
        test_config: Test configuration.
        optimization_iteration: Index used in the names of the temporary result files.
        function_name: Function under optimization; needed for coverage.
        source_file: Source file of that function; needed for coverage.
        coverage_database_file: Coverage data (coverage.py database, or Jest's
            coverage-final.json for JavaScript).
        coverage_config_file: coverage.py config file (Python only).
        code_context: Optimization context; needed for coverage.
        run_result: Completed test process, used for logging and failure extraction.
        skip_sqlite_cleanup: Keep the SQLite results file (Jest compares results later).

    Returns:
        ``(results, coverage)`` where coverage is None unless coverage_database_file,
        source_file, code_context and function_name were all provided.

    """
    test_results_xml = parse_test_xml(
        test_xml_path, test_files=test_files, test_config=test_config, run_result=run_result
    )

    # Parse timing/behavior data from SQLite (used by both Python and Jest)
    # Jest uses SQLite exclusively via codeflash-jest-helper
    # Python can use SQLite (preferred) or legacy binary format
    test_results_data = TestResults()
    try:
        sql_results_file = get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.sqlite"))
        if sql_results_file.exists():
            test_results_data = parse_sqlite_test_results(
                sqlite_file_path=sql_results_file, test_files=test_files, test_config=test_config
            )
            logger.debug(f"Parsed {len(test_results_data.test_results)} results from SQLite")
    except Exception as e:
        logger.exception(f"Failed to parse SQLite test results: {e}")

    # Also try to read legacy binary format for Python tests
    # Binary file may contain additional results (e.g., from codeflash_wrap) even if SQLite has data
    # from @codeflash_capture. We need to merge both sources.
    if not is_javascript():
        try:
            bin_results_file = get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.bin"))
            if bin_results_file.exists():
                bin_test_results = parse_test_return_values_bin(
                    bin_results_file, test_files=test_files, test_config=test_config
                )
                # Merge binary results with SQLite results
                for result in bin_test_results:
                    test_results_data.add(result)
                logger.debug(f"Merged {len(bin_test_results)} results from binary file")
        except AttributeError as e:
            logger.exception(e)

    # Cleanup temp files
    for temp_name in (
        f"test_return_values_{optimization_iteration}.bin",
        "pytest_results.xml",
        "unittest_results.xml",
        "jest_results.xml",
        "jest_perf_results.xml",
        "vitest_results.xml",
        "vitest_perf_results.xml",
        "vitest_line_profile_results.xml",
    ):
        get_run_tmp_file(Path(temp_name)).unlink(missing_ok=True)
    # For Jest tests, SQLite cleanup is deferred until after comparison
    # (comparison happens via language_support.compare_test_results)
    if not skip_sqlite_cleanup:
        get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.sqlite")).unlink(missing_ok=True)

    results = merge_test_results(test_results_xml, test_results_data, test_config.test_framework)

    all_args = False
    coverage = None
    if coverage_database_file and source_file and code_context and function_name:
        all_args = True
        if is_javascript():
            # Jest uses coverage-final.json (coverage_database_file points to this)
            coverage = JestCoverageUtils.load_from_jest_json(
                coverage_json_path=coverage_database_file,
                function_name=function_name,
                code_context=code_context,
                source_code_path=source_file,
            )
        else:
            # Python uses coverage.py SQLite database
            coverage = CoverageUtils.load_from_sqlite_database(
                database_path=coverage_database_file,
                config_path=coverage_config_file,
                source_code_path=source_file,
                code_context=code_context,
                function_name=function_name,
            )
        coverage.log_coverage()

    # run_result is optional and its stdout may be bytes or missing; only parse
    # failures when there is captured output, instead of raising into the handler below.
    if run_result is not None and run_result.stdout is not None:
        try:
            captured_stdout = run_result.stdout
            if isinstance(captured_stdout, bytes):
                captured_stdout = captured_stdout.decode(errors="replace")
            results.test_failures = parse_test_failures_from_stdout(captured_stdout)
        except Exception as e:
            logger.exception(e)

    # Cleanup Jest coverage directory after coverage is parsed
    import shutil

    jest_coverage_dir = get_run_tmp_file(Path("jest_coverage"))
    if jest_coverage_dir.exists():
        shutil.rmtree(jest_coverage_dir, ignore_errors=True)

    return results, coverage if all_args else None