from __future__ import annotations

import os
import re
import sqlite3
import subprocess
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING

import dill as pickle
from junitparser.xunit2 import JUnitXml
from lxml.etree import XMLParser, parse

from codeflash.cli_cmds.console import DEBUG_MODE, console, logger
from codeflash.code_utils.code_utils import (
    file_name_from_test_module_name,
    file_path_from_module_name,
    get_run_tmp_file,
    module_name_from_file_path,
)
from codeflash.discovery.discover_unit_tests import discover_parameters_unittest
from codeflash.languages import is_javascript

# Import Jest-specific parsing from the JavaScript language module
from codeflash.languages.javascript.parse import parse_jest_test_xml as _parse_jest_test_xml
from codeflash.models.models import (
    ConcurrencyMetrics,
    FunctionTestInvocation,
    InvocationId,
    TestResults,
    TestType,
    VerificationType,
)
from codeflash.verification.coverage_utils import CoverageUtils, JestCoverageUtils

if TYPE_CHECKING:
    import subprocess

    from codeflash.models.models import CodeOptimizationContext, CoverageData, TestFiles
    from codeflash.verification.verification_utils import TestConfig

def parse_func(file_path: Path) -> XMLParser:
    """Parse the XML file with lxml.etree.XMLParser as the backend."""
    xml_parser = XMLParser(huge_tree=True)
    return parse(file_path, xml_parser)

matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n")
matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")


start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!")
end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+):([^:]+)######!")

# Jest timing marker patterns are imported from codeflash.languages.javascript.parse
# and re-exported here for backwards compatibility
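# Illustrative (hypothetical) marker strings matched by start_pattern/end_pattern above,
# following the format documented in calculate_function_throughput_from_test_results below:
#   start: "!$######tests.test_foo:test_bar:target_func:1:2_0######$!"
#   end:   "!######tests.test_foo:test_bar:target_func:1:2_0:183042######!"
# The names and numbers are placeholders; the final field of the end marker is the duration.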
def calculate_function_throughput_from_test_results(test_results: TestResults, function_name: str) -> int:
    """Calculate function throughput from TestResults by extracting performance stdout.

    A completed execution is defined as having both a start tag and matching end tag from performance wrappers.
    Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$!
    End: !######test_module:test_function:function_name:loop_index:iteration_id:duration######!
    """
    start_matches = start_pattern.findall(test_results.perf_stdout or "")
    end_matches = end_pattern.findall(test_results.perf_stdout or "")

    end_matches_truncated = [end_match[:5] for end_match in end_matches]
    end_matches_set = set(end_matches_truncated)

    function_throughput = 0
    for start_match in start_matches:
        if start_match in end_matches_set and len(start_match) > 2 and start_match[2] == function_name:
            function_throughput += 1
    return function_throughput

# Pattern for concurrency benchmark output:
# !@######CONC:module:class:test:function:loop_index:seq_time:conc_time:factor######@!
_concurrency_pattern = re.compile(r"!@######CONC:([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):(\d+):(\d+):(\d+)######@!")


def parse_concurrency_metrics(test_results: TestResults, function_name: str) -> ConcurrencyMetrics | None:
    """Parse concurrency benchmark results from test output.

    Format: !@######CONC:module:class:test:function:loop_index:seq_time:conc_time:factor######@!

    Returns ConcurrencyMetrics with:
    - sequential_time_ns: Total time for N sequential executions
    - concurrent_time_ns: Total time for N concurrent executions
    - concurrency_factor: N (number of concurrent executions)
    - concurrency_ratio: sequential_time / concurrent_time (higher = better concurrency)
    """
    if not test_results.perf_stdout:
        return None

    matches = _concurrency_pattern.findall(test_results.perf_stdout)
    if not matches:
        return None

    # Aggregate metrics for the target function
    total_seq, total_conc, factor, count = 0, 0, 0, 0
    for match in matches:
        # match[3] is function_name
        if len(match) >= 8 and match[3] == function_name:
            total_seq += int(match[5])
            total_conc += int(match[6])
            factor = int(match[7])
            count += 1

    if count == 0:
        return None

    avg_seq = total_seq / count
    avg_conc = total_conc / count
    ratio = avg_seq / avg_conc if avg_conc > 0 else 1.0

    return ConcurrencyMetrics(
        sequential_time_ns=int(avg_seq),
        concurrent_time_ns=int(avg_conc),
        concurrency_factor=factor,
        concurrency_ratio=ratio,
    )

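# Worked example (hypothetical numbers): two matching marker lines reporting
# seq_time/conc_time pairs of (900_000, 300_000) and (700_000, 300_000) ns give
# avg_seq = 800_000 and avg_conc = 300_000, so concurrency_ratio = 800_000 / 300_000 ≈ 2.67.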
def resolve_test_file_from_class_path(test_class_path: str, base_dir: Path) -> Path | None:
    """Resolve test file path from pytest's test class path.

    This function handles various cases where pytest's classname in JUnit XML
    includes parent directories that may already be part of base_dir.

    Args:
        test_class_path: The full class path from pytest (e.g., "project.tests.test_file.TestClass")
            or a file path from Jest (e.g., "tests/test_file.test.js")
        base_dir: The base directory for tests (tests project root)

    Returns:
        Path to the test file if found, None otherwise

    Examples:
        >>> # base_dir = "/path/to/tests"
        >>> # test_class_path = "code_to_optimize.tests.unittest.test_file.TestClass"
        >>> # Should find: /path/to/tests/unittest/test_file.py

    """
    # Handle file paths (contain slashes and extensions like .js/.ts)
    if "/" in test_class_path or "\\" in test_class_path:
        # This is a file path, not a Python module path
        # Try the path as-is if it's absolute
        potential_path = Path(test_class_path)
        if potential_path.is_absolute() and potential_path.exists():
            return potential_path

        # Try to resolve relative to base_dir's parent (project root)
        project_root = base_dir.parent
        potential_path = project_root / test_class_path
        # Normalize to resolve .. and . components
        try:
            potential_path = potential_path.resolve()
            if potential_path.exists():
                return potential_path
        except (OSError, RuntimeError):
            pass

        # Also try relative to base_dir itself
        potential_path = base_dir / test_class_path
        try:
            potential_path = potential_path.resolve()
            if potential_path.exists():
                return potential_path
        except (OSError, RuntimeError):
            pass

        return None

    # First try the full path (Python module path)
    test_file_path = file_name_from_test_module_name(test_class_path, base_dir)

    # If we couldn't find the file, try stripping the last component (likely a class name)
    # This handles cases like "module.TestClass" where TestClass is a class, not a module
    if test_file_path is None and "." in test_class_path:
        module_without_class = ".".join(test_class_path.split(".")[:-1])
        test_file_path = file_name_from_test_module_name(module_without_class, base_dir)

    # If still not found, progressively strip prefix components
    # This handles cases where pytest's classname includes parent directories that are
    # already part of base_dir (e.g., "project.tests.unittest.test_file.TestClass"
    # when base_dir is "/.../tests")
    if test_file_path is None:
        parts = test_class_path.split(".")
        # Try stripping 1, 2, 3, ... prefix components
        for num_to_strip in range(1, len(parts)):
            remaining = ".".join(parts[num_to_strip:])
            test_file_path = file_name_from_test_module_name(remaining, base_dir)
            if test_file_path:
                break
            # Also try without the last component (class name)
            if "." in remaining:
                remaining_no_class = ".".join(remaining.split(".")[:-1])
                test_file_path = file_name_from_test_module_name(remaining_no_class, base_dir)
                if test_file_path:
                    break

    return test_file_path

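# For instance (hypothetical path), a classname of "code_to_optimize.tests.unittest.test_file.TestClass"
# is retried as "tests.unittest.test_file.TestClass", then "unittest.test_file.TestClass", and so on
# (each also retried without the trailing "TestClass") until a file under base_dir resolves.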
def parse_jest_json_results(
    file_location: Path, test_files: TestFiles, test_config: TestConfig, function_name: str | None = None
) -> TestResults:
    """Parse Jest test results from JSON format written by codeflash-jest-helper.

    Args:
        file_location: Path to the JSON results file.
        test_files: TestFiles object containing test file information.
        test_config: Test configuration.
        function_name: Name of the function being tested.

    Returns:
        TestResults containing parsed test invocations.

    """
    import json

    test_results = TestResults()
    if not file_location.exists():
        logger.debug(f"No Jest JSON results at {file_location}")
        return test_results

    try:
        with file_location.open("r") as f:
            data = json.load(f)

        results = data.get("results", [])
        for result in results:
            test_name = result.get("testName", "") or result.get("testFunctionName", "")
            func_name = result.get("funcName", "")
            duration_ns = result.get("durationNs", 0)
            loop_index = result.get("loopIndex", 1)
            invocation_id = result.get("invocationId", 0)
            error = result.get("error")
            result_module_path = result.get("testModulePath", "")

            # Try to find the test file from test_files by matching testModulePath
            test_file_path = None
            test_type = TestType.GENERATED_REGRESSION  # Default for Jest generated tests

            # If we have testModulePath from the result, use it to find the matching test file
            if result_module_path:
                # Convert module path to file path (e.g., "tests.test_foo.test" -> "tests/test_foo.test.js")
                expected_path = result_module_path.replace(".", "/")
                if not expected_path.endswith(".js"):
                    expected_path += ".js"

                for test_file in test_files.test_files:
                    # Check behavior path
                    if test_file.instrumented_behavior_file_path:
                        try:
                            rel_path = str(
                                test_file.instrumented_behavior_file_path.relative_to(test_config.tests_project_rootdir)
                            )
                        except ValueError:
                            rel_path = test_file.instrumented_behavior_file_path.name
                        if (
                            rel_path == expected_path
                            or rel_path.replace("/", ".").replace(".js", "") == result_module_path
                        ):
                            test_file_path = test_file.instrumented_behavior_file_path
                            test_type = test_file.test_type
                            break
                    # Check benchmarking path
                    if test_file.benchmarking_file_path:
                        try:
                            rel_path = str(
                                test_file.benchmarking_file_path.relative_to(test_config.tests_project_rootdir)
                            )
                        except ValueError:
                            rel_path = test_file.benchmarking_file_path.name
                        if (
                            rel_path == expected_path
                            or rel_path.replace("/", ".").replace(".js", "") == result_module_path
                        ):
                            test_file_path = test_file.benchmarking_file_path
                            test_type = test_file.test_type
                            break

            # Fallback: find the first test file that exists (legacy behavior)
            if test_file_path is None:
                for test_file in test_files.test_files:
                    if test_file.benchmarking_file_path and test_file.benchmarking_file_path.exists():
                        test_file_path = test_file.benchmarking_file_path
                        test_type = test_file.test_type
                        break
                    if test_file.instrumented_behavior_file_path and test_file.instrumented_behavior_file_path.exists():
                        test_file_path = test_file.instrumented_behavior_file_path
                        test_type = test_file.test_type
                        break

            if test_file_path is None:
                logger.debug(f"Could not find test file for Jest result: {test_name} (module: {result_module_path})")
                continue

            # Create invocation ID - use funcName from result or passed function_name
            function_getting_tested = func_name or function_name or "unknown"
            # For Jest tests, keep the relative file path with extension intact
            # (Python uses module_name_from_file_path which strips extensions)
            try:
                test_module_path = str(test_file_path.relative_to(test_config.tests_project_rootdir))
            except ValueError:
                test_module_path = test_file_path.name
            invocation_id_obj = InvocationId(
                test_module_path=test_module_path,
                test_class_name=None,
                test_function_name=test_name or func_name,
                function_getting_tested=function_getting_tested,
                iteration_id=str(invocation_id),
            )

            test_results.add(
                function_test_invocation=FunctionTestInvocation(
                    loop_index=loop_index,
                    id=invocation_id_obj,
                    file_name=test_file_path,
                    did_pass=error is None,
                    runtime=duration_ns,
                    test_framework=test_config.test_framework,
                    test_type=test_type,
                    return_value=result.get("returnValue"),
                    timed_out=False,
                    verification_type=VerificationType.FUNCTION_CALL,
                )
            )

    except Exception as e:
        logger.warning(f"Failed to parse Jest JSON results from {file_location}: {e}")

    return test_results

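# Shape of the JSON file consumed above, inferred from the keys read in parse_jest_json_results
# (field values here are hypothetical):
#   {"results": [{"testName": "adds numbers", "funcName": "add", "durationNs": 12345,
#                 "loopIndex": 1, "invocationId": 0, "error": null,
#                 "testModulePath": "tests.add.test", "returnValue": "3"}]}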
def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
    test_results = TestResults()
    if not file_location.exists():
        logger.debug(f"No test results for {file_location} found.")
        console.rule()
        return test_results

    with file_location.open("rb") as file:
        try:
            while file:
                len_next_bytes = file.read(4)
                if not len_next_bytes:
                    return test_results
                len_next = int.from_bytes(len_next_bytes, byteorder="big")
                encoded_test_bytes = file.read(len_next)
                encoded_test_name = encoded_test_bytes.decode("ascii")
                duration_bytes = file.read(8)
                duration = int.from_bytes(duration_bytes, byteorder="big")
                len_next_bytes = file.read(4)
                len_next = int.from_bytes(len_next_bytes, byteorder="big")
                test_pickle_bin = file.read(len_next)
                loop_index_bytes = file.read(8)
                loop_index = int.from_bytes(loop_index_bytes, byteorder="big")
                len_next_bytes = file.read(4)
                len_next = int.from_bytes(len_next_bytes, byteorder="big")
                invocation_id_bytes = file.read(len_next)
                invocation_id = invocation_id_bytes.decode("ascii")

                invocation_id_object = InvocationId.from_str_id(encoded_test_name, invocation_id)
                test_file_path = file_path_from_module_name(
                    invocation_id_object.test_module_path, test_config.tests_project_rootdir
                )

                test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
                try:
                    test_pickle = pickle.loads(test_pickle_bin) if loop_index == 1 else None
                except Exception as e:
                    if DEBUG_MODE:
                        logger.exception(f"Failed to load pickle file for {encoded_test_name} Exception: {e}")
                    continue
                assert test_type is not None, f"Test type not found for {test_file_path}"
                test_results.add(
                    function_test_invocation=FunctionTestInvocation(
                        loop_index=loop_index,
                        id=invocation_id_object,
                        file_name=test_file_path,
                        did_pass=True,
                        runtime=duration,
                        test_framework=test_config.test_framework,
                        test_type=test_type,
                        return_value=test_pickle,
                        timed_out=False,
                        verification_type=VerificationType.FUNCTION_CALL,
                    )
                )
        except Exception as e:
            logger.warning(f"Failed to parse test results from {file_location}. Exception: {e}")
            return test_results
    return test_results

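# Record layout read by parse_test_return_values_bin above (one record per invocation, big-endian):
#   4-byte length + ASCII test id, 8-byte duration, 4-byte length + pickled return value,
#   8-byte loop index, 4-byte length + ASCII iteration/invocation id.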
def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
    test_results = TestResults()
    if not sqlite_file_path.exists():
        logger.warning(f"No test results for {sqlite_file_path} found.")
        console.rule()
        return test_results
    db = None
    try:
        db = sqlite3.connect(sqlite_file_path)
        cur = db.cursor()
        data = cur.execute(
            "SELECT test_module_path, test_class_name, test_function_name, "
            "function_getting_tested, loop_index, iteration_id, runtime, return_value, verification_type FROM test_results"
        ).fetchall()
    except Exception as e:
        logger.warning(f"Failed to parse test results from {sqlite_file_path}. Exception: {e}")
        return test_results
    finally:
        # Guard against db being None when sqlite3.connect itself fails.
        if db is not None:
            db.close()

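    # Column order of the SELECT above maps to the tuple indices used below:
    # val[0]=test_module_path, val[1]=test_class_name, val[2]=test_function_name,
    # val[3]=function_getting_tested, val[4]=loop_index, val[5]=iteration_id,
    # val[6]=runtime, val[7]=return_value, val[8]=verification_type.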
    # Check if this is a JavaScript test (use JSON) or Python test (use pickle)
    is_jest = is_javascript()

    for val in data:
        try:
            test_module_path = val[0]
            test_class_name = val[1] if val[1] else None
            test_function_name = val[2] if val[2] else None
            function_getting_tested = val[3]

            # For Jest tests, test_module_path could be:
            # - A module-style path: "tests.fibonacci.test.ts" (dots as separators)
            # - A file path: "tests/fibonacci.test.ts" (slashes as separators)
            # For Python, it's a module path (e.g., "tests.test_foo") that needs conversion
            if is_jest:
                # Jest test file extensions (including .test.ts, .spec.ts patterns)
                jest_test_extensions = (
                    ".test.ts",
                    ".test.js",
                    ".test.tsx",
                    ".test.jsx",
                    ".spec.ts",
                    ".spec.js",
                    ".spec.tsx",
                    ".spec.jsx",
                    ".ts",
                    ".js",
                    ".tsx",
                    ".jsx",
                    ".mjs",
                    ".mts",
                )
                # Check if it's a module-style path (no slashes, has dots beyond extension)
                if "/" not in test_module_path and "\\" not in test_module_path:
                    # Find the appropriate extension to preserve
                    extension = ""
                    for ext in jest_test_extensions:
                        if test_module_path.endswith(ext):
                            extension = ext
                            break
                    if extension:
                        # Convert module-style path to file path
                        # "tests.fibonacci__perfinstrumented.test.ts" -> "tests/fibonacci__perfinstrumented.test.ts"
                        base_path = test_module_path[: -len(extension)]
                        file_path = base_path.replace(".", os.sep) + extension
                        # Check if the module path includes the tests directory name
                        tests_dir_name = test_config.tests_project_rootdir.name
                        if file_path.startswith((tests_dir_name + os.sep, tests_dir_name + "/")):
                            # Module path includes "tests." - use project root parent
                            test_file_path = test_config.tests_project_rootdir.parent / file_path
                        else:
                            # Module path doesn't include tests dir - use tests root directly
                            test_file_path = test_config.tests_project_rootdir / file_path
                    else:
                        # No recognized extension, treat as-is
                        test_file_path = test_config.tests_project_rootdir / test_module_path
                else:
                    # Already a file path
                    test_file_path = test_config.tests_project_rootdir / test_module_path
            else:
                # Python: convert module path to file path
                test_file_path = file_path_from_module_name(test_module_path, test_config.tests_project_rootdir)

            loop_index = val[4]
            iteration_id = val[5]
            runtime = val[6]
            verification_type = val[8]
            if verification_type in {VerificationType.INIT_STATE_FTO, VerificationType.INIT_STATE_HELPER}:
                test_type = TestType.INIT_STATE_TEST
            else:
                # Try original_file_path first (for existing tests that were instrumented)
                test_type = test_files.get_test_type_by_original_file_path(test_file_path)
                logger.debug(f"[PARSE-DEBUG] test_module={test_module_path}, test_file_path={test_file_path}")
                logger.debug(f"[PARSE-DEBUG] by_original_file_path: {test_type}")
                # If not found, try instrumented_behavior_file_path (for generated tests)
                if test_type is None:
                    test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
                    logger.debug(f"[PARSE-DEBUG] by_instrumented_file_path: {test_type}")
                # Default to GENERATED_REGRESSION for Jest tests when test type can't be determined
                if test_type is None and is_jest:
                    test_type = TestType.GENERATED_REGRESSION
                    logger.debug("[PARSE-DEBUG] defaulting to GENERATED_REGRESSION (Jest)")
                elif test_type is None:
                    # Skip results where test type cannot be determined
                    logger.debug(f"Skipping result for {test_function_name}: could not determine test type")
                    continue
                logger.debug(f"[PARSE-DEBUG] FINAL test_type={test_type}")

            # Deserialize return value
            # For Jest: Skip deserialization - comparison happens via language-specific comparator
            # For Python: Use pickle to deserialize
            ret_val = None
            if loop_index == 1 and val[7]:
                try:
                    if is_jest:
                        # Jest comparison happens via Node.js script (language_support.compare_test_results)
                        # Store a marker indicating data exists but is not deserialized in Python
                        ret_val = ("__serialized__", val[7])
                    else:
                        # Python uses pickle serialization
                        ret_val = (pickle.loads(val[7]),)
                except Exception as e:
                    # If deserialization fails, skip this result
                    logger.debug(f"Failed to deserialize return value for {test_function_name}: {e}")
                    continue

            test_results.add(
                function_test_invocation=FunctionTestInvocation(
                    loop_index=loop_index,
                    id=InvocationId(
                        test_module_path=test_module_path,
                        test_class_name=test_class_name,
                        test_function_name=test_function_name,
                        function_getting_tested=function_getting_tested,
                        iteration_id=iteration_id,
                    ),
                    file_name=test_file_path,
                    did_pass=True,
                    runtime=runtime,
                    test_framework=test_config.test_framework,
                    test_type=test_type,
                    return_value=ret_val,
                    timed_out=False,
                    verification_type=VerificationType(verification_type) if verification_type else None,
                )
            )
        except Exception:
            logger.exception(f"Failed to parse sqlite test results for {sqlite_file_path}")
    # Hardcoding the test result to True because the test did execute and we are only interested in the return values,
    # the did_pass comes from the xml results file
    return test_results

def parse_test_xml(
    test_xml_file_path: Path,
    test_files: TestFiles,
    test_config: TestConfig,
    run_result: subprocess.CompletedProcess | None = None,
) -> TestResults:
    # Route to Jest-specific parser for JavaScript/TypeScript tests
    if is_javascript():
        return _parse_jest_test_xml(
            test_xml_file_path,
            test_files,
            test_config,
            run_result,
            parse_func=parse_func,
            resolve_test_file_from_class_path=resolve_test_file_from_class_path,
        )

    test_results = TestResults()
    # Parse unittest output
    if not test_xml_file_path.exists():
        logger.warning(f"No test results for {test_xml_file_path} found.")
        console.rule()
        return test_results
    try:
        xml = JUnitXml.fromfile(str(test_xml_file_path), parse_func=parse_func)
    except Exception as e:
        logger.warning(f"Failed to parse {test_xml_file_path} as JUnitXml. Exception: {e}")
        return test_results
    # Always use tests_project_rootdir since pytest is now the test runner for all frameworks
    base_dir = test_config.tests_project_rootdir
    for suite in xml:
        for testcase in suite:
            class_name = testcase.classname
            test_file_name = suite._elem.attrib.get("file")  # noqa: SLF001
            if (
                test_file_name == f"unittest{os.sep}loader.py"
                and class_name == "unittest.loader._FailedTest"
                and suite.errors == 1
                and suite.tests == 1
            ):
                # This means that the test failed to load, so we don't want to crash on it
                logger.info("Test failed to load, skipping it.")
                if run_result is not None:
                    if isinstance(run_result.stdout, str) and isinstance(run_result.stderr, str):
                        logger.info(f"Test log - STDOUT : {run_result.stdout} \n STDERR : {run_result.stderr}")
                    else:
                        logger.info(
                            f"Test log - STDOUT : {run_result.stdout.decode()} \n STDERR : {run_result.stderr.decode()}"
                        )
                return test_results

            test_class_path = testcase.classname
            try:
                if testcase.name is None:
                    logger.debug(
                        f"testcase.name is None for testcase {testcase!r} in file {test_xml_file_path}, skipping"
                    )
                    continue
                test_function = testcase.name.split("[", 1)[0] if "[" in testcase.name else testcase.name
            except (AttributeError, TypeError) as e:
                msg = (
                    f"Accessing testcase.name in parse_test_xml for testcase {testcase!r} in file"
                    f" {test_xml_file_path} has exception: {e}"
                )
                logger.exception(msg)
                continue
            if test_file_name is None:
                if test_class_path:
                    # TODO : This might not be true if the test is organized under a class
                    test_file_path = resolve_test_file_from_class_path(test_class_path, base_dir)

                    if test_file_path is None:
                        logger.warning(f"Could not find the test for file name - {test_class_path} ")
                        continue
                else:
                    test_file_path = file_path_from_module_name(test_function, base_dir)
            else:
                test_file_path = base_dir / test_file_name
                assert test_file_path, f"Test file path not found for {test_file_name}"

            if not test_file_path.exists():
                logger.warning(f"Could not find the test for file name - {test_file_path} ")
                continue
            test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
            if test_type is None:
                # Log registered paths for debugging
                registered_paths = [str(tf.instrumented_behavior_file_path) for tf in test_files.test_files]
                logger.warning(
                    f"Test type not found for '{test_file_path}'. "
                    f"Registered test files: {registered_paths}. Skipping test case."
                )
                continue
            test_module_path = module_name_from_file_path(test_file_path, test_config.tests_project_rootdir)
            result = testcase.is_passed  # TODO: See for the cases of ERROR and SKIPPED
            test_class = None
            if class_name is not None and class_name.startswith(test_module_path):
                test_class = class_name[len(test_module_path) + 1 :]  # +1 for the dot, gets Unittest class name

            loop_index = int(testcase.name.split("[ ")[-1][:-2]) if testcase.name and "[" in testcase.name else 1
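            # With looped runs, testcase.name is expected to look like "test_foo[ 3 ]"
            # (hypothetical example), so the split/strip above yields loop_index == 3.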
            timed_out = False
            if len(testcase.result) > 1:
                logger.debug(f"!!!!!Multiple results for {testcase.name or '<None>'} in {test_xml_file_path}!!!")
            if len(testcase.result) == 1:
                # message can be absent on some failure/error nodes, so guard against None
                message = (testcase.result[0].message or "").lower()
                if "failed: timeout >" in message or "timed out" in message:
                    timed_out = True

            sys_stdout = testcase.system_out or ""
            begin_matches = list(matches_re_start.finditer(sys_stdout))
            end_matches = {}
            for match in matches_re_end.finditer(sys_stdout):
                groups = match.groups()
                if len(groups[5].split(":")) > 1:
                    iteration_id = groups[5].split(":")[0]
                    groups = (*groups[:5], iteration_id)
                end_matches[groups] = match

            if not begin_matches:
                test_results.add(
                    FunctionTestInvocation(
                        loop_index=loop_index,
                        id=InvocationId(
                            test_module_path=test_module_path,
                            test_class_name=test_class,
                            test_function_name=test_function,
                            function_getting_tested="",  # TODO: Fix this
                            iteration_id="",
                        ),
                        file_name=test_file_path,
                        runtime=None,
                        test_framework=test_config.test_framework,
                        did_pass=result,
                        test_type=test_type,
                        return_value=None,
                        timed_out=timed_out,
                        stdout="",
                    )
                )

            else:
                for match_index, match in enumerate(begin_matches):
                    groups = match.groups()
                    end_match = end_matches.get(groups)
                    iteration_id, runtime = groups[5], None
                    if end_match:
                        stdout = sys_stdout[match.end() : end_match.start()]
                        split_val = end_match.groups()[5].split(":")
                        if len(split_val) > 1:
                            iteration_id = split_val[0]
                            runtime = int(split_val[1])
                        else:
                            iteration_id, runtime = split_val[0], None
                    elif match_index == len(begin_matches) - 1:
                        stdout = sys_stdout[match.end() :]
                    else:
                        stdout = sys_stdout[match.end() : begin_matches[match_index + 1].start()]

                    test_results.add(
                        FunctionTestInvocation(
                            loop_index=int(groups[4]),
                            id=InvocationId(
                                test_module_path=groups[0],
                                test_class_name=None if groups[1] == "" else groups[1][:-1],
                                test_function_name=groups[2],
                                function_getting_tested=groups[3],
                                iteration_id=iteration_id,
                            ),
                            file_name=test_file_path,
                            runtime=runtime,
                            test_framework=test_config.test_framework,
                            did_pass=result,
                            test_type=test_type,
                            return_value=None,
                            timed_out=timed_out,
                            stdout=stdout,
                        )
                    )

    if not test_results:
        logger.info(
            f"Tests '{[test_file.original_file_path for test_file in test_files.test_files]}' failed to run, skipping"
        )
        if run_result is not None:
            stdout, stderr = "", ""
            try:
                stdout = run_result.stdout.decode()
                stderr = run_result.stderr.decode()
            except AttributeError:
                # stdout/stderr are already strings; keep both instead of dropping stderr
                stdout = run_result.stdout
                stderr = run_result.stderr
            logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
    return test_results

def merge_test_results(
    xml_test_results: TestResults, bin_test_results: TestResults, test_framework: str
) -> TestResults:
    merged_test_results = TestResults()

    grouped_xml_results: defaultdict[str, TestResults] = defaultdict(TestResults)
    grouped_bin_results: defaultdict[str, TestResults] = defaultdict(TestResults)

    # This is done to match the right iteration_id which might not be available in the xml
    for result in xml_test_results:
        if test_framework == "pytest":
            if result.id.test_function_name.endswith("]") and "[" in result.id.test_function_name:  # parameterized test
                test_function_name = result.id.test_function_name[: result.id.test_function_name.index("[")]
            else:
                test_function_name = result.id.test_function_name
        elif test_framework == "unittest":
            test_function_name = result.id.test_function_name
            is_parameterized, new_test_function_name, _ = discover_parameters_unittest(test_function_name)
            if is_parameterized:  # handle parameterized test
                test_function_name = new_test_function_name
        else:
            # Jest and other frameworks - use test function name as-is
            test_function_name = result.id.test_function_name

        grouped_xml_results[
            (result.id.test_module_path or "")
            + ":"
            + (result.id.test_class_name or "")
            + ":"
            + (test_function_name or "")
            + ":"
            + str(result.loop_index)
        ].add(result)

    for result in bin_test_results:
        grouped_bin_results[
            (result.id.test_module_path or "")
            + ":"
            + (result.id.test_class_name or "")
            + ":"
            + (result.id.test_function_name or "")
            + ":"
            + str(result.loop_index)
        ].add(result)

    for result_id in grouped_xml_results:
        xml_results = grouped_xml_results[result_id]
        bin_results = grouped_bin_results.get(result_id)
        if not bin_results:
            merged_test_results.merge(xml_results)
            continue

        if len(xml_results) == 1:
            xml_result = xml_results[0]
            # This means that we only have one FunctionTestInvocation for this test xml. Match them to the bin results
            # Either a whole test function fails or passes.
            for result_bin in bin_results:
                # Prefer XML runtime (from stdout markers) if bin runtime is None/0
                # This is important for Jest perf tests which output timing to stdout, not SQLite
                merged_runtime = result_bin.runtime if result_bin.runtime else xml_result.runtime
                merged_test_results.add(
                    FunctionTestInvocation(
                        loop_index=xml_result.loop_index,
                        id=result_bin.id,
                        file_name=xml_result.file_name,
                        runtime=merged_runtime,
                        test_framework=xml_result.test_framework,
                        did_pass=xml_result.did_pass,
                        test_type=xml_result.test_type,
                        return_value=result_bin.return_value,
                        timed_out=xml_result.timed_out,
                        verification_type=VerificationType(result_bin.verification_type)
                        if result_bin.verification_type
                        else None,
                        stdout=xml_result.stdout,
                    )
                )
        elif xml_results.test_results[0].id.iteration_id is not None:
            # This means that we have multiple iterations of the same test function
            # We need to match the iteration_id to the bin results
            for xml_result in xml_results.test_results:
                try:
                    bin_result = bin_results.get_by_unique_invocation_loop_id(xml_result.unique_invocation_loop_id)
                except AttributeError:
                    bin_result = None
                if bin_result is None:
                    merged_test_results.add(xml_result)
                    continue
                # Prefer XML runtime (from stdout markers) if bin runtime is None/0
                # This is important for Jest perf tests which output timing to stdout, not SQLite
                merged_runtime = bin_result.runtime if bin_result.runtime else xml_result.runtime
                merged_test_results.add(
                    FunctionTestInvocation(
                        loop_index=xml_result.loop_index,
                        id=xml_result.id,
                        file_name=xml_result.file_name,
                        runtime=merged_runtime,
                        test_framework=xml_result.test_framework,
                        did_pass=bin_result.did_pass,
                        test_type=xml_result.test_type,
                        return_value=bin_result.return_value,
                        timed_out=xml_result.timed_out
                        if merged_runtime is None
                        else False,  # If runtime was measured, then the testcase did not time out
                        verification_type=VerificationType(bin_result.verification_type)
                        if bin_result.verification_type
                        else None,
                        stdout=xml_result.stdout,
                    )
                )
        else:
            # Should happen only if the xml did not have any test invocation id info
            for i, bin_result in enumerate(bin_results.test_results):
                try:
                    xml_result = xml_results.test_results[i]
                except IndexError:
                    xml_result = None
                if xml_result is None:
                    merged_test_results.add(bin_result)
                    continue
                # Prefer XML runtime (from stdout markers) if bin runtime is None/0
                # This is important for Jest perf tests which output timing to stdout, not SQLite
                merged_runtime = bin_result.runtime if bin_result.runtime else xml_result.runtime
                merged_test_results.add(
                    FunctionTestInvocation(
                        loop_index=bin_result.loop_index,
                        id=bin_result.id,
                        file_name=bin_result.file_name,
                        runtime=merged_runtime,
                        test_framework=bin_result.test_framework,
                        did_pass=bin_result.did_pass,
                        test_type=bin_result.test_type,
                        return_value=bin_result.return_value,
                        timed_out=xml_result.timed_out,  # only the xml gets the timed_out flag
                        verification_type=VerificationType(bin_result.verification_type)
                        if bin_result.verification_type
                        else None,
                        stdout=xml_result.stdout,
                    )
                )

    return merged_test_results

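# The grouping key built above is "module:class:function:loop_index", e.g.
# "tests.test_foo:TestFoo:test_bar:1" (hypothetical), so XML and bin invocations of the
# same test and loop iteration land in the same bucket before merging.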
FAILURES_HEADER_RE = re.compile(r"=+ FAILURES =+")
TEST_HEADER_RE = re.compile(r"_{3,}\s*(.*?)\s*_{3,}$")

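# These match the pytest terminal report, whose failures section looks roughly like
# (hypothetical excerpt):
#   =================================== FAILURES ===================================
#   ______________________________ test_bar[case-1] _______________________________
# TEST_HEADER_RE captures the qualified test name between the underscore rules.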
def parse_test_failures_from_stdout(stdout: str) -> dict[str, str]:
    """Extract individual pytest test failures from stdout, grouped by test case qualified name, so they can be attached to the test results."""
    lines = stdout.splitlines()
    start = end = None

    for i, line in enumerate(lines):
        if FAILURES_HEADER_RE.search(line.strip()):
            start = i
            break

    if start is None:
        return {}

    for j in range(start + 1, len(lines)):
        stripped = lines[j].strip()
        if "short test summary info" in stripped:
            end = j
            break
        # any new === section === block
        if stripped.startswith("=") and stripped.count("=") > 3:
            end = j
            break

    # If no clear "end", just grab the rest of the string
    if end is None:
        end = len(lines)

    failure_block = lines[start:end]

    failures: dict[str, str] = {}
    current_name = None
    current_lines: list[str] = []

    for line in failure_block:
        m = TEST_HEADER_RE.match(line.strip())
        if m:
            if current_name is not None:
                failures[current_name] = "".join(current_lines)

            current_name = m.group(1)
            current_lines = []
        elif current_name:
            current_lines.append(line + "\n")

    if current_name:
        failures[current_name] = "".join(current_lines)

    return failures

def parse_test_results(
    test_xml_path: Path,
    test_files: TestFiles,
    test_config: TestConfig,
    optimization_iteration: int,
    function_name: str | None,
    source_file: Path | None,
    coverage_database_file: Path | None,
    coverage_config_file: Path | None,
    code_context: CodeOptimizationContext | None = None,
    run_result: subprocess.CompletedProcess | None = None,
    skip_sqlite_cleanup: bool = False,
) -> tuple[TestResults, CoverageData | None]:
    test_results_xml = parse_test_xml(
        test_xml_path, test_files=test_files, test_config=test_config, run_result=run_result
    )

    # Parse timing/behavior data from SQLite (used by both Python and Jest)
    # Jest uses SQLite exclusively via codeflash-jest-helper
    # Python can use SQLite (preferred) or legacy binary format
    test_results_data = TestResults()

    try:
        sql_results_file = get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.sqlite"))
        if sql_results_file.exists():
            test_results_data = parse_sqlite_test_results(
                sqlite_file_path=sql_results_file, test_files=test_files, test_config=test_config
            )
            logger.debug(f"Parsed {len(test_results_data.test_results)} results from SQLite")
    except Exception as e:
        logger.exception(f"Failed to parse SQLite test results: {e}")

    # Also try to read legacy binary format for Python tests
    # Binary file may contain additional results (e.g., from codeflash_wrap) even if SQLite has data
    # from @codeflash_capture. We need to merge both sources.
    if not is_javascript():
        try:
            bin_results_file = get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.bin"))
            if bin_results_file.exists():
                bin_test_results = parse_test_return_values_bin(
                    bin_results_file, test_files=test_files, test_config=test_config
                )
                # Merge binary results with SQLite results
                for result in bin_test_results:
                    test_results_data.add(result)
                logger.debug(f"Merged {len(bin_test_results)} results from binary file")
        except AttributeError as e:
            logger.exception(e)

    # Cleanup temp files
    get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.bin")).unlink(missing_ok=True)

    get_run_tmp_file(Path("pytest_results.xml")).unlink(missing_ok=True)
    get_run_tmp_file(Path("unittest_results.xml")).unlink(missing_ok=True)
    get_run_tmp_file(Path("jest_results.xml")).unlink(missing_ok=True)
    get_run_tmp_file(Path("jest_perf_results.xml")).unlink(missing_ok=True)
    get_run_tmp_file(Path("vitest_results.xml")).unlink(missing_ok=True)
    get_run_tmp_file(Path("vitest_perf_results.xml")).unlink(missing_ok=True)
    get_run_tmp_file(Path("vitest_line_profile_results.xml")).unlink(missing_ok=True)

    # For Jest tests, SQLite cleanup is deferred until after comparison
    # (comparison happens via language_support.compare_test_results)
    if not skip_sqlite_cleanup:
        get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.sqlite")).unlink(missing_ok=True)

    results = merge_test_results(test_results_xml, test_results_data, test_config.test_framework)

    all_args = False
    coverage = None
    if coverage_database_file and source_file and code_context and function_name:
        all_args = True
        if is_javascript():
            # Jest uses coverage-final.json (coverage_database_file points to this)
            coverage = JestCoverageUtils.load_from_jest_json(
                coverage_json_path=coverage_database_file,
                function_name=function_name,
                code_context=code_context,
                source_code_path=source_file,
            )
        else:
            # Python uses coverage.py SQLite database
            coverage = CoverageUtils.load_from_sqlite_database(
                database_path=coverage_database_file,
                config_path=coverage_config_file,
                source_code_path=source_file,
                code_context=code_context,
                function_name=function_name,
            )
        coverage.log_coverage()
    try:
        failures = parse_test_failures_from_stdout(run_result.stdout)
        results.test_failures = failures
    except Exception as e:
        logger.exception(e)

    # Cleanup Jest coverage directory after coverage is parsed
    import shutil

    jest_coverage_dir = get_run_tmp_file(Path("jest_coverage"))
    if jest_coverage_dir.exists():
        shutil.rmtree(jest_coverage_dir, ignore_errors=True)

    return results, coverage if all_args else None