codeflash/tests/test_languages/test_java/test_java_tracer_integration.py
Kevin Turcios b737f71e46 fix: update test assertions to match simplified Workload fixture
The Workload.java fixture was trimmed to only repeatString but test
files still asserted computeSum, filterEvens, and instanceMethod.
2026-04-10 16:05:27 -05:00

343 lines
15 KiB
Python

"""End-to-end integration test for the Java tracer → optimizer pipeline.
Tests the full flow: trace → replay test generation → function discovery →
test discovery → function ranking, using the simple Workload fixture.
"""
from __future__ import annotations
import subprocess
from pathlib import Path
import pytest
from codeflash.languages.java.tracer import run_java_tracer
# Java fixture project used by every test in this module; it lives in the
# "fixtures" directory two levels above this file.
FIXTURE_DIR = Path(__file__).parent.parent.joinpath("fixtures", "java_tracer_e2e")

# Package and fully qualified class name of the workload that gets compiled and traced.
WORKLOAD_PACKAGE = "com.example"
WORKLOAD_CLASS = "com.example.Workload"

# Source file of the workload class inside the fixture project.
WORKLOAD_SOURCE = FIXTURE_DIR.joinpath("src", "main", "java", "com", "example", "Workload.java")
@pytest.fixture(scope="module")
def compiled_workload() -> Path:
    """Compile the Workload fixture source with ``javac`` once per test module.

    Returns:
        The ``target/classes`` directory of the fixture project, which receives
        the compiled classes.
    """
    output_classes = FIXTURE_DIR / "target" / "classes"
    output_classes.mkdir(parents=True, exist_ok=True)

    javac_command = ["javac", "--release", "11", "-d", str(output_classes), str(WORKLOAD_SOURCE)]
    # check=False: the exit code is asserted explicitly so that javac's stderr
    # shows up in the failure message.
    completed = subprocess.run(javac_command, capture_output=True, text=True, check=False)
    assert completed.returncode == 0, f"javac failed: {completed.stderr}"

    return output_classes
@pytest.fixture
def traced_workload(compiled_workload: Path, tmp_path: Path) -> tuple[Path, Path, Path, int]:
    """Run the Java tracer on the compiled workload and generate replay tests.

    The trace database and the replay tests are written below ``tmp_path``.

    Returns:
        A ``(trace_db, jfr_file, output_dir, test_count)`` tuple.
    """
    replay_dir = tmp_path / "replay_tests"
    tracer_result = run_java_tracer(
        java_command=["java", "-cp", str(compiled_workload), WORKLOAD_CLASS],
        trace_db_path=tmp_path / "trace.db",
        packages=[WORKLOAD_PACKAGE],
        project_root=FIXTURE_DIR,
        output_dir=replay_dir,
    )
    trace_db, jfr_file, test_count = tracer_result

    assert trace_db.exists(), "Trace DB not created"
    assert test_count >= 1, f"Expected at least 1 replay test file, got {test_count}"

    return trace_db, jfr_file, replay_dir, test_count
class TestFunctionDiscoveryFromReplayTests:
    """Check that generated replay tests lead back to the traced source functions."""

    def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> None:
        """Functions recovered from the replay tests are Java functions located in the Workload source."""
        _, _, replay_dir, _ = traced_workload

        from codeflash.discovery.functions_to_optimize import _get_java_replay_test_functions
        from codeflash.verification.verification_utils import TestConfig

        replay_files = list(replay_dir.glob("*.java"))
        assert len(replay_files) >= 1

        config = TestConfig(
            tests_root=FIXTURE_DIR / "src" / "test" / "java",
            tests_project_rootdir=FIXTURE_DIR,
            project_root_path=FIXTURE_DIR,
            pytest_cmd="pytest",
        )
        functions_by_file, trace_file = _get_java_replay_test_functions(replay_files, config, FIXTURE_DIR)

        # At least one function must be found, together with the trace file it came from.
        assert len(functions_by_file) > 0, "No functions discovered from replay tests"
        assert trace_file.exists(), f"Trace file not found: {trace_file}"

        # Validate every discovered function while gathering the names that were seen.
        discovered_names = set()
        for source_path, discovered in functions_by_file.items():
            assert source_path.exists(), f"Source file not found: {source_path}"
            assert "Workload" in source_path.name
            for function in discovered:
                discovered_names.add(function.function_name)
                assert function.language == "java", f"Expected language='java', got '{function.language}'"
                assert function.file_path == source_path

        assert "repeatString" in discovered_names

    def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None:
        """Test discovery maps the replay tests onto the functions of the Workload source."""
        _, _, replay_dir, _ = traced_workload

        from codeflash.languages.java.discovery import discover_functions_from_source
        from codeflash.languages.java.test_discovery import discover_tests

        workload_functions = discover_functions_from_source(
            WORKLOAD_SOURCE.read_text(encoding="utf-8"), file_path=WORKLOAD_SOURCE
        )
        result = discover_tests(replay_dir, workload_functions)

        # Replay tests should be mapped to source functions.
        assert len(result) > 0, "No test mappings found from replay tests"

        # Reduce each qualified name to its last dotted component; a name
        # without any dot is kept unchanged.
        matched_names = {qualified.rpartition(".")[2] for qualified in result}
        assert "repeatString" in matched_names, f"repeatString not found in: {result.keys()}"

        # Every mapped function needs at least one test, and each test must be
        # an existing replay test file.
        for mapped_name, tests in result.items():
            assert len(tests) > 0, f"No tests for {mapped_name}"
            for info in tests:
                assert info.test_file.exists()
                assert "ReplayTest" in info.test_file.name
class TestJfrProfiling:
    """Exercise JFR parsing and JFR-based function ranking on the traced workload."""

    def test_jfr_parsing(self, traced_workload: tuple) -> None:
        """Parsing the JFR recording yields a list of ranked methods."""
        _, recording, _, _ = traced_workload
        if not recording.exists():
            pytest.skip("JFR file not created (JFR may not be available)")

        from codeflash.languages.java.jfr_parser import JfrProfile

        # The workload is very short, so the recording may hold few samples;
        # only check that parsing succeeds and the ranking comes back as a list.
        method_ranking = JfrProfile(recording, [WORKLOAD_PACKAGE]).get_method_ranking()
        assert isinstance(method_ranking, list)

    def test_java_function_ranker(self, traced_workload: tuple) -> None:
        """Ranking the Workload functions against the JFR profile yields a list."""
        _, recording, _, _ = traced_workload
        if not recording.exists():
            pytest.skip("JFR file not created (JFR may not be available)")

        from codeflash.benchmarking.function_ranker import JavaFunctionRanker
        from codeflash.languages.java.discovery import discover_functions_from_source
        from codeflash.languages.java.jfr_parser import JfrProfile

        ranker = JavaFunctionRanker(JfrProfile(recording, [WORKLOAD_PACKAGE]))
        workload_functions = discover_functions_from_source(
            WORKLOAD_SOURCE.read_text(encoding="utf-8"), file_path=WORKLOAD_SOURCE
        )

        # Ranking must not crash even when the recording holds minimal data.
        ranking = ranker.rank_functions(workload_functions)
        assert isinstance(ranking, list)
class TestFullDiscoveryPipeline:
    """Test the complete discovery pipeline as the optimizer would run it.

    Both tests generate replay tests inside the fixture project's own test
    source tree (``src/test/java/codeflash/replay``) and remove them again in a
    ``finally`` block, whether the test passes or fails.
    """

    @staticmethod
    def _remove_generated_replay_tests(output_dir: Path) -> None:
        """Delete every ``*.java`` file in ``output_dir`` and prune directories left empty.

        ``output_dir`` is removed once it is empty; its parent is removed as
        well when that parent is named ``codeflash`` and is empty too. Globbing
        the directory (rather than tracking individual paths) also catches
        files written before a test failed part-way through.
        """
        for java_file in output_dir.glob("*.java"):
            java_file.unlink(missing_ok=True)
        if output_dir.exists() and not any(output_dir.iterdir()):
            output_dir.rmdir()
        codeflash_dir = output_dir.parent
        if codeflash_dir.exists() and codeflash_dir.name == "codeflash" and not any(codeflash_dir.iterdir()):
            codeflash_dir.rmdir()

    def test_full_pipeline(self, compiled_workload: Path, tmp_path: Path) -> None:
        """Simulate what optimizer.run() does: discover functions, discover tests, rank.

        Uses the same directory layout as the real flow: replay tests go into
        src/test/java/codeflash/replay/ so test discovery can find them.
        """
        trace_db_path = tmp_path / "trace.db"
        # Generate replay tests into the project's test source tree (like _run_java_tracer does)
        test_root = FIXTURE_DIR / "src" / "test" / "java"
        output_dir = test_root / "codeflash" / "replay"
        output_dir.mkdir(parents=True, exist_ok=True)

        try:
            _trace_db, jfr_file, test_count = run_java_tracer(
                java_command=["java", "-cp", str(compiled_workload), WORKLOAD_CLASS],
                trace_db_path=trace_db_path,
                packages=[WORKLOAD_PACKAGE],
                project_root=FIXTURE_DIR,
                output_dir=output_dir,
            )
            assert test_count >= 1

            # Step 1: Discover functions from replay tests (like get_optimizable_functions)
            from codeflash.discovery.functions_to_optimize import _get_java_replay_test_functions
            from codeflash.verification.verification_utils import TestConfig

            replay_test_paths = list(output_dir.glob("*.java"))
            test_cfg = TestConfig(
                tests_root=test_root,
                tests_project_rootdir=FIXTURE_DIR,
                project_root_path=FIXTURE_DIR,
                pytest_cmd="pytest",
            )
            file_to_funcs, trace_file_path = _get_java_replay_test_functions(replay_test_paths, test_cfg, FIXTURE_DIR)
            assert len(file_to_funcs) > 0
            assert trace_file_path.exists()

            # Step 2: Set language (like optimizer.run does)
            # NOTE(review): the previous language is not restored afterwards;
            # confirm this cannot leak into tests that run later.
            from codeflash.languages import set_current_language
            from codeflash.languages.base import Language

            set_current_language(Language.JAVA)

            # Step 3: Discover tests (like optimizer.discover_tests)
            from codeflash.discovery.discover_unit_tests import discover_tests_for_language

            all_functions = [func for funcs in file_to_funcs.values() for func in funcs]
            function_to_tests, num_unit_tests, num_replay_tests = discover_tests_for_language(
                test_cfg, "java", file_to_funcs
            )
            assert num_unit_tests + num_replay_tests > 0, "No tests discovered"
            assert num_replay_tests > 0, f"Expected replay tests, got {num_replay_tests}"
            assert len(function_to_tests) > 0, "No function-to-test mappings"

            # Verify function_to_tests has entries for our traced functions
            has_repeat_string = any("repeatString" in key for key in function_to_tests)
            assert has_repeat_string, f"repeatString not in function_to_tests keys: {list(function_to_tests.keys())}"

            # Step 4: Rank functions (like optimizer.rank_all_functions_globally)
            if jfr_file.exists():
                from codeflash.benchmarking.function_ranker import JavaFunctionRanker
                from codeflash.languages.java.jfr_parser import JfrProfile

                # Collect the dotted prefix of every qualified name (everything
                # before the last component) and pass the set to JfrProfile as packages.
                packages = set()
                for func in all_functions:
                    parts = func.qualified_name.split(".")
                    if len(parts) >= 2:
                        packages.add(".".join(parts[:-1]))

                profile = JfrProfile(jfr_file, list(packages))
                ranker = JavaFunctionRanker(profile)
                ranked = ranker.rank_functions(all_functions)
                assert isinstance(ranked, list)
        finally:
            # Clean up generated replay tests from fixture directory
            self._remove_generated_replay_tests(output_dir)

    def test_instrument_and_compile_replay_tests(self, compiled_workload: Path, tmp_path: Path) -> None:
        """Test that replay tests can be instrumented and compiled by Maven."""
        import os
        import shutil

        trace_db_path = tmp_path / "trace.db"
        test_root = FIXTURE_DIR / "src" / "test" / "java"
        output_dir = test_root / "codeflash" / "replay"
        output_dir.mkdir(parents=True, exist_ok=True)

        try:
            _trace_db, _jfr_file, test_count = run_java_tracer(
                java_command=["java", "-cp", str(compiled_workload), WORKLOAD_CLASS],
                trace_db_path=trace_db_path,
                packages=[WORKLOAD_PACKAGE],
                project_root=FIXTURE_DIR,
                output_dir=output_dir,
            )
            assert test_count >= 1

            # Sorted so that the file picked below does not depend on directory listing order.
            replay_test_paths = sorted(output_dir.glob("*.java"))
            assert replay_test_paths, f"No replay test files found in {output_dir}"

            # Instrument a replay test (like instrument_existing_tests does)
            from codeflash.languages.java.discovery import discover_functions_from_source
            from codeflash.languages.java.instrumentation import instrument_existing_test

            source_code = WORKLOAD_SOURCE.read_text(encoding="utf-8")
            source_functions = discover_functions_from_source(source_code, file_path=WORKLOAD_SOURCE)

            # Instrument against repeatString; fail with a readable message
            # (rather than a bare StopIteration) when it was not discovered.
            target_func = next((f for f in source_functions if f.function_name == "repeatString"), None)
            assert target_func is not None, (
                f"repeatString not found among: {[f.function_name for f in source_functions]}"
            )

            replay_test_file = replay_test_paths[0]
            test_source = replay_test_file.read_text(encoding="utf-8")

            # Instrument for behavior mode
            success, instrumented_source = instrument_existing_test(
                test_string=test_source, function_to_optimize=target_func, mode="behavior", test_path=replay_test_file
            )
            assert success, "Failed to instrument replay test for behavior mode"
            assert instrumented_source is not None
            assert "__perfinstrumented" in instrumented_source

            # Write the instrumented test next to the replay test so it is compiled with it
            instrumented_path = replay_test_file.parent / f"{replay_test_file.stem}__perfinstrumented.java"
            instrumented_path.write_text(instrumented_source, encoding="utf-8")

            # Instrument for performance mode
            success, perf_source = instrument_existing_test(
                test_string=test_source,
                function_to_optimize=target_func,
                mode="performance",
                test_path=replay_test_file,
            )
            assert success, "Failed to instrument replay test for performance mode"
            assert perf_source is not None

            perf_path = replay_test_file.parent / f"{replay_test_file.stem}__perfonlyinstrumented.java"
            perf_path.write_text(perf_source, encoding="utf-8")

            # Install codeflash-runtime as Maven dependency and compile
            from codeflash.languages.java.build_tool_strategy import get_strategy

            strategy = get_strategy(FIXTURE_DIR)
            strategy.ensure_runtime(FIXTURE_DIR, None)

            compile_env = os.environ.copy()
            compile_result = strategy.compile_tests(FIXTURE_DIR, compile_env, None, timeout=120)
            assert compile_result.returncode == 0, (
                f"Maven compilation failed (rc={compile_result.returncode}):\n"
                f"stdout: {compile_result.stdout}\n"
                f"stderr: {compile_result.stderr}"
            )
        finally:
            # Also clean up Maven build artifacts for the replay package;
            # ignore_errors covers the case where nothing was compiled.
            shutil.rmtree(FIXTURE_DIR / "target" / "test-classes" / "codeflash", ignore_errors=True)
            # The replay tests and both instrumented copies all live in
            # output_dir with a .java suffix, so a single sweep removes them
            # even when the test failed before they could be recorded.
            self._remove_generated_replay_tests(output_dir)