codeflash/tests/scripts/end_to_end_test_java_tracer.py
HeshamHM28 20f76a30dd fix(test): move --no-pr before optimize subcommand in e2e tracer test
--no-pr is a top-level codeflash flag, not an optimize subcommand flag.
Placing it after optimize caused it to be passed to the JVM as an
unrecognized option.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 06:56:27 +00:00

155 lines
5.2 KiB
Python

import logging
import os
import pathlib
import re
import shutil
import subprocess
import time
def _remove_stale_artifacts(test_java_dir: pathlib.Path) -> None:
    """Delete replay tests and instrumented sources left over from earlier runs."""
    replay_dir = test_java_dir / "codeflash" / "replay"
    if replay_dir.exists():
        shutil.rmtree(replay_dir, ignore_errors=True)
    for pattern in ("*__perfinstrumented*.java", "*__perfonlyinstrumented*.java"):
        for stale_file in test_java_dir.rglob(pattern):
            stale_file.unlink(missing_ok=True)


def _compile_workload(fixture_dir: pathlib.Path, classes_dir: pathlib.Path) -> bool:
    """Compile Workload.java into ``classes_dir``; log javac's stderr and return False on failure."""
    classes_dir.mkdir(parents=True, exist_ok=True)
    workload_source = fixture_dir / "src" / "main" / "java" / "com" / "example" / "Workload.java"
    compile_result = subprocess.run(
        ["javac", "--release", "11", "-d", str(classes_dir), str(workload_source)],
        capture_output=True,
        text=True,
    )
    if compile_result.returncode != 0:
        logging.error(f"javac failed: {compile_result.stderr}")
        return False
    return True


def _run_and_stream(command: list, cwd: pathlib.Path, env: dict) -> tuple:
    """Run ``command``, echoing its merged stdout/stderr live; return ``(exit_code, full_output)``."""
    captured = []
    # The context manager guarantees the pipe is closed and the child is reaped
    # even if echoing the output raises part-way through.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        cwd=str(cwd),
        env=env,
        encoding="utf-8",
        # A stray non-UTF-8 byte from the child must not abort the whole run.
        errors="replace",
    ) as process:
        for line in process.stdout:
            print(line, end="", flush=True)
            captured.append(line)
        return_code = process.wait()
    return return_code, "".join(captured)


def _validate_output(stdout: str, expected_improvement_pct: int) -> bool:
    """Check the captured output for replay-test and optimization markers; log the first failure."""
    # Replay tests were generated.
    if "replay test files generated" not in stdout:
        logging.error("Failed to find replay test generation message")
        return False

    # Replay tests were discovered (global count).
    replay_match = re.search(r"Discovered \d+ existing unit tests? and (\d+) replay tests?", stdout)
    if not replay_match:
        logging.error("Failed to find replay test discovery message")
        return False
    num_replay = int(replay_match.group(1))
    if num_replay == 0:
        logging.error("No replay tests discovered")
        return False
    logging.info(f"Replay tests discovered: {num_replay}")

    # Replay test files were used per-function.
    replay_file_match = re.search(r"Discovered \d+ existing unit test files?, (\d+) replay test files?", stdout)
    if not replay_file_match:
        logging.error("Failed to find per-function replay test file discovery message")
        return False
    num_replay_files = int(replay_file_match.group(1))
    if num_replay_files == 0:
        logging.error("No replay test files discovered per-function")
        return False
    logging.info(f"Replay test files per-function: {num_replay_files}")

    # At least one optimization was found.
    if "⚡️ Optimization successful! 📄 " not in stdout:
        logging.error("Failed to find optimization success message")
        return False

    improvement_match = re.search(r"📈 ([\d,]+)% (?:(\w+) )?improvement", stdout)
    if not improvement_match:
        logging.error("Could not find improvement percentage in output")
        return False
    # The percentage may contain thousands separators (e.g. "1,234").
    improvement_pct = int(improvement_match.group(1).replace(",", ""))
    logging.info(f"Performance improvement: {improvement_pct}%")

    # The improvement must be strictly above the threshold.
    if improvement_pct <= expected_improvement_pct:
        logging.error(f"Performance improvement {improvement_pct}% not above {expected_improvement_pct}%")
        return False

    logging.info(f"Success: Java tracer e2e passed with {improvement_pct}% improvement")
    return True


def run_test(expected_improvement_pct: int) -> bool:
    """Run the Java tracer end-to-end test once.

    Cleans stale artifacts from the ``java_tracer_e2e`` fixture, compiles the
    workload with ``javac``, runs ``codeflash optimize`` on it, and validates
    the captured output.

    Args:
        expected_improvement_pct: Threshold the reported improvement
            percentage must strictly exceed.

    Returns:
        True if every step and validation succeeds, False otherwise (the
        reason is logged).
    """
    logging.basicConfig(level=logging.INFO)
    fixture_dir = (pathlib.Path(__file__).parent.parent / "test_languages" / "fixtures" / "java_tracer_e2e").resolve()

    # Ensure test directory exists (git doesn't track empty dirs)
    test_java_dir = fixture_dir / "src" / "test" / "java"
    test_java_dir.mkdir(parents=True, exist_ok=True)

    # Clean up leftover replay tests from previous runs
    _remove_stale_artifacts(test_java_dir)

    # Compile the workload
    classes_dir = fixture_dir / "target" / "classes"
    if not _compile_workload(fixture_dir, classes_dir):
        return False

    # Run the Java tracer + optimizer. "--no-pr" is placed before the
    # "optimize" subcommand; everything after "optimize" is the java command line.
    command = [
        "uv",
        "run",
        "--no-project",
        "-m",
        "codeflash.main",
        "--no-pr",
        "optimize",
        "java",
        "-cp",
        str(classes_dir),
        "com.example.Workload",
    ]
    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "utf-8"
    env["PYTHONUNBUFFERED"] = "1"

    logging.info(f"Running command: {' '.join(command)}")
    logging.info(f"Working directory: {fixture_dir}")

    return_code, stdout = _run_and_stream(command, fixture_dir, env)
    if return_code != 0:
        logging.error(f"Command returned exit code {return_code}")
        return False

    return _validate_output(stdout, expected_improvement_pct)
def run_with_retries(test_func, *args) -> int:
    """Call ``test_func(*args)`` until it returns a truthy value or retries run out.

    The attempt count and the pause between attempts are read from the
    ``MAX_RETRIES`` (default 3) and ``RETRY_DELAY`` (default 5, in seconds)
    environment variables.

    Args:
        test_func: Callable returning a truthy value on success.
        *args: Positional arguments forwarded to ``test_func``.

    Returns:
        0 if any attempt succeeds, 1 otherwise (suitable as a process exit code).
    """
    # Configure logging before the first log call. A module-level logging.info()
    # on an unconfigured root logger installs a default handler at WARNING level,
    # after which a later basicConfig(level=INFO) is a no-op and INFO messages
    # are silently dropped.
    logging.basicConfig(level=logging.INFO)

    max_retries = int(os.getenv("MAX_RETRIES", 3))
    retry_delay = int(os.getenv("RETRY_DELAY", 5))

    for attempt in range(1, max_retries + 1):
        logging.info(f"\n=== Attempt {attempt} of {max_retries} ===")
        if test_func(*args):
            logging.info(f"Test passed on attempt {attempt}")
            return 0

        logging.error(f"Test failed on attempt {attempt}")
        if attempt < max_retries:
            logging.info(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
        else:
            logging.error("Test failed after all retries")
            return 1

    # Reached only when max_retries is less than 1 and no attempt was made.
    return 1
if __name__ == "__main__":
    # Exit with 0 on success or 1 on failure. EXPECTED_IMPROVEMENT_PCT
    # (default 10) is the threshold the measured improvement must exceed.
    # SystemExit is raised directly because the exit() helper is injected by
    # the site module and is not guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(run_with_retries(run_test, int(os.getenv("EXPECTED_IMPROVEMENT_PCT", 10))))