From 4c9328591425e4d5e8243a6f13336a1b4c8c1668 Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 5 Feb 2026 16:23:47 +0000 Subject: [PATCH 1/2] fix: show actual test file paths in failure log instead of original_file_path For AI-generated tests, original_file_path is intentionally None. When tests fail to run, the log now shows instrumented_behavior_file_path (the actual path being executed) instead of original_file_path. This makes debugging test execution failures much clearer. Co-Authored-By: Claude Sonnet 4.5 --- codeflash/verification/parse_test_output.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index ad4937411..c66fb129f 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -1243,8 +1243,14 @@ def parse_test_xml( ) if not test_results: + # Show actual test file paths being used (behavior or original), not just original_file_path + # For AI-generated tests, original_file_path is None, so show instrumented_behavior_file_path instead + test_paths_display = [ + str(test_file.instrumented_behavior_file_path or test_file.original_file_path) + for test_file in test_files.test_files + ] logger.info( - f"Tests '{[test_file.original_file_path for test_file in test_files.test_files]}' failed to run, skipping" + f"Tests {test_paths_display} failed to run, skipping" ) if run_result is not None: stdout, stderr = "", "" From e0b805d7f604121a4e04b39c5be3a41a278ae0bf Mon Sep 17 00:00:00 2001 From: Mohamed Ashraf Date: Thu, 5 Feb 2026 16:25:37 +0000 Subject: [PATCH 2/2] fix: detect and log Java compilation failures explicitly When Maven fails during test execution, it's not immediately clear if the failure is due to compilation errors (invalid Java code) or test failures (runtime issues). This change adds explicit detection of compilation errors by checking Maven's output for compilation error indicators (e.g., "COMPILATION ERROR", "cannot find symbol", "package does not exist"). When compilation errors are detected: - Logs ERROR-level message indicating compilation failure - Suggests checking that generated test code is syntactically valid - Includes first 50 lines of Maven output for diagnosis This makes it immediately obvious when AI-generated tests contain syntax errors (like using Java reserved keywords as class names), rather than appearing as silent test execution failures. Co-Authored-By: Claude Sonnet 4.5 --- codeflash/languages/java/test_runner.py | 31 ++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/codeflash/languages/java/test_runner.py b/codeflash/languages/java/test_runner.py index b5e0618a8..e68a649a9 100644 --- a/codeflash/languages/java/test_runner.py +++ b/codeflash/languages/java/test_runner.py @@ -1044,10 +1044,39 @@ def _run_maven_tests( logger.debug("Running Maven command: %s in %s", " ".join(cmd), project_root) try: - return subprocess.run( + result = subprocess.run( cmd, check=False, cwd=project_root, env=env, capture_output=True, text=True, timeout=timeout ) + # Check if Maven failed due to compilation errors (not just test failures) + if result.returncode != 0: + # Maven compilation errors contain specific markers in output + compilation_error_indicators = [ + "[ERROR] COMPILATION ERROR", + "[ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin", + "compilation failure", + "cannot find symbol", + "package .* does not exist", + ] + + combined_output = (result.stdout or "") + (result.stderr or "") + has_compilation_error = any( + indicator.lower() in combined_output.lower() for indicator in compilation_error_indicators + ) + + if has_compilation_error: + logger.error( + f"Maven compilation failed for {mode} tests. " + f"Check that generated test code is syntactically valid Java. " + f"Return code: {result.returncode}" + ) + # Log first 50 lines of output to help diagnose compilation errors + output_lines = combined_output.split("\n") + error_context = "\n".join(output_lines[:50]) if len(output_lines) > 50 else combined_output + logger.error(f"Maven compilation error output:\n{error_context}") + + return result + except subprocess.TimeoutExpired: logger.exception("Maven test execution timed out after %d seconds", timeout) return subprocess.CompletedProcess(