fix: resolve Vitest benchmarking showing wall-clock time instead of per-function timing

Root cause: Vitest performance tests reported "20.0 seconds over 1 loop"
(JUnit XML wall-clock fallback) instead of actual per-function nanosecond
timing. This was a chain of two issues:

1. **stdout interception**: Vitest's default `threads` pool intercepts
   process.stdout.write() and console.log(), preventing timing markers
   from flowing to the parent process. Fixed by adding `--pool=forks`
   to all Vitest commands and config files. The `forks` pool uses child
   processes where stdout flows directly to the parent.

2. **test name detection**: Even after markers flowed through (43,000+
   found in stdout), the parser couldn't match them to JUnit XML
   testcases because all markers had "unknown" as the test name. This
   happened because Vitest doesn't inject `beforeEach` as a global
   (unlike Jest), so capture.js's Jest-style hook to set
   `currentTestName` never fired.

   Fixed by adding Vitest-specific test name detection in capture.js:
   - Primary: `expect.getState().currentTestName` (full describe path)
   - Fallback: `__vitest_worker__.current.fullTestName`
   - Defense-in-depth: parser fallback matches "unknown" markers to
     the first testcase when no name match is found

Result: cheerio's `isHtml` went from "20.0s / 1 loop" to
"902μs / 20,853 loops" with proper speedup analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sarthak Agarwal 2026-03-03 14:31:10 +05:30
parent a78b4cc299
commit ed2594bfd1
5 changed files with 207 additions and 36 deletions

View file

@ -172,22 +172,12 @@ def parse_jest_test_xml(
if global_stdout:
marker_count = len(jest_start_pattern.findall(global_stdout))
if marker_count > 0:
logger.debug(f"Found {marker_count} timing start markers in Jest stdout")
else:
logger.debug(f"No timing start markers found in Jest stdout (len={len(global_stdout)})")
# Check for END markers with duration (perf test markers)
logger.debug(f"Found {marker_count} timing start markers in subprocess stdout")
end_marker_count = len(jest_end_pattern.findall(global_stdout))
if end_marker_count > 0:
logger.debug(
f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout"
)
# Sample a few markers to verify loop indices
end_samples = list(jest_end_pattern.finditer(global_stdout))[:5]
for sample in end_samples:
groups = sample.groups()
logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}")
logger.debug(f"Found {end_marker_count} END timing markers with duration in subprocess stdout")
else:
logger.debug("[PERF-DEBUG] No END markers with duration found in Jest stdout")
logger.debug(f"No END markers found in subprocess stdout (len={len(global_stdout)})")
except (AttributeError, UnicodeDecodeError):
global_stdout = ""
@ -215,6 +205,10 @@ def parse_jest_test_xml(
# Key: (testName, testName2, funcName, loopIndex, lineId)
key = match.groups()[:5]
end_matches_dict[key] = match
logger.debug(
f"Suite {suite_count}: combined_stdout len={len(combined_stdout)}, "
f"start_matches={len(start_matches)}, end_matches={len(end_matches_dict)}"
)
# Debug: log suite-level END marker parsing for perf tests
if end_matches_dict:
@ -371,6 +365,25 @@ def parse_jest_test_xml(
# end_key is (module, testName, funcName, loopIndex, invocationId)
if len(end_key) >= 2 and sanitized_test_name in end_key[1]:
matching_ends_direct.append(end_match)
# Fallback: If no matches found but END markers exist with "unknown" test name
# (happens in Vitest where beforeEach hook doesn't fire to set currentTestName),
# match ALL "unknown" markers to this testcase. Use a consumed set to avoid
# assigning the same marker to multiple testcases.
if not matching_ends_direct and end_matches_dict:
unknown_markers = [(k, m) for k, m in end_matches_dict.items() if len(k) >= 2 and k[1] == "unknown"]
if unknown_markers:
# Assign all unconsumed unknown markers to this testcase
for _, end_match in unknown_markers:
matching_ends_direct.append(end_match)
# Remove consumed markers so they aren't double-assigned to other testcases
for end_key, _ in unknown_markers:
end_matches_dict.pop(end_key, None)
logger.debug(
f"[PERF-UNKNOWN-MATCH] Testcase '{test_name[:40]}': matched {len(matching_ends_direct)} "
f"'unknown' END markers (Vitest fallback)"
)
# Debug: log matching results for perf tests
if matching_ends_direct:
loop_indices = [int(m.groups()[3]) if m.groups()[3].isdigit() else 1 for m in matching_ends_direct]

View file

@ -225,6 +225,8 @@ export default mergeConfig(originalConfig, {{
test: {{
// Override include pattern to match all test files including generated ones
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
pool: 'forks',
}},
}});
"""
@ -239,6 +241,8 @@ export default defineConfig({
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
// Exclude common non-test directories
exclude: ['**/node_modules/**', '**/dist/**'],
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
pool: 'forks',
},
});
"""
@ -277,6 +281,7 @@ def _build_vitest_behavioral_command(
"--reporter=default",
"--reporter=junit",
"--no-file-parallelism", # Serial execution for deterministic timing
"--pool=forks", # Use child processes so timing markers flow to parent stdout
]
# For monorepos with restrictive vitest configs (e.g., include: test/**/*.test.ts),
@ -326,6 +331,7 @@ def _build_vitest_benchmarking_command(
"--reporter=default",
"--reporter=junit",
"--no-file-parallelism", # Serial execution for consistent benchmarking
"--pool=forks", # Use child processes so timing markers flow to parent stdout
]
# Use codeflash vitest config to override restrictive include patterns
@ -656,11 +662,6 @@ def run_vitest_benchmarking_tests(
)
else:
logger.debug(f"[VITEST-BENCH] No perf END markers found in stdout (len={len(result.stdout)})")
# Check if there are behavior END markers instead
behavior_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:\d+:[^#]+######!")
behavior_matches = list(behavior_end_pattern.finditer(result.stdout))
if behavior_matches:
logger.debug(f"[VITEST-BENCH] Found {len(behavior_matches)} behavior END markers instead (no duration)")
return result_file_path, result
@ -719,6 +720,7 @@ def run_vitest_line_profile_tests(
"--reporter=default",
"--reporter=junit",
"--no-file-parallelism", # Serial execution for consistent line profiling
"--pool=forks", # Use child processes so timing markers flow to parent stdout
]
# Use codeflash vitest config to override restrictive include patterns

View file

@ -1,6 +1,6 @@
{
"name": "codeflash",
"version": "0.10.0",
"version": "0.10.1",
"description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",
"main": "runtime/index.js",
"types": "runtime/index.d.ts",

View file

@ -266,10 +266,96 @@ if (RANDOM_SEED !== 0) {
}
}
// Current test context (set by Jest hooks)
// Current test context (set by Jest hooks or Vitest worker)
let currentTestName = null;
let currentTestPath = null; // Test file path from Jest
/**
 * Resolve the name of the currently running Vitest test.
 *
 * Vitest does not inject a global `beforeEach` the way Jest does, so the
 * Jest-style hook that records `currentTestName` never fires. Instead we ask
 * the framework directly, in order of preference:
 *   1. `expect.getState().currentTestName` — full name including describe blocks,
 *      matching what Jest's beforeEach hook would have set.
 *   2. `globalThis.__vitest_worker__.current` — the running task object, using
 *      the most qualified of fullTestName / fullName / name.
 *
 * @returns {string|null} The current test name, or null if not in Vitest
 */
function getVitestTestName() {
  // Primary source: assertion state, e.g.
  // "Performance tests > should return true for basic HTML tags".
  try {
    if (typeof expect !== 'undefined' && expect.getState) {
      const fromExpect = expect.getState()?.currentTestName;
      if (fromExpect) {
        return fromExpect;
      }
    }
  } catch (err) {
    // `expect` is unavailable or unusable here — fall through to the worker API.
  }
  // Fallback: Vitest's internal worker; prefer the most fully-qualified
  // name the current task exposes.
  try {
    const task = globalThis.__vitest_worker__?.current;
    const taskName = task && (task.fullTestName || task.fullName || task.name);
    if (taskName) {
      return taskName;
    }
  } catch (err) {
    // Not executing inside a Vitest worker.
  }
  return null;
}
/**
 * Resolve the file path of the currently running Vitest test.
 *
 * Checks `globalThis.__vitest_worker__.filepath` first, then falls back to
 * `expect.getState().testPath`.
 *
 * @returns {string|null} The current test file path, or null if not in Vitest
 */
function getVitestTestPath() {
  // Primary source: the worker carries the path of the file it is executing.
  try {
    const workerPath = globalThis.__vitest_worker__?.filepath;
    if (workerPath) {
      return workerPath;
    }
  } catch (err) {
    // No Vitest worker in this process.
  }
  // Secondary source: the assertion state also tracks the test path.
  try {
    if (typeof expect !== 'undefined' && expect.getState) {
      const statePath = expect.getState()?.testPath;
      if (statePath) {
        return statePath;
      }
    }
  } catch (err) {
    // `expect` is unavailable — nothing more to try.
  }
  return null;
}
/**
 * Resolve the test name to record: the Jest hook value wins, then the
 * Vitest worker lookup, then the literal fallback 'unknown'.
 * @returns {string} The current test name (never empty)
 */
function getEffectiveTestName() {
  if (currentTestName) {
    return currentTestName;
  }
  return getVitestTestName() || 'unknown';
}
/**
 * Resolve the test file path to record: the Jest hook value wins, then the
 * Vitest worker lookup; null when neither source knows the path.
 * @returns {string|null} The current test file path
 */
function getEffectiveTestPath() {
  if (currentTestPath) {
    return currentTestPath;
  }
  return getVitestTestPath() || null;
}
// Invocation counter map: tracks how many times each testId has been seen
// Key: testId (testModule:testClass:testFunction:lineId:loopIndex)
// Value: count (starts at 0, increments each time same key is seen)
@ -549,13 +635,14 @@ function capture(funcName, lineId, fn, ...args) {
// Get test context (raw values for SQLite storage)
// Use TEST_MODULE env var if set, otherwise derive from test file path
const effectiveTestPath = getEffectiveTestPath();
let testModulePath;
if (TEST_MODULE) {
testModulePath = TEST_MODULE;
} else if (currentTestPath) {
} else if (effectiveTestPath) {
// Get relative path from cwd and convert to module-style path
const path = require('path');
const relativePath = path.relative(process.cwd(), currentTestPath);
const relativePath = path.relative(process.cwd(), effectiveTestPath);
// Convert to Python module-style path (e.g., "tests/test_foo.test.js" -> "tests.test_foo.test")
// This matches what Jest's junit XML produces
testModulePath = relativePath
@ -564,10 +651,10 @@ function capture(funcName, lineId, fn, ...args) {
.replace(/\.test$/, '.test') // Keep .test suffix
.replace(/\//g, '.'); // Convert path separators to dots
} else {
testModulePath = currentTestName || 'unknown';
testModulePath = getEffectiveTestName();
}
const testClassName = null; // Jest doesn't use classes like Python
const testFunctionName = currentTestName || 'unknown';
const testFunctionName = getEffectiveTestName();
// Sanitized versions for stdout tags (avoid regex conflicts)
const safeModulePath = sanitizeTestId(testModulePath);
@ -583,8 +670,8 @@ function capture(funcName, lineId, fn, ...args) {
// Format stdout tag (matches Python format, uses sanitized names)
const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${LOOP_INDEX}:${invocationId}`;
// Print start tag
console.log(`!$######${testStdoutTag}######$!`);
// Print start tag (use process.stdout.write to bypass test framework console interception)
process.stdout.write(`!$######${testStdoutTag}######$!\n`);
// Timing with nanosecond precision
const startTime = getTimeNs();
@ -602,14 +689,14 @@ function capture(funcName, lineId, fn, ...args) {
const durationNs = getDurationNs(startTime, endTime);
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, resolved, null, durationNs);
// Print end tag (no duration for behavior mode)
console.log(`!######${testStdoutTag}######!`);
process.stdout.write(`!######${testStdoutTag}######!\n`);
return resolved;
},
(err) => {
const endTime = getTimeNs();
const durationNs = getDurationNs(startTime, endTime);
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, null, err, durationNs);
console.log(`!######${testStdoutTag}######!`);
process.stdout.write(`!######${testStdoutTag}######!\n`);
throw err;
}
);
@ -623,7 +710,7 @@ function capture(funcName, lineId, fn, ...args) {
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, returnValue, error, durationNs);
// Print end tag (no duration for behavior mode, matching Python)
console.log(`!######${testStdoutTag}######!`);
process.stdout.write(`!######${testStdoutTag}######!\n`);
if (error) throw error;
return returnValue;
@ -656,22 +743,24 @@ function capturePerf(funcName, lineId, fn, ...args) {
const shouldLoop = getPerfLoopCount() > 1 && !checkSharedTimeLimit();
// Get test context (computed once, reused across batch)
// Uses Vitest worker API as fallback when Jest-style beforeEach hook doesn't fire
const effectiveTestPath = getEffectiveTestPath();
let testModulePath;
if (TEST_MODULE) {
testModulePath = TEST_MODULE;
} else if (currentTestPath) {
} else if (effectiveTestPath) {
const path = require('path');
const relativePath = path.relative(process.cwd(), currentTestPath);
const relativePath = path.relative(process.cwd(), effectiveTestPath);
testModulePath = relativePath
.replace(/\\/g, '/')
.replace(/\.js$/, '')
.replace(/\.test$/, '.test')
.replace(/\//g, '.');
} else {
testModulePath = currentTestName || 'unknown';
testModulePath = getEffectiveTestName();
}
const testClassName = null;
const testFunctionName = currentTestName || 'unknown';
const testFunctionName = getEffectiveTestName();
const safeModulePath = sanitizeTestId(testModulePath);
const safeTestFunctionName = sanitizeTestId(testFunctionName);
@ -767,8 +856,8 @@ function capturePerf(funcName, lineId, fn, ...args) {
lastError = e;
}
// Print end tag with timing
console.log(`!######${testStdoutTag}:${durationNs}######!`);
// Print end tag with timing (use process.stdout.write to bypass test framework console interception)
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
// Update shared loop counter
sharedPerfState.totalLoopsCompleted++;
@ -808,7 +897,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
* @private
*/
function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
console.log(`!######${testStdoutTag}:${durationNs}######!`);
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
sharedPerfState.totalLoopsCompleted++;
if (durationNs > 0) {
runtimes.push(durationNs / 1000);

View file

@ -636,3 +636,70 @@ class TestRunMochaLineProfileTests:
env = call_kwargs.kwargs.get("env") or call_kwargs[1].get("env", {})
assert env.get("CODEFLASH_MODE") == "line_profile"
assert env.get("CODEFLASH_LINE_PROFILE_OUTPUT") == str(profile_output)
class TestParserUnknownTestNameFallback:
"""Tests for the parser's fallback when perf markers have 'unknown' test name."""
def test_unknown_markers_matched_to_first_testcase(self):
"""When capturePerf markers have 'unknown' test name (Vitest beforeEach not firing),
the parser should still match them to testcases via the fallback logic."""
# Imports are deferred so importing this test module stays cheap.
from codeflash.languages.javascript.parse import parse_jest_test_xml
from codeflash.models.models import TestFile, TestFiles
from codeflash.models.test_type import TestType
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir_path = Path(tmpdir)
# Create a JUnit XML with one test suite and one testcase
# The 10.5s suite/testcase time is the wall-clock value the parser would
# fall back to if no timing markers matched — the final asserts prove it doesn't.
xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite name="src/test_func__perf_test_0.test.ts" tests="1" failures="0" time="10.5">
<testcase name="should compute correctly" classname="src/test_func__perf_test_0.test.ts" time="10.5">
</testcase>
</testsuite>
</testsuites>"""
xml_path = tmpdir_path / "results.xml"
xml_path.write_text(xml_content, encoding="utf-8")
# Create test files
test_file = tmpdir_path / "test_func__perf_test_0.test.ts"
test_file.write_text("// perf test", encoding="utf-8")
test_files = TestFiles(
test_files=[
TestFile(
instrumented_behavior_file_path=test_file,
benchmarking_file_path=test_file,
test_type=TestType.GENERATED_REGRESSION,
)
]
)
# Create a mock subprocess result with perf markers using "unknown" test name
# This simulates what happens when Vitest's beforeEach doesn't fire
# Marker layout (presumably module:testName:funcName:loopIndex:invocationId:durationNs);
# testName is "unknown" and durations are 1100-1500 ns.
markers = []
for i in range(1, 6):
markers.append(f"!######test_mod:unknown:computeFunc:{i}:1_0:{1000 + i * 100}######!")
stdout = "\n".join(markers)
mock_result = MagicMock()
mock_result.stdout = stdout
# Only the attributes the parser reads are set explicitly; MagicMock
# auto-stubs anything else it touches.
test_config = MagicMock()
test_config.tests_project_rootdir = tmpdir_path
test_config.test_framework = "vitest"
results = parse_jest_test_xml(
test_xml_file_path=xml_path,
test_files=test_files,
test_config=test_config,
run_result=mock_result,
)
# The "unknown" fallback should assign all 5 markers to the testcase
assert len(results.test_results) == 5
# Verify runtimes were extracted (not the 10.5s XML fallback)
runtimes = [r.runtime for r in results.test_results if r.runtime is not None]
assert len(runtimes) == 5
assert all(r < 100_000 for r in runtimes) # All under 100 microseconds (nanoseconds)