fix: resolve Vitest benchmarking showing wall-clock time instead of per-function timing

Root cause: Vitest performance tests reported "20.0 seconds over 1 loop"
(JUnit XML wall-clock fallback) instead of actual per-function nanosecond
timing. This was a chain of two issues:

1. **stdout interception**: Vitest's default `threads` pool intercepts
   process.stdout.write() and console.log(), preventing timing markers
   from flowing to the parent process. Fixed by adding `--pool=forks`
   to all Vitest commands and config files. The `forks` pool uses child
   processes where stdout flows directly to the parent.

2. **test name detection**: Even after markers flowed through (43,000+
   found in stdout), the parser couldn't match them to JUnit XML
   testcases because all markers had "unknown" as the test name. This
   happened because Vitest doesn't inject `beforeEach` as a global
   (unlike Jest), so capture.js's Jest-style hook to set
   `currentTestName` never fired.

   Fixed by adding Vitest-specific test name detection in capture.js:
   - Primary: `expect.getState().currentTestName` (full describe path)
   - Fallback: `__vitest_worker__.current.fullTestName`
   - Defense-in-depth: parser fallback matches "unknown" markers to
     the first testcase when no name match is found

Result: cheerio's `isHtml` went from "20.0s / 1 loop" to
"902μs / 20,853 loops" with proper speedup analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sarthak Agarwal 2026-03-03 14:31:10 +05:30
parent a78b4cc299
commit ed2594bfd1
5 changed files with 207 additions and 36 deletions

View file

@ -172,22 +172,12 @@ def parse_jest_test_xml(
if global_stdout:
marker_count = len(jest_start_pattern.findall(global_stdout))
if marker_count > 0:
logger.debug(f"Found {marker_count} timing start markers in Jest stdout")
else:
logger.debug(f"No timing start markers found in Jest stdout (len={len(global_stdout)})")
# Check for END markers with duration (perf test markers)
logger.debug(f"Found {marker_count} timing start markers in subprocess stdout")
end_marker_count = len(jest_end_pattern.findall(global_stdout))
if end_marker_count > 0:
logger.debug(
f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout"
)
# Sample a few markers to verify loop indices
end_samples = list(jest_end_pattern.finditer(global_stdout))[:5]
for sample in end_samples:
groups = sample.groups()
logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}")
logger.debug(f"Found {end_marker_count} END timing markers with duration in subprocess stdout")
else:
logger.debug("[PERF-DEBUG] No END markers with duration found in Jest stdout")
logger.debug(f"No END markers found in subprocess stdout (len={len(global_stdout)})")
except (AttributeError, UnicodeDecodeError):
global_stdout = ""
@ -215,6 +205,10 @@ def parse_jest_test_xml(
# Key: (testName, testName2, funcName, loopIndex, lineId)
key = match.groups()[:5]
end_matches_dict[key] = match
logger.debug(
f"Suite {suite_count}: combined_stdout len={len(combined_stdout)}, "
f"start_matches={len(start_matches)}, end_matches={len(end_matches_dict)}"
)
# Debug: log suite-level END marker parsing for perf tests
if end_matches_dict:
@ -371,6 +365,25 @@ def parse_jest_test_xml(
# end_key is (module, testName, funcName, loopIndex, invocationId)
if len(end_key) >= 2 and sanitized_test_name in end_key[1]:
matching_ends_direct.append(end_match)
# Fallback: If no matches found but END markers exist with "unknown" test name
# (happens in Vitest where beforeEach hook doesn't fire to set currentTestName),
# match ALL "unknown" markers to this testcase. Use a consumed set to avoid
# assigning the same marker to multiple testcases.
if not matching_ends_direct and end_matches_dict:
unknown_markers = [(k, m) for k, m in end_matches_dict.items() if len(k) >= 2 and k[1] == "unknown"]
if unknown_markers:
# Assign all unconsumed unknown markers to this testcase
for _, end_match in unknown_markers:
matching_ends_direct.append(end_match)
# Remove consumed markers so they aren't double-assigned to other testcases
for end_key, _ in unknown_markers:
end_matches_dict.pop(end_key, None)
logger.debug(
f"[PERF-UNKNOWN-MATCH] Testcase '{test_name[:40]}': matched {len(matching_ends_direct)} "
f"'unknown' END markers (Vitest fallback)"
)
# Debug: log matching results for perf tests
if matching_ends_direct:
loop_indices = [int(m.groups()[3]) if m.groups()[3].isdigit() else 1 for m in matching_ends_direct]

View file

@ -225,6 +225,8 @@ export default mergeConfig(originalConfig, {{
test: {{
// Override include pattern to match all test files including generated ones
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
pool: 'forks',
}},
}});
"""
@ -239,6 +241,8 @@ export default defineConfig({
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
// Exclude common non-test directories
exclude: ['**/node_modules/**', '**/dist/**'],
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
pool: 'forks',
},
});
"""
@ -277,6 +281,7 @@ def _build_vitest_behavioral_command(
"--reporter=default",
"--reporter=junit",
"--no-file-parallelism", # Serial execution for deterministic timing
"--pool=forks", # Use child processes so timing markers flow to parent stdout
]
# For monorepos with restrictive vitest configs (e.g., include: test/**/*.test.ts),
@ -326,6 +331,7 @@ def _build_vitest_benchmarking_command(
"--reporter=default",
"--reporter=junit",
"--no-file-parallelism", # Serial execution for consistent benchmarking
"--pool=forks", # Use child processes so timing markers flow to parent stdout
]
# Use codeflash vitest config to override restrictive include patterns
@ -656,11 +662,6 @@ def run_vitest_benchmarking_tests(
)
else:
logger.debug(f"[VITEST-BENCH] No perf END markers found in stdout (len={len(result.stdout)})")
# Check if there are behavior END markers instead
behavior_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:\d+:[^#]+######!")
behavior_matches = list(behavior_end_pattern.finditer(result.stdout))
if behavior_matches:
logger.debug(f"[VITEST-BENCH] Found {len(behavior_matches)} behavior END markers instead (no duration)")
return result_file_path, result
@ -719,6 +720,7 @@ def run_vitest_line_profile_tests(
"--reporter=default",
"--reporter=junit",
"--no-file-parallelism", # Serial execution for consistent line profiling
"--pool=forks", # Use child processes so timing markers flow to parent stdout
]
# Use codeflash vitest config to override restrictive include patterns

View file

@ -1,6 +1,6 @@
{
"name": "codeflash",
"version": "0.10.0",
"version": "0.10.1",
"description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",
"main": "runtime/index.js",
"types": "runtime/index.d.ts",

View file

@ -266,10 +266,96 @@ if (RANDOM_SEED !== 0) {
}
}
// Current test context (set by Jest hooks)
// Current test context (set by Jest hooks or Vitest worker)
let currentTestName = null;
let currentTestPath = null; // Test file path from Jest
/**
 * Resolve the name of the currently running Vitest test.
 *
 * Vitest does not inject a global `beforeEach` the way Jest does, so the
 * Jest-style hook that records `currentTestName` never fires. Instead we ask
 * the framework directly, in order of preference:
 *   1. `expect.getState().currentTestName` — full name including describe blocks,
 *      matching what Jest's beforeEach hook would have set.
 *   2. `globalThis.__vitest_worker__.current` — the running task object, using
 *      the most qualified of fullTestName / fullName / name.
 *
 * @returns {string|null} The current test name, or null if not in Vitest
 */
function getVitestTestName() {
  // Primary source: assertion state, e.g.
  // "Performance tests > should return true for basic HTML tags".
  try {
    if (typeof expect !== 'undefined' && expect.getState) {
      const fromExpect = expect.getState()?.currentTestName;
      if (fromExpect) {
        return fromExpect;
      }
    }
  } catch (err) {
    // `expect` is unavailable or unusable here — fall through to the worker API.
  }
  // Fallback: Vitest's internal worker; prefer the most fully-qualified
  // name the current task exposes.
  try {
    const task = globalThis.__vitest_worker__?.current;
    const taskName = task && (task.fullTestName || task.fullName || task.name);
    if (taskName) {
      return taskName;
    }
  } catch (err) {
    // Not executing inside a Vitest worker.
  }
  return null;
}
/**
 * Resolve the file path of the currently running Vitest test.
 *
 * Checks `globalThis.__vitest_worker__.filepath` first, then falls back to
 * `expect.getState().testPath`.
 *
 * @returns {string|null} The current test file path, or null if not in Vitest
 */
function getVitestTestPath() {
  // Primary source: the worker carries the path of the file it is executing.
  try {
    const workerPath = globalThis.__vitest_worker__?.filepath;
    if (workerPath) {
      return workerPath;
    }
  } catch (err) {
    // No Vitest worker in this process.
  }
  // Secondary source: the assertion state also tracks the test path.
  try {
    if (typeof expect !== 'undefined' && expect.getState) {
      const statePath = expect.getState()?.testPath;
      if (statePath) {
        return statePath;
      }
    }
  } catch (err) {
    // `expect` is unavailable — nothing more to try.
  }
  return null;
}
/**
 * Resolve the test name to record: the Jest hook value wins, then the
 * Vitest worker lookup, then the literal fallback 'unknown'.
 * @returns {string} The current test name (never empty)
 */
function getEffectiveTestName() {
  if (currentTestName) {
    return currentTestName;
  }
  return getVitestTestName() || 'unknown';
}
/**
 * Resolve the test file path to record: the Jest hook value wins, then the
 * Vitest worker lookup; null when neither source knows the path.
 * @returns {string|null} The current test file path
 */
function getEffectiveTestPath() {
  if (currentTestPath) {
    return currentTestPath;
  }
  return getVitestTestPath() || null;
}
// Invocation counter map: tracks how many times each testId has been seen
// Key: testId (testModule:testClass:testFunction:lineId:loopIndex)
// Value: count (starts at 0, increments each time same key is seen)
@ -549,13 +635,14 @@ function capture(funcName, lineId, fn, ...args) {
// Get test context (raw values for SQLite storage)
// Use TEST_MODULE env var if set, otherwise derive from test file path
const effectiveTestPath = getEffectiveTestPath();
let testModulePath;
if (TEST_MODULE) {
testModulePath = TEST_MODULE;
} else if (currentTestPath) {
} else if (effectiveTestPath) {
// Get relative path from cwd and convert to module-style path
const path = require('path');
const relativePath = path.relative(process.cwd(), currentTestPath);
const relativePath = path.relative(process.cwd(), effectiveTestPath);
// Convert to Python module-style path (e.g., "tests/test_foo.test.js" -> "tests.test_foo.test")
// This matches what Jest's junit XML produces
testModulePath = relativePath
@ -564,10 +651,10 @@ function capture(funcName, lineId, fn, ...args) {
.replace(/\.test$/, '.test') // Keep .test suffix
.replace(/\//g, '.'); // Convert path separators to dots
} else {
testModulePath = currentTestName || 'unknown';
testModulePath = getEffectiveTestName();
}
const testClassName = null; // Jest doesn't use classes like Python
const testFunctionName = currentTestName || 'unknown';
const testFunctionName = getEffectiveTestName();
// Sanitized versions for stdout tags (avoid regex conflicts)
const safeModulePath = sanitizeTestId(testModulePath);
@ -583,8 +670,8 @@ function capture(funcName, lineId, fn, ...args) {
// Format stdout tag (matches Python format, uses sanitized names)
const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${LOOP_INDEX}:${invocationId}`;
// Print start tag
console.log(`!$######${testStdoutTag}######$!`);
// Print start tag (use process.stdout.write to bypass test framework console interception)
process.stdout.write(`!$######${testStdoutTag}######$!\n`);
// Timing with nanosecond precision
const startTime = getTimeNs();
@ -602,14 +689,14 @@ function capture(funcName, lineId, fn, ...args) {
const durationNs = getDurationNs(startTime, endTime);
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, resolved, null, durationNs);
// Print end tag (no duration for behavior mode)
console.log(`!######${testStdoutTag}######!`);
process.stdout.write(`!######${testStdoutTag}######!\n`);
return resolved;
},
(err) => {
const endTime = getTimeNs();
const durationNs = getDurationNs(startTime, endTime);
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, null, err, durationNs);
console.log(`!######${testStdoutTag}######!`);
process.stdout.write(`!######${testStdoutTag}######!\n`);
throw err;
}
);
@ -623,7 +710,7 @@ function capture(funcName, lineId, fn, ...args) {
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, returnValue, error, durationNs);
// Print end tag (no duration for behavior mode, matching Python)
console.log(`!######${testStdoutTag}######!`);
process.stdout.write(`!######${testStdoutTag}######!\n`);
if (error) throw error;
return returnValue;
@ -656,22 +743,24 @@ function capturePerf(funcName, lineId, fn, ...args) {
const shouldLoop = getPerfLoopCount() > 1 && !checkSharedTimeLimit();
// Get test context (computed once, reused across batch)
// Uses Vitest worker API as fallback when Jest-style beforeEach hook doesn't fire
const effectiveTestPath = getEffectiveTestPath();
let testModulePath;
if (TEST_MODULE) {
testModulePath = TEST_MODULE;
} else if (currentTestPath) {
} else if (effectiveTestPath) {
const path = require('path');
const relativePath = path.relative(process.cwd(), currentTestPath);
const relativePath = path.relative(process.cwd(), effectiveTestPath);
testModulePath = relativePath
.replace(/\\/g, '/')
.replace(/\.js$/, '')
.replace(/\.test$/, '.test')
.replace(/\//g, '.');
} else {
testModulePath = currentTestName || 'unknown';
testModulePath = getEffectiveTestName();
}
const testClassName = null;
const testFunctionName = currentTestName || 'unknown';
const testFunctionName = getEffectiveTestName();
const safeModulePath = sanitizeTestId(testModulePath);
const safeTestFunctionName = sanitizeTestId(testFunctionName);
@ -767,8 +856,8 @@ function capturePerf(funcName, lineId, fn, ...args) {
lastError = e;
}
// Print end tag with timing
console.log(`!######${testStdoutTag}:${durationNs}######!`);
// Print end tag with timing (use process.stdout.write to bypass test framework console interception)
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
// Update shared loop counter
sharedPerfState.totalLoopsCompleted++;
@ -808,7 +897,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
* @private
*/
function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
console.log(`!######${testStdoutTag}:${durationNs}######!`);
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
sharedPerfState.totalLoopsCompleted++;
if (durationNs > 0) {
runtimes.push(durationNs / 1000);

View file

@ -636,3 +636,70 @@ class TestRunMochaLineProfileTests:
env = call_kwargs.kwargs.get("env") or call_kwargs[1].get("env", {})
assert env.get("CODEFLASH_MODE") == "line_profile"
assert env.get("CODEFLASH_LINE_PROFILE_OUTPUT") == str(profile_output)
class TestParserUnknownTestNameFallback:
"""Tests for the parser's fallback when perf markers have 'unknown' test name."""
def test_unknown_markers_matched_to_first_testcase(self):
"""When capturePerf markers have 'unknown' test name (Vitest beforeEach not firing),
the parser should still match them to testcases via the fallback logic."""
# Imports are deferred so importing this test module stays cheap.
from codeflash.languages.javascript.parse import parse_jest_test_xml
from codeflash.models.models import TestFile, TestFiles
from codeflash.models.test_type import TestType
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir_path = Path(tmpdir)
# Create a JUnit XML with one test suite and one testcase
# The 10.5s suite/testcase time is the wall-clock value the parser would
# fall back to if no timing markers matched — the final asserts prove it doesn't.
xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite name="src/test_func__perf_test_0.test.ts" tests="1" failures="0" time="10.5">
<testcase name="should compute correctly" classname="src/test_func__perf_test_0.test.ts" time="10.5">
</testcase>
</testsuite>
</testsuites>"""
xml_path = tmpdir_path / "results.xml"
xml_path.write_text(xml_content, encoding="utf-8")
# Create test files
test_file = tmpdir_path / "test_func__perf_test_0.test.ts"
test_file.write_text("// perf test", encoding="utf-8")
test_files = TestFiles(
test_files=[
TestFile(
instrumented_behavior_file_path=test_file,
benchmarking_file_path=test_file,
test_type=TestType.GENERATED_REGRESSION,
)
]
)
# Create a mock subprocess result with perf markers using "unknown" test name
# This simulates what happens when Vitest's beforeEach doesn't fire
# Marker layout (presumably module:testName:funcName:loopIndex:invocationId:durationNs);
# testName is "unknown" and durations are 1100-1500 ns.
markers = []
for i in range(1, 6):
markers.append(f"!######test_mod:unknown:computeFunc:{i}:1_0:{1000 + i * 100}######!")
stdout = "\n".join(markers)
mock_result = MagicMock()
mock_result.stdout = stdout
# Only the attributes the parser reads are set explicitly; MagicMock
# auto-stubs anything else it touches.
test_config = MagicMock()
test_config.tests_project_rootdir = tmpdir_path
test_config.test_framework = "vitest"
results = parse_jest_test_xml(
test_xml_file_path=xml_path,
test_files=test_files,
test_config=test_config,
run_result=mock_result,
)
# The "unknown" fallback should assign all 5 markers to the testcase
assert len(results.test_results) == 5
# Verify runtimes were extracted (not the 10.5s XML fallback)
runtimes = [r.runtime for r in results.test_results if r.runtime is not None]
assert len(runtimes) == 5
assert all(r < 100_000 for r in runtimes) # All under 100 microseconds (nanoseconds)