mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
fix: resolve Vitest benchmarking showing wall-clock time instead of per-function timing
Root cause: Vitest performance tests reported "20.0 seconds over 1 loop"
(JUnit XML wall-clock fallback) instead of actual per-function nanosecond
timing. This was a chain of two issues:
1. **stdout interception**: Vitest's default `threads` pool intercepts
process.stdout.write() and console.log(), preventing timing markers
from flowing to the parent process. Fixed by adding `--pool=forks`
to all Vitest commands and config files. The `forks` pool uses child
processes where stdout flows directly to the parent.
2. **test name detection**: Even after markers flowed through (43,000+
found in stdout), the parser couldn't match them to JUnit XML
testcases because all markers had "unknown" as the test name. This
happened because Vitest doesn't inject `beforeEach` as a global
(unlike Jest), so capture.js's Jest-style hook to set
`currentTestName` never fired.
Fixed by adding Vitest-specific test name detection in capture.js:
- Primary: `expect.getState().currentTestName` (full describe path)
- Fallback: `__vitest_worker__.current.fullTestName`
- Defense-in-depth: parser fallback matches "unknown" markers to
the first testcase when no name match is found
Result: cheerio's `isHtml` went from "20.0s / 1 loop" to
"902μs / 20,853 loops" with proper speedup analysis.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a78b4cc299
commit
ed2594bfd1
5 changed files with 207 additions and 36 deletions
|
|
@ -172,22 +172,12 @@ def parse_jest_test_xml(
|
|||
if global_stdout:
|
||||
marker_count = len(jest_start_pattern.findall(global_stdout))
|
||||
if marker_count > 0:
|
||||
logger.debug(f"Found {marker_count} timing start markers in Jest stdout")
|
||||
else:
|
||||
logger.debug(f"No timing start markers found in Jest stdout (len={len(global_stdout)})")
|
||||
# Check for END markers with duration (perf test markers)
|
||||
logger.debug(f"Found {marker_count} timing start markers in subprocess stdout")
|
||||
end_marker_count = len(jest_end_pattern.findall(global_stdout))
|
||||
if end_marker_count > 0:
|
||||
logger.debug(
|
||||
f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout"
|
||||
)
|
||||
# Sample a few markers to verify loop indices
|
||||
end_samples = list(jest_end_pattern.finditer(global_stdout))[:5]
|
||||
for sample in end_samples:
|
||||
groups = sample.groups()
|
||||
logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}")
|
||||
logger.debug(f"Found {end_marker_count} END timing markers with duration in subprocess stdout")
|
||||
else:
|
||||
logger.debug("[PERF-DEBUG] No END markers with duration found in Jest stdout")
|
||||
logger.debug(f"No END markers found in subprocess stdout (len={len(global_stdout)})")
|
||||
except (AttributeError, UnicodeDecodeError):
|
||||
global_stdout = ""
|
||||
|
||||
|
|
@ -215,6 +205,10 @@ def parse_jest_test_xml(
|
|||
# Key: (testName, testName2, funcName, loopIndex, lineId)
|
||||
key = match.groups()[:5]
|
||||
end_matches_dict[key] = match
|
||||
logger.debug(
|
||||
f"Suite {suite_count}: combined_stdout len={len(combined_stdout)}, "
|
||||
f"start_matches={len(start_matches)}, end_matches={len(end_matches_dict)}"
|
||||
)
|
||||
|
||||
# Debug: log suite-level END marker parsing for perf tests
|
||||
if end_matches_dict:
|
||||
|
|
@ -371,6 +365,25 @@ def parse_jest_test_xml(
|
|||
# end_key is (module, testName, funcName, loopIndex, invocationId)
|
||||
if len(end_key) >= 2 and sanitized_test_name in end_key[1]:
|
||||
matching_ends_direct.append(end_match)
|
||||
|
||||
# Fallback: If no matches found but END markers exist with "unknown" test name
|
||||
# (happens in Vitest where beforeEach hook doesn't fire to set currentTestName),
|
||||
# match ALL "unknown" markers to this testcase. Use a consumed set to avoid
|
||||
# assigning the same marker to multiple testcases.
|
||||
if not matching_ends_direct and end_matches_dict:
|
||||
unknown_markers = [(k, m) for k, m in end_matches_dict.items() if len(k) >= 2 and k[1] == "unknown"]
|
||||
if unknown_markers:
|
||||
# Assign all unconsumed unknown markers to this testcase
|
||||
for _, end_match in unknown_markers:
|
||||
matching_ends_direct.append(end_match)
|
||||
# Remove consumed markers so they aren't double-assigned to other testcases
|
||||
for end_key, _ in unknown_markers:
|
||||
end_matches_dict.pop(end_key, None)
|
||||
logger.debug(
|
||||
f"[PERF-UNKNOWN-MATCH] Testcase '{test_name[:40]}': matched {len(matching_ends_direct)} "
|
||||
f"'unknown' END markers (Vitest fallback)"
|
||||
)
|
||||
|
||||
# Debug: log matching results for perf tests
|
||||
if matching_ends_direct:
|
||||
loop_indices = [int(m.groups()[3]) if m.groups()[3].isdigit() else 1 for m in matching_ends_direct]
|
||||
|
|
|
|||
|
|
@ -225,6 +225,8 @@ export default mergeConfig(originalConfig, {{
|
|||
test: {{
|
||||
// Override include pattern to match all test files including generated ones
|
||||
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
|
||||
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
|
||||
pool: 'forks',
|
||||
}},
|
||||
}});
|
||||
"""
|
||||
|
|
@ -239,6 +241,8 @@ export default defineConfig({
|
|||
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
|
||||
// Exclude common non-test directories
|
||||
exclude: ['**/node_modules/**', '**/dist/**'],
|
||||
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
|
||||
pool: 'forks',
|
||||
},
|
||||
});
|
||||
"""
|
||||
|
|
@ -277,6 +281,7 @@ def _build_vitest_behavioral_command(
|
|||
"--reporter=default",
|
||||
"--reporter=junit",
|
||||
"--no-file-parallelism", # Serial execution for deterministic timing
|
||||
"--pool=forks", # Use child processes so timing markers flow to parent stdout
|
||||
]
|
||||
|
||||
# For monorepos with restrictive vitest configs (e.g., include: test/**/*.test.ts),
|
||||
|
|
@ -326,6 +331,7 @@ def _build_vitest_benchmarking_command(
|
|||
"--reporter=default",
|
||||
"--reporter=junit",
|
||||
"--no-file-parallelism", # Serial execution for consistent benchmarking
|
||||
"--pool=forks", # Use child processes so timing markers flow to parent stdout
|
||||
]
|
||||
|
||||
# Use codeflash vitest config to override restrictive include patterns
|
||||
|
|
@ -656,11 +662,6 @@ def run_vitest_benchmarking_tests(
|
|||
)
|
||||
else:
|
||||
logger.debug(f"[VITEST-BENCH] No perf END markers found in stdout (len={len(result.stdout)})")
|
||||
# Check if there are behavior END markers instead
|
||||
behavior_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:\d+:[^#]+######!")
|
||||
behavior_matches = list(behavior_end_pattern.finditer(result.stdout))
|
||||
if behavior_matches:
|
||||
logger.debug(f"[VITEST-BENCH] Found {len(behavior_matches)} behavior END markers instead (no duration)")
|
||||
|
||||
return result_file_path, result
|
||||
|
||||
|
|
@ -719,6 +720,7 @@ def run_vitest_line_profile_tests(
|
|||
"--reporter=default",
|
||||
"--reporter=junit",
|
||||
"--no-file-parallelism", # Serial execution for consistent line profiling
|
||||
"--pool=forks", # Use child processes so timing markers flow to parent stdout
|
||||
]
|
||||
|
||||
# Use codeflash vitest config to override restrictive include patterns
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "codeflash",
|
||||
"version": "0.10.0",
|
||||
"version": "0.10.1",
|
||||
"description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",
|
||||
"main": "runtime/index.js",
|
||||
"types": "runtime/index.d.ts",
|
||||
|
|
|
|||
|
|
@ -266,10 +266,96 @@ if (RANDOM_SEED !== 0) {
|
|||
}
|
||||
}
|
||||
|
||||
// Current test context (set by Jest hooks)
|
||||
// Current test context (set by Jest hooks or Vitest worker)
|
||||
let currentTestName = null;
|
||||
let currentTestPath = null; // Test file path from Jest
|
||||
|
||||
/**
 * Resolve the name of the currently-running test via Vitest's own APIs.
 *
 * Vitest doesn't inject `beforeEach` as a global (unlike Jest), so the
 * Jest-style hook that sets `currentTestName` never fires. Instead we ask
 * Vitest directly. In the fork pool, `__vitest_worker__.current` is the
 * current task object (name, fullName, fullTestName, suite, type, file, ...),
 * and `expect.getState().currentTestName` works from within a test.
 *
 * @returns {string|null} The current test name, or null if not in Vitest
 */
function getVitestTestName() {
  // Preferred source: expect.getState().currentTestName returns the full
  // describe path (e.g. "Performance tests > should return true for basic
  // HTML tags"), which matches what Jest's beforeEach hook would have set.
  try {
    if (typeof expect !== 'undefined' && expect.getState) {
      const testName = expect.getState()?.currentTestName;
      if (testName) {
        return testName;
      }
    }
  } catch {
    // expect is not available in this environment
  }
  // Fallback: Vitest worker API — the current task's fullTestName includes
  // the describe path; fullName and name are progressively less specific.
  try {
    const task = globalThis.__vitest_worker__?.current;
    if (task) {
      const taskName = task.fullTestName || task.fullName || task.name;
      if (taskName) {
        return taskName;
      }
    }
  } catch {
    // not running under Vitest
  }
  return null;
}
|
||||
|
||||
/**
 * Resolve the current test file path via Vitest's internal worker API.
 *
 * @returns {string|null} The current test file path, or null if not in Vitest
 */
function getVitestTestPath() {
  // Vitest's fork-pool worker exposes the file being executed as `filepath`.
  try {
    const workerFilepath = globalThis.__vitest_worker__?.filepath;
    if (workerFilepath) {
      return workerFilepath;
    }
  } catch {
    // not running under Vitest
  }
  // Fallback: the Jest-compatible expect.getState() carries testPath.
  try {
    if (typeof expect !== 'undefined' && expect.getState) {
      const statePath = expect.getState()?.testPath;
      if (statePath) {
        return statePath;
      }
    }
  } catch {
    // expect is not available in this environment
  }
  return null;
}
|
||||
|
||||
/**
 * Get the effective test name: Jest hook value first, then the Vitest
 * worker API, then the literal fallback 'unknown'.
 *
 * @returns {string} The current test name
 */
function getEffectiveTestName() {
  // currentTestName is set by the Jest-style beforeEach hook when it fires.
  const fromJestHook = currentTestName;
  if (fromJestHook) {
    return fromJestHook;
  }
  const fromVitest = getVitestTestName();
  return fromVitest ? fromVitest : 'unknown';
}
|
||||
|
||||
/**
 * Get the effective test file path: Jest hook value first, then the Vitest
 * worker API, then null.
 *
 * @returns {string|null} The current test file path
 */
function getEffectiveTestPath() {
  // currentTestPath is set by the Jest-style beforeEach hook when it fires.
  const fromJestHook = currentTestPath;
  if (fromJestHook) {
    return fromJestHook;
  }
  return getVitestTestPath() || null;
}
|
||||
|
||||
// Invocation counter map: tracks how many times each testId has been seen
|
||||
// Key: testId (testModule:testClass:testFunction:lineId:loopIndex)
|
||||
// Value: count (starts at 0, increments each time same key is seen)
|
||||
|
|
@ -549,13 +635,14 @@ function capture(funcName, lineId, fn, ...args) {
|
|||
|
||||
// Get test context (raw values for SQLite storage)
|
||||
// Use TEST_MODULE env var if set, otherwise derive from test file path
|
||||
const effectiveTestPath = getEffectiveTestPath();
|
||||
let testModulePath;
|
||||
if (TEST_MODULE) {
|
||||
testModulePath = TEST_MODULE;
|
||||
} else if (currentTestPath) {
|
||||
} else if (effectiveTestPath) {
|
||||
// Get relative path from cwd and convert to module-style path
|
||||
const path = require('path');
|
||||
const relativePath = path.relative(process.cwd(), currentTestPath);
|
||||
const relativePath = path.relative(process.cwd(), effectiveTestPath);
|
||||
// Convert to Python module-style path (e.g., "tests/test_foo.test.js" -> "tests.test_foo.test")
|
||||
// This matches what Jest's junit XML produces
|
||||
testModulePath = relativePath
|
||||
|
|
@ -564,10 +651,10 @@ function capture(funcName, lineId, fn, ...args) {
|
|||
.replace(/\.test$/, '.test') // Keep .test suffix
|
||||
.replace(/\//g, '.'); // Convert path separators to dots
|
||||
} else {
|
||||
testModulePath = currentTestName || 'unknown';
|
||||
testModulePath = getEffectiveTestName();
|
||||
}
|
||||
const testClassName = null; // Jest doesn't use classes like Python
|
||||
const testFunctionName = currentTestName || 'unknown';
|
||||
const testFunctionName = getEffectiveTestName();
|
||||
|
||||
// Sanitized versions for stdout tags (avoid regex conflicts)
|
||||
const safeModulePath = sanitizeTestId(testModulePath);
|
||||
|
|
@ -583,8 +670,8 @@ function capture(funcName, lineId, fn, ...args) {
|
|||
// Format stdout tag (matches Python format, uses sanitized names)
|
||||
const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${LOOP_INDEX}:${invocationId}`;
|
||||
|
||||
// Print start tag
|
||||
console.log(`!$######${testStdoutTag}######$!`);
|
||||
// Print start tag (use process.stdout.write to bypass test framework console interception)
|
||||
process.stdout.write(`!$######${testStdoutTag}######$!\n`);
|
||||
|
||||
// Timing with nanosecond precision
|
||||
const startTime = getTimeNs();
|
||||
|
|
@ -602,14 +689,14 @@ function capture(funcName, lineId, fn, ...args) {
|
|||
const durationNs = getDurationNs(startTime, endTime);
|
||||
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, resolved, null, durationNs);
|
||||
// Print end tag (no duration for behavior mode)
|
||||
console.log(`!######${testStdoutTag}######!`);
|
||||
process.stdout.write(`!######${testStdoutTag}######!\n`);
|
||||
return resolved;
|
||||
},
|
||||
(err) => {
|
||||
const endTime = getTimeNs();
|
||||
const durationNs = getDurationNs(startTime, endTime);
|
||||
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, null, err, durationNs);
|
||||
console.log(`!######${testStdoutTag}######!`);
|
||||
process.stdout.write(`!######${testStdoutTag}######!\n`);
|
||||
throw err;
|
||||
}
|
||||
);
|
||||
|
|
@ -623,7 +710,7 @@ function capture(funcName, lineId, fn, ...args) {
|
|||
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, returnValue, error, durationNs);
|
||||
|
||||
// Print end tag (no duration for behavior mode, matching Python)
|
||||
console.log(`!######${testStdoutTag}######!`);
|
||||
process.stdout.write(`!######${testStdoutTag}######!\n`);
|
||||
|
||||
if (error) throw error;
|
||||
return returnValue;
|
||||
|
|
@ -656,22 +743,24 @@ function capturePerf(funcName, lineId, fn, ...args) {
|
|||
const shouldLoop = getPerfLoopCount() > 1 && !checkSharedTimeLimit();
|
||||
|
||||
// Get test context (computed once, reused across batch)
|
||||
// Uses Vitest worker API as fallback when Jest-style beforeEach hook doesn't fire
|
||||
const effectiveTestPath = getEffectiveTestPath();
|
||||
let testModulePath;
|
||||
if (TEST_MODULE) {
|
||||
testModulePath = TEST_MODULE;
|
||||
} else if (currentTestPath) {
|
||||
} else if (effectiveTestPath) {
|
||||
const path = require('path');
|
||||
const relativePath = path.relative(process.cwd(), currentTestPath);
|
||||
const relativePath = path.relative(process.cwd(), effectiveTestPath);
|
||||
testModulePath = relativePath
|
||||
.replace(/\\/g, '/')
|
||||
.replace(/\.js$/, '')
|
||||
.replace(/\.test$/, '.test')
|
||||
.replace(/\//g, '.');
|
||||
} else {
|
||||
testModulePath = currentTestName || 'unknown';
|
||||
testModulePath = getEffectiveTestName();
|
||||
}
|
||||
const testClassName = null;
|
||||
const testFunctionName = currentTestName || 'unknown';
|
||||
const testFunctionName = getEffectiveTestName();
|
||||
|
||||
const safeModulePath = sanitizeTestId(testModulePath);
|
||||
const safeTestFunctionName = sanitizeTestId(testFunctionName);
|
||||
|
|
@ -767,8 +856,8 @@ function capturePerf(funcName, lineId, fn, ...args) {
|
|||
lastError = e;
|
||||
}
|
||||
|
||||
// Print end tag with timing
|
||||
console.log(`!######${testStdoutTag}:${durationNs}######!`);
|
||||
// Print end tag with timing (use process.stdout.write to bypass test framework console interception)
|
||||
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
|
||||
|
||||
// Update shared loop counter
|
||||
sharedPerfState.totalLoopsCompleted++;
|
||||
|
|
@ -808,7 +897,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
|
|||
* @private
|
||||
*/
|
||||
function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
|
||||
console.log(`!######${testStdoutTag}:${durationNs}######!`);
|
||||
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
|
||||
sharedPerfState.totalLoopsCompleted++;
|
||||
if (durationNs > 0) {
|
||||
runtimes.push(durationNs / 1000);
|
||||
|
|
|
|||
|
|
@ -636,3 +636,70 @@ class TestRunMochaLineProfileTests:
|
|||
env = call_kwargs.kwargs.get("env") or call_kwargs[1].get("env", {})
|
||||
assert env.get("CODEFLASH_MODE") == "line_profile"
|
||||
assert env.get("CODEFLASH_LINE_PROFILE_OUTPUT") == str(profile_output)
|
||||
|
||||
|
||||
class TestParserUnknownTestNameFallback:
    """Tests for the parser's fallback when perf markers have 'unknown' test name."""

    def test_unknown_markers_matched_to_first_testcase(self):
        """When capturePerf markers have 'unknown' test name (Vitest beforeEach not firing),
        the parser should still match them to testcases via the fallback logic."""
        from codeflash.languages.javascript.parse import parse_jest_test_xml
        from codeflash.models.models import TestFile, TestFiles
        from codeflash.models.test_type import TestType

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir_path = Path(tmpdir)

            # Create a JUnit XML with one test suite and one testcase.
            # The 10.5s suite/testcase time is the wall-clock fallback the
            # parser must NOT use once per-marker durations are matched.
            xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite name="src/test_func__perf_test_0.test.ts" tests="1" failures="0" time="10.5">
<testcase name="should compute correctly" classname="src/test_func__perf_test_0.test.ts" time="10.5">
</testcase>
</testsuite>
</testsuites>"""
            xml_path = tmpdir_path / "results.xml"
            xml_path.write_text(xml_content, encoding="utf-8")

            # Create test files
            test_file = tmpdir_path / "test_func__perf_test_0.test.ts"
            test_file.write_text("// perf test", encoding="utf-8")

            test_files = TestFiles(
                test_files=[
                    TestFile(
                        instrumented_behavior_file_path=test_file,
                        benchmarking_file_path=test_file,
                        test_type=TestType.GENERATED_REGRESSION,
                    )
                ]
            )

            # Create a mock subprocess result with perf markers using "unknown" test name.
            # This simulates what happens when Vitest's beforeEach doesn't fire.
            # Marker format: !######module:testName:funcName:loopIndex:invocationId:durationNs######!
            markers = []
            for i in range(1, 6):
                markers.append(f"!######test_mod:unknown:computeFunc:{i}:1_0:{1000 + i * 100}######!")
            stdout = "\n".join(markers)

            mock_result = MagicMock()
            mock_result.stdout = stdout

            test_config = MagicMock()
            test_config.tests_project_rootdir = tmpdir_path
            test_config.test_framework = "vitest"

            results = parse_jest_test_xml(
                test_xml_file_path=xml_path,
                test_files=test_files,
                test_config=test_config,
                run_result=mock_result,
            )

            # The "unknown" fallback should assign all 5 markers to the testcase
            assert len(results.test_results) == 5
            # Verify runtimes were extracted (not the 10.5s XML fallback)
            runtimes = [r.runtime for r in results.test_results if r.runtime is not None]
            assert len(runtimes) == 5
            assert all(r < 100_000 for r in runtimes)  # All under 100 microseconds (nanoseconds)
|
||||
|
|
|
|||
Loading…
Reference in a new issue