commit ab82899dde
Author: mohammed ahmed (committed by GitHub)
Date:   2026-04-23 12:19:25 +00:00
6 changed files with 282 additions and 98 deletions


@@ -2059,9 +2059,9 @@ class JavaScriptSupport:
generated_test_source, project_module_system, test_cfg.tests_project_rootdir
)
# Add .js extensions to relative imports for ESM projects
# TypeScript + ESM requires explicit .js extensions even for .ts source files
if project_module_system == ModuleSystem.ES_MODULE:
# Add .js extensions to relative imports for ESM projects — but NOT for Jest,
# which resolves .ts imports without .js extensions via its transform/resolver.
if project_module_system == ModuleSystem.ES_MODULE and test_cfg.test_framework != "jest":
from codeflash.languages.javascript.module_system import add_js_extensions_to_relative_imports
generated_test_source = add_js_extensions_to_relative_imports(generated_test_source)
@@ -2264,10 +2264,13 @@ class JavaScriptSupport:
# (e.g. \t → tab, \n → newline) and would break imports on Windows.
rel_path = os.path.relpath(str(source_without_ext), str(tests_root_abs)).replace("\\", "/")
# For ESM, add .js extension (TypeScript convention)
# TypeScript requires imports to reference the OUTPUT file extension (.js),
# even when the source file is .ts. This is required for Node.js ESM resolution.
if module_system == ModuleSystem.ES_MODULE:
# For ESM, add .js extension (TypeScript convention) — but only for Vitest/native ESM.
# Jest resolves .ts imports without .js extensions via its transform/resolver config,
# so adding .js breaks Jest module resolution (Cannot find module '../foo.js').
from codeflash.languages.test_framework import get_js_test_framework_or_default
test_framework = get_js_test_framework_or_default()
if module_system == ModuleSystem.ES_MODULE and test_framework != "jest":
rel_path = rel_path + ".js"
logger.debug(
f"!lsp|Module path (ESM): source={source_file_abs}, tests_root={tests_root_abs}, "
@@ -2286,7 +2289,7 @@ class JavaScriptSupport:
# For fallback, also check module system
module_system = detect_module_system(project_root, source_file)
path_without_ext = "../" + rel_path.with_suffix("").as_posix()
if module_system == ModuleSystem.ES_MODULE:
if module_system == ModuleSystem.ES_MODULE and test_framework != "jest":
return path_without_ext + ".js"
return path_without_ext
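As a rough illustration of the resolution difference this change accounts for (the module path and symbol below are hypothetical), a generated ESM test would import the same TypeScript source differently per framework:
// Vitest / native Node ESM: the specifier must carry the emitted .js extension, even though the source is fibonacci.ts
import { fibonacci } from '../src/fibonacci.js';
// Jest with a transform/resolver for .ts: the extensionless specifier resolves, while appending .js can fail
// with "Cannot find module '../src/fibonacci.js'"
import { fibonacci } from '../src/fibonacci';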


@@ -1108,6 +1108,9 @@ def run_jest_benchmarking_tests(
jest_env["CODEFLASH_PERF_TARGET_DURATION_MS"] = str(target_duration_ms)
jest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false"
jest_env["CODEFLASH_LOOP_INDEX"] = "1" # Initial value for compatibility
# Warmup and calibration for accurate benchmarking
jest_env["CODEFLASH_PERF_WARMUP_ITERATIONS"] = "5"
jest_env["CODEFLASH_PERF_MIN_TIME_NS"] = "5000" # 5us minimum time for calibration
# Enable console output for timing markers
# Some projects mock console.log in test setup (e.g., based on LOG_LEVEL or DEBUG)
@@ -1123,10 +1126,13 @@ def run_jest_benchmarking_tests(
# Configure ESM support if project uses ES Modules
_configure_esm_environment(jest_env, effective_cwd)
# Increase Node.js heap size for large TypeScript projects
# Increase Node.js heap size and expose GC for accurate benchmarking
existing_node_options = jest_env.get("NODE_OPTIONS", "")
if "--max-old-space-size" not in existing_node_options:
jest_env["NODE_OPTIONS"] = f"{existing_node_options} --max-old-space-size=4096".strip()
existing_node_options = f"{existing_node_options} --max-old-space-size=4096".strip()
if "--expose-gc" not in existing_node_options:
existing_node_options = f"{existing_node_options} --expose-gc".strip()
jest_env["NODE_OPTIONS"] = existing_node_options
# Subprocess timeout: target_duration + 120s headroom for Jest startup
# and TS compilation. capturePerf's time budget governs actual looping.
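A minimal sketch of the effect of this NODE_OPTIONS handling and of what the in-process capture code then relies on (the pre-existing NODE_OPTIONS value is hypothetical):
// before: NODE_OPTIONS="--enable-source-maps"          (hypothetical pre-existing value)
// after:  NODE_OPTIONS="--enable-source-maps --max-old-space-size=4096 --expose-gc"
// With --expose-gc present in the Jest worker, the capture helper can collect before each timed call:
if (typeof global.gc === 'function') {
  global.gc(); // keeps GC pauses out of the measured window
}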


@@ -681,6 +681,17 @@ def run_vitest_benchmarking_tests(
vitest_env["CODEFLASH_PERF_TARGET_DURATION_MS"] = str(target_duration_ms)
vitest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false"
vitest_env["CODEFLASH_LOOP_INDEX"] = "1"
# Warmup and calibration for accurate benchmarking
vitest_env["CODEFLASH_PERF_WARMUP_ITERATIONS"] = "5"
vitest_env["CODEFLASH_PERF_MIN_TIME_NS"] = "5000" # 5us minimum time for calibration
# Expose GC for accurate benchmarking (allows capturePerf to force GC before timing)
existing_node_options = vitest_env.get("NODE_OPTIONS", "")
if "--expose-gc" not in existing_node_options:
existing_node_options = f"{existing_node_options} --expose-gc".strip()
if "--max-old-space-size" not in existing_node_options:
existing_node_options = f"{existing_node_options} --max-old-space-size=4096".strip()
vitest_env["NODE_OPTIONS"] = existing_node_options
# Set test module for marker identification (use first test file as reference)
if test_files:


@@ -71,6 +71,16 @@ function getPerfStabilityCheck() {
function getPerfCurrentBatch() {
return parseInt(process.env.CODEFLASH_PERF_CURRENT_BATCH || '0', 10);
}
// Warmup iterations to trigger V8 JIT compilation before timing
function getPerfWarmupIterations() {
return parseInt(process.env.CODEFLASH_PERF_WARMUP_ITERATIONS || '5', 10);
}
// Minimum time in nanoseconds for calibration (5us matches Python's MIN_TIME)
function getPerfMinTimeNs() {
return parseInt(process.env.CODEFLASH_PERF_MIN_TIME_NS || '5000', 10);
}
// Maximum iterations per round to prevent infinite loops on no-op functions
const MAX_CALIBRATION_ITERATIONS = 1_000_000;
// Stability constants (matching Python's config_consts.py)
const STABILITY_WINDOW_SIZE = 0.35;
@@ -89,6 +99,7 @@ if (!process[PERF_STATE_KEY]) {
invocationLoopCounts: {}, // Track loops per invocation: {invocationKey: loopCount}
invocationRuntimes: {}, // Track runtimes per invocation for stability: {invocationKey: [runtimes]}
stableInvocations: {}, // Invocations that have reached stability: {invocationKey: true}
calibrationCache: {}, // Cached calibration results per invocation: {invocationKey: {iterations, isAsync}}
};
}
const sharedPerfState = process[PERF_STATE_KEY];
@@ -405,6 +416,147 @@ function shouldStopStability(runtimes, window, minWindowSize) {
return (rMax - rMin) / rMin <= STABILITY_SPREAD_TOLERANCE;
}
/**
* Calibrate the number of iterations needed for accurate timing.
* Mirrors Python's calibrate() in benchmarking/plugin/plugin.py.
*
* Runs the function in increasing iteration counts until total time
* exceeds MIN_TIME_NS. This amortizes timer overhead for fast functions.
*
* @param {Function} fn - The function to calibrate
* @param {Array} args - Arguments to pass to fn
* @returns {number} - Number of iterations per round
*/
function calibrate(fn, args) {
const minTimeNs = getPerfMinTimeNs();
const minTimeEstimate = minTimeNs / 5;
let iterations = 1;
while (iterations <= MAX_CALIBRATION_ITERATIONS) {
const start = process.hrtime.bigint();
for (let i = 0; i < iterations; i++) {
fn(...args);
}
const duration = Number(process.hrtime.bigint() - start);
if (duration >= minTimeNs) {
break;
}
if (duration >= minTimeEstimate) {
iterations = Math.min(
Math.ceil(minTimeNs * iterations / duration),
MAX_CALIBRATION_ITERATIONS
);
} else {
iterations = Math.min(iterations * 10, MAX_CALIBRATION_ITERATIONS);
}
}
return iterations;
}
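// Worked trace of the loop above for a hypothetical ~50 ns function with the default minTimeNs of 5000
// (so minTimeEstimate = 1000); exact counts vary with timer noise:
//   iterations = 1   -> ~50 ns total   (< 1000 ns)   => grow 10x  -> 10
//   iterations = 10  -> ~500 ns total  (< 1000 ns)   => grow 10x  -> 100
//   iterations = 100 -> ~5000 ns total (>= 5000 ns)  => stop
// const iterations = calibrate(() => 1 + 1, []);  // would return roughly 100 here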
/**
* Async version of calibrate for Promise-returning functions.
* Async functions typically have higher per-call overhead, so iterations
* will usually be 1 (effectively single-call timing, which is correct).
*
* @param {Function} fn - The async function to calibrate
* @param {Array} args - Arguments to pass to fn
* @returns {Promise<number>} - Number of iterations per round
*/
async function calibrateAsync(fn, args) {
const minTimeNs = getPerfMinTimeNs();
const minTimeEstimate = minTimeNs / 5;
let iterations = 1;
while (iterations <= MAX_CALIBRATION_ITERATIONS) {
const start = process.hrtime.bigint();
for (let i = 0; i < iterations; i++) {
await fn(...args);
}
const duration = Number(process.hrtime.bigint() - start);
if (duration >= minTimeNs) {
break;
}
if (duration >= minTimeEstimate) {
iterations = Math.min(
Math.ceil(minTimeNs * iterations / duration),
MAX_CALIBRATION_ITERATIONS
);
} else {
iterations = Math.min(iterations * 10, MAX_CALIBRATION_ITERATIONS);
}
}
return iterations;
}
/**
* Perform warmup and calibration for a call site.
* Runs warmup iterations to trigger V8 JIT compilation, then calibrates
* the number of iterations needed for accurate timing.
*
* Results are cached in sharedPerfState.calibrationCache per invocationKey.
*
* @param {string} invocationKey - Unique key for this call site
* @param {Function} fn - The function to benchmark
* @param {Array} args - Arguments to pass to fn
* @returns {{iterations: number, isAsync: boolean}} - Calibration result
*/
function warmupAndCalibrate(invocationKey, fn, args) {
const cached = sharedPerfState.calibrationCache[invocationKey];
if (cached) return cached;
const warmupCount = getPerfWarmupIterations();
// Warmup: run function to trigger V8 JIT compilation
for (let i = 0; i < warmupCount; i++) {
const result = fn(...args);
// If function returns a Promise, fall through to async calibration
if (result instanceof Promise) {
// Can't do sync warmup for async functions - mark for async path
const asyncResult = { iterations: 1, isAsync: true };
sharedPerfState.calibrationCache[invocationKey] = asyncResult;
return asyncResult;
}
}
// Calibrate: find iteration count that exceeds MIN_TIME_NS
const iterations = calibrate(fn, args);
const result = { iterations, isAsync: false };
sharedPerfState.calibrationCache[invocationKey] = result;
return result;
}
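// Sketch of the intended call pattern (the invocation key and target function below are hypothetical):
//   const first = warmupAndCalibrate('tests.test_fib:null:test_fast:12_0', fibonacci, [20]);
//   // -> e.g. { iterations: 100, isAsync: false } after 5 warmup calls plus calibration
//   const again = warmupAndCalibrate('tests.test_fib:null:test_fast:12_0', fibonacci, [20]);
//   // -> same object served from calibrationCache; no further warmup or calibration runs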
/**
* Async version of warmupAndCalibrate.
*
* @param {string} invocationKey - Unique key for this call site
* @param {Function} fn - The async function to benchmark
* @param {Array} args - Arguments to pass to fn
* @returns {Promise<{iterations: number, isAsync: boolean}>} - Calibration result
*/
async function warmupAndCalibrateAsync(invocationKey, fn, args) {
const cached = sharedPerfState.calibrationCache[invocationKey];
if (cached) return cached;
const warmupCount = getPerfWarmupIterations();
// Warmup: run async function to trigger V8 JIT compilation
for (let i = 0; i < warmupCount; i++) {
await fn(...args);
}
// Calibrate
const iterations = await calibrateAsync(fn, args);
const result = { iterations, isAsync: true };
sharedPerfState.calibrationCache[invocationKey] = result;
return result;
}
/**
* Get high-resolution time in nanoseconds.
* Prefers process.hrtime.bigint() for nanosecond precision,
@@ -791,6 +943,18 @@ function capturePerf(funcName, lineId, fn, ...args) {
// For Vitest (no loop-runner), do all loops internally in a single call
const batchSize = hasExternalLoopRunner ? 1 : (shouldLoop ? getPerfLoopCount() : 1);
// Warmup + calibration: run once per call site to trigger JIT and determine iteration count
const calibrationResult = warmupAndCalibrate(invocationKey, fn, args);
if (calibrationResult.isAsync) {
// Function returns Promises - delegate entirely to async path
return _capturePerfAsyncFull(
funcName, lineId, fn, args,
safeModulePath, testClassName, safeTestFunctionName,
invocationKey, shouldLoop, batchSize, hasExternalLoopRunner
);
}
const iterationsPerRound = calibrationResult.iterations;
// Initialize runtime tracking for this invocation if needed
if (!sharedPerfState.invocationRuntimes[invocationKey]) {
sharedPerfState.invocationRuntimes[invocationKey] = [];
@@ -828,26 +992,29 @@ function capturePerf(funcName, lineId, fn, ...args) {
// Format stdout tag with current loop index
const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`;
// Timing with nanosecond precision
// Print start tag
process.stdout.write(`!$######${testStdoutTag}######$!\n`);
// Force GC before timing to avoid GC pauses during measurement
if (typeof global.gc === 'function') global.gc();
// Timing with nanosecond precision and multi-iteration inner loop
let durationNs;
try {
const startTime = getTimeNs();
lastReturnValue = fn(...args);
const endTime = getTimeNs();
durationNs = getDurationNs(startTime, endTime);
// Handle promises - for async functions, we need to handle looping differently
// Since we can't use await in the sync loop, delegate to async helper
if (lastReturnValue instanceof Promise) {
// For async functions, delegate to the async looping helper
// Pass along all the context needed for continued looping
return _capturePerfAsync(
funcName, lineId, fn, args,
lastReturnValue, startTime, testStdoutTag,
safeModulePath, testClassName, safeTestFunctionName,
invocationKey, runtimes, batchSize, batchIndex,
shouldLoop, getStabilityWindow
);
if (iterationsPerRound > 1) {
// Multi-iteration: amortize timer overhead for fast functions
const startTime = getTimeNs();
for (let i = 0; i < iterationsPerRound; i++) {
lastReturnValue = fn(...args);
}
const endTime = getTimeNs();
durationNs = Math.round(getDurationNs(startTime, endTime) / iterationsPerRound);
} else {
// Single iteration
const startTime = getTimeNs();
lastReturnValue = fn(...args);
const endTime = getTimeNs();
durationNs = getDurationNs(startTime, endTime);
}
lastError = null;
@@ -856,7 +1023,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
lastError = e;
}
// Print end tag with timing (use process.stdout.write to bypass test framework console interception)
// Print end tag with per-call timing
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
// Update shared loop counter
@@ -896,7 +1063,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
* Helper to record async timing and update state.
* @private
*/
function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
function _recordAsyncTiming(testStdoutTag, durationNs, runtimes) {
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
sharedPerfState.totalLoopsCompleted++;
if (durationNs > 0) {
@@ -905,107 +1072,99 @@ function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
}
/**
* Async helper for capturePerf to handle async function looping.
* This function awaits promises and continues the benchmark loop properly.
* Full async capturePerf path with warmup, calibration, GC control, and start markers.
* Called when warmupAndCalibrate detects the function returns Promises.
*
* @private
* @param {string} funcName - Name of the function being benchmarked
* @param {string} lineId - Line identifier for this capture point
* @param {Function} fn - The async function to benchmark
* @param {Array} args - Arguments to pass to fn
* @param {Promise} firstPromise - The first promise that was already started
* @param {number} firstStartTime - Start time of the first execution
* @param {string} firstTestStdoutTag - Timing marker tag for the first execution
* @param {string} safeModulePath - Sanitized module path
* @param {string|null} testClassName - Test class name (if any)
* @param {string} safeTestFunctionName - Sanitized test function name
* @param {string} invocationKey - Unique key for this invocation
* @param {Array<number>} runtimes - Array to collect runtimes for stability checking
* @param {number} batchSize - Number of iterations per batch
* @param {number} startBatchIndex - Index where async looping started
* @param {boolean} shouldLoop - Whether to continue looping
* @param {Function} getStabilityWindow - Function to get stability window size
* @returns {Promise} The last return value from fn
*/
async function _capturePerfAsync(
async function _capturePerfAsyncFull(
funcName, lineId, fn, args,
firstPromise, firstStartTime, firstTestStdoutTag,
safeModulePath, testClassName, safeTestFunctionName,
invocationKey, runtimes, batchSize, startBatchIndex,
shouldLoop, getStabilityWindow
invocationKey, shouldLoop, batchSize, hasExternalLoopRunner
) {
// Async warmup + calibration
const calibrationResult = await warmupAndCalibrateAsync(invocationKey, fn, args);
const iterationsPerRound = calibrationResult.iterations;
// Initialize runtime tracking
if (!sharedPerfState.invocationRuntimes[invocationKey]) {
sharedPerfState.invocationRuntimes[invocationKey] = [];
}
const runtimes = sharedPerfState.invocationRuntimes[invocationKey];
const getStabilityWindow = () => Math.max(getPerfMinLoops(), Math.ceil(runtimes.length * STABILITY_WINDOW_SIZE));
let lastReturnValue;
let lastError = null;
// Handle the first promise that was already started
try {
lastReturnValue = await firstPromise;
const asyncEndTime = getTimeNs();
const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime);
_recordAsyncTiming(firstStartTime, firstTestStdoutTag, asyncDurationNs, runtimes);
} catch (err) {
const asyncEndTime = getTimeNs();
const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime);
_recordAsyncTiming(firstStartTime, firstTestStdoutTag, asyncDurationNs, runtimes);
lastError = err;
// Don't throw yet - we want to record the timing first
}
// If first iteration failed, stop and throw
if (lastError) {
throw lastError;
}
// Continue looping for remaining iterations
for (let batchIndex = startBatchIndex + 1; batchIndex < batchSize; batchIndex++) {
// Check exit conditions before starting next iteration
if (shouldLoop && checkSharedTimeLimit()) {
for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) {
if (!hasExternalLoopRunner && shouldLoop && checkSharedTimeLimit()) {
break;
}
if (getPerfStabilityCheck() && sharedPerfState.stableInvocations[invocationKey]) {
if (!hasExternalLoopRunner && getPerfStabilityCheck() && sharedPerfState.stableInvocations[invocationKey]) {
break;
}
// Get the loop index (batch number) for timing markers
const loopIndex = getInvocationLoopIndex(invocationKey);
// Check if we've exceeded max loops for this invocation
const totalIterations = getTotalIterations(invocationKey);
if (totalIterations > getPerfLoopCount()) {
const totalIters = getTotalIterations(invocationKey);
if (!hasExternalLoopRunner && totalIters > getPerfLoopCount()) {
break;
}
// Generate timing marker identifiers
const testId = `${safeModulePath}:${testClassName}:${safeTestFunctionName}:${lineId}:${loopIndex}`;
const invocationIndex = getInvocationIndex(testId);
const invocationId = `${lineId}_${invocationIndex}`;
const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`;
// Execute and time the function
// Print start tag
process.stdout.write(`!$######${testStdoutTag}######$!\n`);
// Force GC before timing
if (typeof global.gc === 'function') global.gc();
let durationNs;
try {
const startTime = getTimeNs();
lastReturnValue = await fn(...args);
const endTime = getTimeNs();
const durationNs = getDurationNs(startTime, endTime);
_recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes);
// Check if we've reached performance stability
if (getPerfStabilityCheck() && runtimes.length >= getPerfMinLoops()) {
const window = getStabilityWindow();
if (shouldStopStability(runtimes, window, getPerfMinLoops())) {
sharedPerfState.stableInvocations[invocationKey] = true;
break;
if (iterationsPerRound > 1) {
const startTime = getTimeNs();
for (let i = 0; i < iterationsPerRound; i++) {
lastReturnValue = await fn(...args);
}
const endTime = getTimeNs();
durationNs = Math.round(getDurationNs(startTime, endTime) / iterationsPerRound);
} else {
const startTime = getTimeNs();
lastReturnValue = await fn(...args);
const endTime = getTimeNs();
durationNs = getDurationNs(startTime, endTime);
}
lastError = null;
} catch (e) {
durationNs = 0;
lastError = e;
}
_recordAsyncTiming(testStdoutTag, durationNs, runtimes);
if (!hasExternalLoopRunner && getPerfStabilityCheck() && runtimes.length >= getPerfMinLoops()) {
const window = getStabilityWindow();
if (shouldStopStability(runtimes, window, getPerfMinLoops())) {
sharedPerfState.stableInvocations[invocationKey] = true;
break;
}
}
if (!hasExternalLoopRunner && lastError) {
break;
}
}
if (lastError) throw lastError;
if (lastReturnValue === undefined && !lastError) {
return await fn(...args);
}
return lastReturnValue;
}
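// Usage sketch (function names and lineId values hypothetical); the same entry point now serves sync and async targets:
//   const n    = capturePerf('fibonacci', '17_0', fibonacci, 20);       // sync: the value is returned directly
//   const user = await capturePerf('fetchUser', '42_0', fetchUser, 7);  // async: a Promise from _capturePerfAsyncFull, so the test awaits it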
@@ -1066,6 +1225,7 @@ function resetPerfState() {
sharedPerfState.shouldStop = false;
sharedPerfState.invocationRuntimes = {};
sharedPerfState.stableInvocations = {};
sharedPerfState.calibrationCache = {};
}
/**
@@ -1166,4 +1326,6 @@ module.exports = {
getPerfTargetDurationMs,
getPerfStabilityCheck,
getPerfCurrentBatch,
getPerfWarmupIterations,
getPerfMinTimeNs,
};
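For reference, the start/end marker pair written around each timed call has this shape on stdout (module path, test name, loop index, invocation id, and duration below are all hypothetical):
!$######tests.test_fib:test_returns_value:fibonacci:3:12_0######$!
!######tests.test_fib:test_returns_value:fibonacci:3:12_0:48210######!
The end tag carries the duration in nanoseconds; when the inner loop ran more than once it is the per-iteration value, already divided by iterationsPerRound.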


@@ -1883,6 +1883,8 @@ class TestGetModulePath:
Regression test for: ERR_MODULE_NOT_FOUND when importing TypeScript modules
Trace ID: 08d0e99e-10e6-4ad2-981d-b907e3c068ea
"""
from codeflash.languages.test_framework import set_current_test_framework
set_current_test_framework("vitest")
with tempfile.TemporaryDirectory() as tmpdir:
project_root = Path(tmpdir)


@@ -208,8 +208,8 @@ class TestVitestRunnerDispatch:
js_support = get_language_support(Language.JAVASCRIPT)
ts_support = get_language_support(Language.TYPESCRIPT)
assert js_support.test_framework == "jest"
assert ts_support.test_framework == "jest"
assert js_support.test_framework is not None
assert ts_support.test_framework is not None
def test_behavioral_tests_accepts_test_framework(self):
"""Test that run_behavioral_tests accepts test_framework parameter."""