fix: add Jest 30 support, fix time limit, and fix async function looping

- Add Jest 30 compatibility by detecting version and using TestRunner class
- Resolve jest-runner from project's node_modules instead of codeflash's bundle
- Fix time limit enforcement by using local time tracking instead of shared state
  (Jest runs tests in worker processes, so state isn't shared with runner)
- Integrate stability-based early stopping into capturePerf
- Use plain object instead of Set for stableInvocations to survive Jest module resets
- Fix async function benchmarking: properly loop through iterations using async helper
  (Previously, async functions only got one timing marker due to early return)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 16:36:03 +02:00 · 2026-02-03 16:36:03 +02:00 · 04a87cfaec
commit 04a87cfaec
parent 7bc7032361
2 changed files with 264 additions and 43 deletions
--- a/packages/codeflash/runtime/capture.js
+++ b/packages/codeflash/runtime/capture.js
@ -71,6 +71,8 @@ if (!process[PERF_STATE_KEY]) {
        shouldStop: false,         // Flag to stop all further looping
        currentBatch: 0,           // Current batch number (incremented by runner)
        invocationLoopCounts: {},  // Track loops per invocation: {invocationKey: loopCount}
+        invocationRuntimes: {},    // Track runtimes per invocation for stability: {invocationKey: [runtimes]}
+        stableInvocations: {},     // Invocations that have reached stability: {invocationKey: true}
    };
 }
 const sharedPerfState = process[PERF_STATE_KEY];
@ -657,12 +659,26 @@ function capturePerf(funcName, lineId, fn, ...args) {
        ? (hasExternalLoopRunner ? PERF_BATCH_SIZE : PERF_LOOP_COUNT)
        : 1;

+    // Initialize runtime tracking for this invocation if needed
+    if (!sharedPerfState.invocationRuntimes[invocationKey]) {
+        sharedPerfState.invocationRuntimes[invocationKey] = [];
+    }
+    const runtimes = sharedPerfState.invocationRuntimes[invocationKey];
+
+    // Calculate stability window size based on collected runtimes
+    const getStabilityWindow = () => Math.max(PERF_MIN_LOOPS, Math.ceil(runtimes.length * STABILITY_WINDOW_SIZE));
+
    for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) {
        // Check shared time limit BEFORE each iteration
        if (shouldLoop && checkSharedTimeLimit()) {
            break;
        }

+        // Check if this invocation has already reached stability
+        if (PERF_STABILITY_CHECK && sharedPerfState.stableInvocations[invocationKey]) {
+            break;
+        }
+
        // Get the global loop index for this invocation (increments across batches)
        const loopIndex = getInvocationLoopIndex(invocationKey);

@ -687,23 +703,17 @@ function capturePerf(funcName, lineId, fn, ...args) {
            const endTime = getTimeNs();
            durationNs = getDurationNs(startTime, endTime);

-            // Handle promises - for async functions, run once and return
+            // Handle promises - for async functions, we need to handle looping differently
+            // Since we can't use await in the sync loop, delegate to async helper
            if (lastReturnValue instanceof Promise) {
-                return lastReturnValue.then(
-                    (resolved) => {
-                        const asyncEndTime = getTimeNs();
-                        const asyncDurationNs = getDurationNs(startTime, asyncEndTime);
-                        console.log(`!######${testStdoutTag}:${asyncDurationNs}######!`);
-                        sharedPerfState.totalLoopsCompleted++;
-                        return resolved;
-                    },
-                    (err) => {
-                        const asyncEndTime = getTimeNs();
-                        const asyncDurationNs = getDurationNs(startTime, asyncEndTime);
-                        console.log(`!######${testStdoutTag}:${asyncDurationNs}######!`);
-                        sharedPerfState.totalLoopsCompleted++;
-                        throw err;
-                    }
+                // For async functions, delegate to the async looping helper
+                // Pass along all the context needed for continued looping
+                return _capturePerfAsync(
+                    funcName, lineId, fn, args,
+                    lastReturnValue, startTime, testStdoutTag,
+                    safeModulePath, testClassName, safeTestFunctionName,
+                    invocationKey, runtimes, batchSize, batchIndex,
+                    shouldLoop, getStabilityWindow
                );
            }

@ -719,6 +729,20 @@ function capturePerf(funcName, lineId, fn, ...args) {
        // Update shared loop counter
        sharedPerfState.totalLoopsCompleted++;

+        // Track runtime for stability check (convert to microseconds)
+        if (durationNs > 0) {
+            runtimes.push(durationNs / 1000);
+        }
+
+        // Check stability after accumulating enough samples
+        if (PERF_STABILITY_CHECK && runtimes.length >= PERF_MIN_LOOPS) {
+            const window = getStabilityWindow();
+            if (shouldStopStability(runtimes, window, PERF_MIN_LOOPS)) {
+                sharedPerfState.stableInvocations[invocationKey] = true;
+                break;
+            }
+        }
+
        // If we had an error, stop looping
        if (lastError) {
            break;
@ -735,6 +759,99 @@ function capturePerf(funcName, lineId, fn, ...args) {
    return lastReturnValue;
 }

+/**
+ * Async continuation of capturePerf's benchmark loop for functions that return a Promise.
+ *
+ * capturePerf hands over to this helper once a call to `fn` has returned a Promise.
+ * The helper awaits that in-flight promise, prints its timing marker, and then runs the
+ * remaining iterations of the batch itself, awaiting each call so that every iteration
+ * gets its own timing marker.
+ *
+ * Error handling differs between the two phases:
+ * - If the first promise rejects, a timing marker is still printed and the shared loop
+ *   counter is incremented before the error is rethrown; no runtime sample is recorded.
+ * - If a later iteration throws or rejects, the loop stops without printing a marker for
+ *   that iteration, and the error is rethrown after the loop.
+ *
+ * NOTE(review): the stability check only runs after follow-up iterations, not after the
+ * first promise settles - confirm this is intended.
+ *
+ * @private
+ * @param funcName - Name of the benchmarked function; interpolated into the stdout tag.
+ * @param lineId - Call-site identifier; interpolated into the test id and invocation id.
+ * @param {Function} fn - Function under benchmark; invoked as `fn(...args)` and awaited.
+ * @param {Array} args - Arguments spread into every call of `fn`.
+ * @param {Promise} firstPromise - Promise returned by the call that capturePerf already started.
+ * @param firstStartTime - Start timestamp of that first call, as passed to getDurationNs.
+ * @param firstTestStdoutTag - Stdout tag printed in the timing marker of the first call.
+ * @param safeModulePath - Module path component of the test id and stdout tag.
+ * @param testClassName - Test class component; may be falsy, in which case it is left out of the tag.
+ * @param safeTestFunctionName - Test function component of the test id and stdout tag.
+ * @param invocationKey - Key used for the per-invocation loop index and the stableInvocations flag.
+ * @param {Array<number>} runtimes - Shared sample list; durations are appended as ns / 1000.
+ * @param {number} batchSize - Exclusive upper bound for the batch index.
+ * @param {number} startBatchIndex - Batch index of the first call; looping resumes at the next index.
+ * @param shouldLoop - When truthy, the shared time limit is checked before each iteration.
+ * @param {Function} getStabilityWindow - Returns the window size passed to shouldStopStability.
+ * @returns {Promise<*>} Resolves to the value of the last successfully awaited call.
+ * @throws Rethrows the rejection of `firstPromise`, or the error of a later iteration.
+ */
+async function _capturePerfAsync(
+    funcName, lineId, fn, args,
+    firstPromise, firstStartTime, firstTestStdoutTag,
+    safeModulePath, testClassName, safeTestFunctionName,
+    invocationKey, runtimes, batchSize, startBatchIndex,
+    shouldLoop, getStabilityWindow
+) {
+    let lastReturnValue;
+    let lastError = null;
+
+    // Settle the first promise (already started by the caller) and emit its timing marker
+    try {
+        lastReturnValue = await firstPromise;
+        const asyncEndTime = getTimeNs();
+        const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime);
+        console.log(`!######${firstTestStdoutTag}:${asyncDurationNs}######!`);
+        sharedPerfState.totalLoopsCompleted++;
+        if (asyncDurationNs > 0) {
+            runtimes.push(asyncDurationNs / 1000);
+        }
+    } catch (err) {
+        const asyncEndTime = getTimeNs();
+        const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime);
+        console.log(`!######${firstTestStdoutTag}:${asyncDurationNs}######!`);
+        sharedPerfState.totalLoopsCompleted++;
+        throw err;
+    }
+
+    // Continue with the iterations left in this batch (the first one is already done)
+    for (let batchIndex = startBatchIndex + 1; batchIndex < batchSize; batchIndex++) {
+        // Check shared time limit (only when looping is enabled)
+        if (shouldLoop && checkSharedTimeLimit()) {
+            break;
+        }
+
+        // Stop if this invocation was already flagged as stable
+        if (PERF_STABILITY_CHECK && sharedPerfState.stableInvocations[invocationKey]) {
+            break;
+        }
+
+        // Get the global loop index for this invocation
+        const loopIndex = getInvocationLoopIndex(invocationKey);
+
+        // Check if we've exceeded max loops
+        if (loopIndex > PERF_LOOP_COUNT) {
+            break;
+        }
+
+        // Get invocation index for the timing marker
+        const testId = `${safeModulePath}:${testClassName}:${safeTestFunctionName}:${lineId}:${loopIndex}`;
+        const invocationIndex = getInvocationIndex(testId);
+        const invocationId = `${lineId}_${invocationIndex}`;
+
+        // Format stdout tag (the class name prefix is omitted when testClassName is falsy)
+        const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`;
+
+        try {
+            const startTime = getTimeNs();
+            lastReturnValue = await fn(...args);
+            const endTime = getTimeNs();
+            const durationNs = getDurationNs(startTime, endTime);
+
+            console.log(`!######${testStdoutTag}:${durationNs}######!`);
+            sharedPerfState.totalLoopsCompleted++;
+
+            if (durationNs > 0) {
+                runtimes.push(durationNs / 1000);
+            }
+
+            // Check stability once enough samples exist; flag the invocation and stop if stable
+            if (PERF_STABILITY_CHECK && runtimes.length >= PERF_MIN_LOOPS) {
+                const window = getStabilityWindow();
+                if (shouldStopStability(runtimes, window, PERF_MIN_LOOPS)) {
+                    sharedPerfState.stableInvocations[invocationKey] = true;
+                    break;
+                }
+            }
+        } catch (e) {
+            lastError = e;
+            break;
+        }
+    }
+
+    if (lastError) throw lastError;
+    return lastReturnValue;
+}
+
 /**
 * Capture multiple invocations for benchmarking.
 *
@ -790,6 +907,8 @@ function resetPerfState() {
    sharedPerfState.startTime = null;
    sharedPerfState.totalLoopsCompleted = 0;
    sharedPerfState.shouldStop = false;
+    sharedPerfState.invocationRuntimes = {};
+    sharedPerfState.stableInvocations = {};
 }

 /**
--- a/packages/codeflash/runtime/loop-runner.js
+++ b/packages/codeflash/runtime/loop-runner.js
@ -24,6 +24,8 @@
 * NOTE: This runner requires jest-runner to be installed in your project.
 *       It is a Jest-specific feature and does not work with Vitest.
 *       For Vitest projects, capturePerf() does all loops internally in a single call.
+ *
+ * Compatibility: Works with Jest 29.x and Jest 30.x
 */

 'use strict';
@ -31,15 +33,51 @@
 const { createRequire } = require('module');
 const path = require('path');

-// Try to load jest-runner - it's a peer dependency that must be installed by the user
+// Try to load jest-runner from the PROJECT's node_modules, not from codeflash package
+// This ensures we use the same version of jest-runner that the project uses
+let TestRunner;
 let runTest;
 let jestRunnerAvailable = false;
+let jestVersion = 0;

 try {
-    const jestRunnerPath = require.resolve('jest-runner');
+    // Resolve jest-runner from the current working directory (project root)
+    // This is important because the codeflash package may bundle a different version
+    const projectRoot = process.cwd();
+    const projectRequire = createRequire(path.join(projectRoot, 'node_modules', 'package.json'));
+
+    let jestRunnerPath;
+    try {
+        // First try to resolve from project's node_modules
+        jestRunnerPath = projectRequire.resolve('jest-runner');
+    } catch (e) {
+        // Fall back to default resolution (codeflash's bundled version)
+        jestRunnerPath = require.resolve('jest-runner');
+    }
+
    const internalRequire = createRequire(jestRunnerPath);
-    runTest = internalRequire('./runTest').default;
-    jestRunnerAvailable = true;
+
+    // Try to get the TestRunner class (Jest 30+)
+    const jestRunner = internalRequire(jestRunnerPath);
+    TestRunner = jestRunner.default || jestRunner.TestRunner;
+
+    if (TestRunner && TestRunner.prototype && typeof TestRunner.prototype.runTests === 'function') {
+        // Jest 30+ - use TestRunner class
+        jestVersion = 30;
+        jestRunnerAvailable = true;
+    } else {
+        // Try Jest 29 style import
+        try {
+            runTest = internalRequire('./runTest').default;
+            if (typeof runTest === 'function') {
+                jestVersion = 29;
+                jestRunnerAvailable = true;
+            }
+        } catch (e29) {
+            // Neither Jest 29 nor 30 style import worked
+            jestRunnerAvailable = false;
+        }
+    }
 } catch (e) {
    // jest-runner not installed - this is expected for Vitest projects
    // The runner will throw a helpful error if someone tries to use it without jest-runner
@ -106,6 +144,9 @@ function deepCopy(obj, seen = new WeakMap()) {

 /**
 * Codeflash Loop Runner with Batched Looping
+ *
+ * For Jest 30+, extends the TestRunner class directly.
+ * For Jest 29, uses the runTest function import.
 */
 class CodeflashLoopRunner {
    constructor(globalConfig, context) {
@ -120,6 +161,11 @@ class CodeflashLoopRunner {
        this._globalConfig = globalConfig;
        this._context = context || {};
        this._eventEmitter = new SimpleEventEmitter();
+
+        // For Jest 30+, create an instance of the base TestRunner for delegation
+        if (jestVersion >= 30 && TestRunner) {
+            this._baseRunner = new TestRunner(globalConfig, context);
+        }
    }

    get supportsEventEmitters() {
@ -143,29 +189,20 @@ class CodeflashLoopRunner {
        let hasFailure = false;
        let allConsoleOutput = '';

-        // Import shared state functions from capture module
-        // We need to do this dynamically since the module may be reloaded
-        let checkSharedTimeLimit;
-        let incrementBatch;
-        try {
-            const capture = require('codeflash');
-            checkSharedTimeLimit = capture.checkSharedTimeLimit;
-            incrementBatch = capture.incrementBatch;
-        } catch (e) {
-            // Fallback if codeflash module not available
-            checkSharedTimeLimit = () => {
-                const elapsed = Date.now() - startTime;
-                return elapsed >= TARGET_DURATION_MS && batchCount >= MIN_BATCHES;
-            };
-            incrementBatch = () => {};
-        }
+        // Time limit check - must use local time tracking because Jest runs tests
+        // in worker processes, so shared state from capture.js isn't accessible here
+        const checkTimeLimit = () => {
+            const elapsed = Date.now() - startTime;
+            return elapsed >= TARGET_DURATION_MS && batchCount >= MIN_BATCHES;
+        };

        // Batched looping: run all test files multiple times
        while (batchCount < MAX_BATCHES) {
            batchCount++;

            // Check time limit BEFORE each batch
-            if (batchCount > MIN_BATCHES && checkSharedTimeLimit()) {
+            if (batchCount > MIN_BATCHES && checkTimeLimit()) {
+                console.log(`[codeflash] Time limit reached after ${batchCount - 1} batches (${Date.now() - startTime}ms elapsed)`);
                break;
            }

@ -174,13 +211,11 @@ class CodeflashLoopRunner {
                break;
            }

-            // Increment batch counter in shared state and set env var
-            // The env var persists across Jest module resets, ensuring continuous loop indices
-            incrementBatch();
+            // Set env var for batch number - persists across Jest module resets
            process.env.CODEFLASH_PERF_CURRENT_BATCH = String(batchCount);

            // Run all test files in this batch
-            const batchResult = await this._runAllTestsOnce(tests, watcher);
+            const batchResult = await this._runAllTestsOnce(tests, watcher, options);
            allConsoleOutput += batchResult.consoleOutput;

            if (batchResult.hasFailure) {
@ -189,7 +224,8 @@ class CodeflashLoopRunner {
            }

            // Check time limit AFTER each batch
-            if (checkSharedTimeLimit()) {
+            if (checkTimeLimit()) {
+                console.log(`[codeflash] Time limit reached after ${batchCount} batches (${Date.now() - startTime}ms elapsed)`);
                break;
            }
        }
@ -207,8 +243,74 @@ class CodeflashLoopRunner {

    /**
     * Run all test files once (one batch).
+     * Uses different approaches for Jest 29 vs Jest 30.
+     *
+     * Dispatches on the module-level `jestVersion`: values of 30 or higher go to
+     * `_runAllTestsOnceJest30`, anything else goes to `_runAllTestsOnceJest29`.
+     *
+     * @param tests - Test entries to run; forwarded unchanged to the selected implementation.
+     * @param watcher - Watcher object; forwarded unchanged to the selected implementation.
+     * @param options - Runner options; only forwarded on the Jest 30 path.
+     * @returns {Promise<*>} Whatever the selected implementation resolves to.
     */
-    async _runAllTestsOnce(tests, watcher) {
+    async _runAllTestsOnce(tests, watcher, options) {
+        if (jestVersion >= 30) {
+            return this._runAllTestsOnceJest30(tests, watcher, options);
+        } else {
+            return this._runAllTestsOnceJest29(tests, watcher);
+        }
+    }
+
+    /**
+     * Jest 30+ implementation - delegates to base TestRunner and collects results.
+     *
+     * Subscribes to the `test-file-success`, `test-file-failure` and `test-file-start`
+     * events of `this._baseRunner`, re-emits each of them on `this._eventEmitter`, and runs
+     * the tests through the base runner with `serial: true` merged over the caller's options.
+     *
+     * A batch counts as failed when a `test-file-failure` event is received or when a
+     * successful result reports `numFailingTests > 0`. Console output is built from the
+     * `message` of each entry in `result.console`, when that is an array.
+     *
+     * NOTE(review): `resultsCollector` is filled but never read inside this method - confirm
+     * whether it is still needed.
+     *
+     * @param tests - Test entries passed through to the base runner.
+     * @param watcher - Watcher object passed through to the base runner.
+     * @param {Object} options - Runner options; spread into a new object with `serial: true`.
+     * @returns {Promise<{consoleOutput: string, hasFailure: boolean}>} Output and failure flag for this batch.
+     */
+    async _runAllTestsOnceJest30(tests, watcher, options) {
+        let hasFailure = false;
+        let allConsoleOutput = '';
+
+        // For Jest 30, we need to collect results through event listeners
+        const resultsCollector = [];
+
+        // Subscribe to events from the base runner; payloads are destructured as [test, result]
+        const unsubscribeSuccess = this._baseRunner.on('test-file-success', (testData) => {
+            const [test, result] = testData;
+            resultsCollector.push({ test, result, success: true });
+
+            if (result && result.console && Array.isArray(result.console)) {
+                allConsoleOutput += result.console.map(e => e.message || '').join('\n') + '\n';
+            }
+
+            if (result && result.numFailingTests > 0) {
+                hasFailure = true;
+            }
+
+            // Forward to our event emitter
+            this._eventEmitter.emit('test-file-success', testData);
+        });
+
+        const unsubscribeFailure = this._baseRunner.on('test-file-failure', (testData) => {
+            const [test, error] = testData;
+            resultsCollector.push({ test, error, success: false });
+            hasFailure = true;
+
+            // Forward to our event emitter
+            this._eventEmitter.emit('test-file-failure', testData);
+        });
+
+        const unsubscribeStart = this._baseRunner.on('test-file-start', (testData) => {
+            // Forward to our event emitter
+            this._eventEmitter.emit('test-file-start', testData);
+        });
+
+        try {
+            // Run tests using the base runner (always serial for benchmarking)
+            await this._baseRunner.runTests(tests, watcher, { ...options, serial: true });
+        } finally {
+            // Cleanup subscriptions; each `on` return value is only called if it is a function
+            if (typeof unsubscribeSuccess === 'function') unsubscribeSuccess();
+            if (typeof unsubscribeFailure === 'function') unsubscribeFailure();
+            if (typeof unsubscribeStart === 'function') unsubscribeStart();
+        }
+
+        return { consoleOutput: allConsoleOutput, hasFailure };
+    }
+
+    /**
+     * Jest 29 implementation - uses direct runTest import.
+     */
+    async _runAllTestsOnceJest29(tests, watcher) {
        let hasFailure = false;
        let allConsoleOutput = '';