merge: resolve conflict with main in test_build_tools.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Mohamed Ashraf 2026-04-09 15:07:17 +00:00
commit ebd72acb18
54 changed files with 1377 additions and 162 deletions

26
.github/dependabot.yml vendored Normal file
View file

@ -0,0 +1,26 @@
# TEMPORARILY DISABLED — re-enable by removing open-pull-requests-limit: 0
version: 2
updates:
# Python (root pyproject.toml)
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 0
# JavaScript (codeflash npm package)
- package-ecosystem: "npm"
directory: "/packages/codeflash"
schedule:
interval: "weekly"
open-pull-requests-limit: 0
# GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 0
# code_to_optimize/ directories are test fixtures — do NOT update them.
# Dependabot PRs for these always fail (missing secrets) and waste CI.

View file

@ -27,17 +27,21 @@ on:
jobs:
# Automatic PR review (can fix linting issues and push)
# Blocked for fork PRs to prevent malicious code execution
# TEMPORARILY DISABLED — re-enable by removing `false &&` below
pr-review:
concurrency:
group: pr-review-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
if: |
false &&
(
github.event_name == 'pull_request' &&
github.event.sender.login != 'claude[bot]' &&
github.event.pull_request.head.repo.full_name == github.repository
) ||
github.event_name == 'workflow_dispatch'
(
github.event_name == 'pull_request' &&
github.event.sender.login != 'claude[bot]' &&
github.event.pull_request.head.repo.full_name == github.repository
) ||
github.event_name == 'workflow_dispatch'
)
runs-on: ubuntu-latest
permissions:
contents: write
@ -53,7 +57,7 @@ jobs:
ref: ${{ github.event.pull_request.head.ref || github.ref }}
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
- name: Install dependencies
run: |
@ -68,7 +72,7 @@ jobs:
- name: Run Claude Code
id: claude
uses: anthropics/claude-code-action@v1
uses: anthropics/claude-code-action@v1.0.89
with:
use_bedrock: "true"
use_sticky_comment: true
@ -313,7 +317,7 @@ jobs:
ref: ${{ steps.pr-ref.outputs.ref }}
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
- name: Install dependencies
run: |
@ -328,7 +332,7 @@ jobs:
- name: Run Claude Code
id: claude
uses: anthropics/claude-code-action@v1
uses: anthropics/claude-code-action@v1.0.89
with:
use_bedrock: "true"
claude_args: '--model us.anthropic.claude-sonnet-4-6 --allowedTools "Read,Edit,Write,Glob,Grep,Bash(git status*),Bash(git diff*),Bash(git add *),Bash(git commit *),Bash(git push*),Bash(git log*),Bash(git merge*),Bash(git fetch*),Bash(git checkout*),Bash(git branch*),Bash(uv run prek *),Bash(prek *),Bash(uv run ruff *),Bash(uv run pytest *),Bash(uv run mypy *),Bash(uv run coverage *),Bash(gh pr comment*),Bash(gh pr view*),Bash(gh pr diff*),Bash(gh pr merge*),Bash(gh pr close*)"'

View file

@ -3,7 +3,10 @@ name: CodeFlash
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
workflow_dispatch:
@ -28,7 +31,7 @@ jobs:
fetch-depth: 0
- name: 🐍 Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Async
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -59,7 +63,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Bubble Sort Benchmark
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -59,7 +63,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Bubble Sort Pytest (No Git)
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -58,7 +62,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Bubble Sort Unittest
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -58,7 +62,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: Coverage E2E
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -56,7 +60,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Futurehouse Structure
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -58,7 +62,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Init Optimization
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
concurrency:
@ -57,7 +61,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -72,7 +72,7 @@ jobs:
cache: maven
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -67,7 +67,7 @@ jobs:
cache: maven
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -0,0 +1,105 @@
# End-to-end check that Codeflash can optimize a Java method returning void.
# The repository's .git directory is deleted before the run, so the test
# exercises the "no git" code path.
name: E2E - Java Void Optimization (No Git)

on:
  pull_request:
    # Only run when Java support, the shared optimization/verification code,
    # the Java fixtures/runtime, or this workflow itself changes.
    paths:
      - 'codeflash/languages/java/**'
      - 'codeflash/languages/base.py'
      - 'codeflash/languages/registry.py'
      - 'codeflash/optimization/**'
      - 'codeflash/verification/**'
      - 'code_to_optimize/java/**'
      - 'codeflash-java-runtime/**'
      - 'tests/scripts/end_to_end_test_java_void_optimization.py'
      - '.github/workflows/e2e-java-void-optimization.yaml'
  workflow_dispatch:

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}
  cancel-in-progress: true

jobs:
  java-void-optimization-no-git:
    # Manual runs, and PRs touching workflow files from anyone other than the
    # two listed users, are routed through the 'external-trusted-contributors'
    # environment; everything else uses no environment.
    environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }}
    runs-on: ubuntu-latest
    env:
      CODEFLASH_AIS_SERVER: prod
      POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
      CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }}
      COLUMNS: 110
      MAX_RETRIES: 3
      RETRY_DELAY: 5
      EXPECTED_IMPROVEMENT_PCT: 70
      CODEFLASH_END_TO_END: 1
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          # Full history: the validation step below diffs base and head SHAs.
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Validate PR
        env:
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          PR_STATE: ${{ github.event.pull_request.state }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        # NOTE(review): the "PR is open" branch lets every open PR proceed, so
        # the author allow-list only blocks PRs that are not open — confirm
        # that this is the intended policy.
        run: |
          if git diff --name-only "$BASE_SHA" "$HEAD_SHA" | grep -q "^.github/workflows/"; then
            echo "⚠️ Workflow changes detected."
            echo "PR Author: $PR_AUTHOR"
            if [[ "$PR_AUTHOR" == "misrasaurabh1" || "$PR_AUTHOR" == "KRRT7" ]]; then
              echo "✅ Authorized user ($PR_AUTHOR). Proceeding."
            elif [[ "$PR_STATE" == "open" ]]; then
              echo "✅ PR is open. Proceeding."
            else
              echo "⛔ Unauthorized user ($PR_AUTHOR) attempting to modify workflows. Exiting."
              exit 1
            fi
          else
            echo "✅ No workflow file changes detected. Proceeding."
          fi

      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: maven

      - name: Set up Python 3.11 for CLI
        # Pinned to the same setup-uv release as the other workflows.
        uses: astral-sh/setup-uv@v8.0.0
        with:
          python-version: 3.11.6

      - name: Install dependencies (CLI)
        run: uv sync

      - name: Build codeflash-runtime JAR
        run: |
          cd codeflash-java-runtime
          mvn clean package -q -DskipTests
          mvn install -q -DskipTests

      - name: Verify Java installation
        run: |
          java -version
          mvn --version

      - name: Remove .git
        # A missing .git directory is treated as a failure: the test is only
        # meaningful if this step actually removed one.
        run: |
          if [ -d ".git" ]; then
            sudo rm -rf .git
            echo ".git directory removed."
          else
            echo ".git directory does not exist."
            exit 1
          fi

      - name: Run Codeflash to optimize void function
        run: |
          uv run python tests/scripts/end_to_end_test_java_void_optimization.py

View file

@ -3,7 +3,12 @@ name: E2E - JS CommonJS Function
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'packages/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -74,7 +79,7 @@ jobs:
npm install
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,12 @@ name: E2E - JS ESM Async
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'packages/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -74,7 +79,7 @@ jobs:
npm install
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,12 @@ name: E2E - JS TypeScript Class
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'packages/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -74,7 +79,7 @@ jobs:
npm install
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Topological Sort (Worktree)
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
@ -83,7 +87,7 @@ jobs:
fi
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -3,7 +3,11 @@ name: E2E - Tracer Replay
on:
pull_request:
paths:
- '**' # Trigger for all paths
- 'codeflash/**'
- 'tests/**'
- 'pyproject.toml'
- 'uv.lock'
- '.github/workflows/e2e-*.yaml'
workflow_dispatch:
concurrency:
@ -58,7 +62,7 @@ jobs:
- name: Set up Python 3.11 for CLI
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: 3.11.6

View file

@ -39,7 +39,7 @@ jobs:
cache: maven
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
- name: Set up Python environment
run: |

View file

@ -4,7 +4,17 @@ on:
push:
branches:
- main
paths:
- 'codeflash/**'
- 'pyproject.toml'
- 'uv.lock'
- 'mypy_allowlist.txt'
pull_request:
paths:
- 'codeflash/**'
- 'pyproject.toml'
- 'uv.lock'
- 'mypy_allowlist.txt'
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
@ -21,7 +31,7 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
- name: sync uv
run: |

View file

@ -1,5 +1,12 @@
name: Lint
on: [pull_request]
on:
pull_request:
paths:
- 'codeflash/**'
- 'tests/**'
- 'packages/**'
- 'pyproject.toml'
- 'uv.lock'
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
@ -12,7 +19,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: astral-sh/setup-uv@v6
- uses: astral-sh/setup-uv@v8.0.0
- uses: j178/prek-action@v1
with:
extra-args: '--from-ref origin/${{ github.base_ref }} --to-ref ${{ github.sha }}'

View file

@ -78,7 +78,7 @@ jobs:
- name: Install uv
if: steps.check_tag.outputs.exists == 'false'
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
- name: Build
if: steps.check_tag.outputs.exists == 'false'
@ -150,7 +150,7 @@ jobs:
- name: Install uv
if: steps.check_tag.outputs.exists == 'false'
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
- name: Build
if: steps.check_tag.outputs.exists == 'false'

View file

@ -3,7 +3,23 @@ name: unit-tests
on:
push:
branches: [main]
paths:
- 'codeflash/**'
- 'codeflash-benchmark/**'
- 'codeflash-java-runtime/**'
- 'tests/**'
- 'packages/**'
- 'pyproject.toml'
- 'uv.lock'
pull_request:
paths:
- 'codeflash/**'
- 'codeflash-benchmark/**'
- 'codeflash-java-runtime/**'
- 'tests/**'
- 'packages/**'
- 'pyproject.toml'
- 'uv.lock'
workflow_dispatch:
concurrency:
@ -54,7 +70,7 @@ jobs:
mvn install -q -DskipTests
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.0.0
with:
python-version: ${{ matrix.python-version }}

View file

@ -0,0 +1,21 @@
package com.example;
public class InPlaceSorter {
public static void bubbleSortInPlace(int[] arr) {
if (arr == null || arr.length <= 1) {
return;
}
int n = arr.length;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n - 1; j++) {
if (arr[j] > arr[j + 1]) {
int temp = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = temp;
}
}
}
}
}

View file

@ -0,0 +1,21 @@
package com.example;
public class InstanceSorter {
public void bubbleSortInPlace(int[] arr) {
if (arr == null || arr.length <= 1) {
return;
}
int n = arr.length;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n - 1; j++) {
if (arr[j] > arr[j + 1]) {
int temp = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = temp;
}
}
}
}
}

View file

@ -0,0 +1,62 @@
package com.example;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Unit tests for the static {@code InPlaceSorter.bubbleSortInPlace(int[])}.
 *
 * The method under test returns void, so each case inspects the array after
 * the call instead of a return value.
 */
class InPlaceSorterTest {
// Unsorted input ends up in ascending order.
@Test
void testBubbleSortInPlace() {
int[] arr = {5, 3, 1, 4, 2};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 3, 4, 5}, arr);
}
// Already-sorted input is left unchanged.
@Test
void testBubbleSortInPlaceAlreadySorted() {
int[] arr = {1, 2, 3, 4, 5};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 3, 4, 5}, arr);
}
// Descending input is fully reversed into ascending order.
@Test
void testBubbleSortInPlaceReversed() {
int[] arr = {5, 4, 3, 2, 1};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 3, 4, 5}, arr);
}
// Repeated values are all kept and placed next to each other.
@Test
void testBubbleSortInPlaceWithDuplicates() {
int[] arr = {3, 2, 4, 1, 3, 2};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 2, 3, 3, 4}, arr);
}
// Negative values and zero sort before the positive values.
@Test
void testBubbleSortInPlaceWithNegatives() {
int[] arr = {3, -2, 7, 0, -5};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{-5, -2, 0, 3, 7}, arr);
}
// A one-element array is returned untouched.
@Test
void testBubbleSortInPlaceSingleElement() {
int[] arr = {42};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{42}, arr);
}
// An empty array stays empty.
@Test
void testBubbleSortInPlaceEmpty() {
int[] arr = {};
InPlaceSorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{}, arr);
}
// No assertion on purpose: the test passes as long as a null argument
// does not cause an exception to escape the call.
@Test
void testBubbleSortInPlaceNull() {
InPlaceSorter.bubbleSortInPlace(null);
}
}

View file

@ -0,0 +1,69 @@
package com.example;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Unit tests for the instance method {@code InstanceSorter.bubbleSortInPlace(int[])}.
 *
 * Each case creates its own sorter, calls the void method, and then checks
 * the contents of the array that was passed in.
 */
class InstanceSorterTest {
// Unsorted input ends up in ascending order.
@Test
void testBubbleSortInPlace() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {5, 3, 1, 4, 2};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 3, 4, 5}, arr);
}
// Already-sorted input is left unchanged.
@Test
void testBubbleSortInPlaceAlreadySorted() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {1, 2, 3, 4, 5};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 3, 4, 5}, arr);
}
// Descending input is fully reversed into ascending order.
@Test
void testBubbleSortInPlaceReversed() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {5, 4, 3, 2, 1};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 3, 4, 5}, arr);
}
// Repeated values are all kept and placed next to each other.
@Test
void testBubbleSortInPlaceWithDuplicates() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {3, 2, 4, 1, 3, 2};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{1, 2, 2, 3, 3, 4}, arr);
}
// Negative values and zero sort before the positive values.
@Test
void testBubbleSortInPlaceWithNegatives() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {3, -2, 7, 0, -5};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{-5, -2, 0, 3, 7}, arr);
}
// A one-element array is returned untouched.
@Test
void testBubbleSortInPlaceSingleElement() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {42};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{42}, arr);
}
// An empty array stays empty.
@Test
void testBubbleSortInPlaceEmpty() {
InstanceSorter sorter = new InstanceSorter();
int[] arr = {};
sorter.bubbleSortInPlace(arr);
assertArrayEquals(new int[]{}, arr);
}
// No assertion on purpose: the test passes as long as a null argument
// does not cause an exception to escape the call.
@Test
void testBubbleSortInPlaceNull() {
InstanceSorter sorter = new InstanceSorter();
sorter.bubbleSortInPlace(null);
}
}

View file

@ -209,6 +209,87 @@ class ComparatorCorrectnessTest {
assertFalse(Comparator.isDeserializationError(42));
}
// ============================================================
// VOID METHOD STATE COMPARISON — proves we actually compare
// post-call state for void methods, rather than just skipping them
// ============================================================
// Writes equal serialized post-call states into both databases (same
// insertRow identifiers) and expects the comparator to report them as
// equivalent after exactly one actual comparison.
@Test
@DisplayName("void state: both sides sorted identically → equivalent")
void testVoidState_identicalMutation_equivalent() throws Exception {
createTestDb(originalDb);
createTestDb(candidateDb);
// Simulate: bubbleSortInPlace(arr) — both original and candidate sort correctly.
// Post-call state: Object[]{sorted_array}
int[] sortedArr = {1, 2, 3, 4, 5};
byte[] origState = Serializer.serialize(new Object[]{sortedArr});
// A separate array instance with the same contents, so equivalence cannot
// come from both sides sharing one object.
byte[] candState = Serializer.serialize(new Object[]{new int[]{1, 2, 3, 4, 5}});
insertRow(originalDb, "L1_1", 1, origState);
insertRow(candidateDb, "L1_1", 1, candState);
String json = Comparator.compareDatabases(originalDb.toString(), candidateDb.toString());
Map<String, Object> result = parseJson(json);
assertTrue((Boolean) result.get("equivalent"),
"Both sides produce same sorted array — should be equivalent");
// The row must have been compared, not skipped.
assertEquals(1, ((Number) result.get("actualComparisons")).intValue());
}
// Writes different serialized array states into the two databases (same
// insertRow identifiers) and expects the comparator to report them as not
// equivalent after exactly one actual comparison.
@Test
@DisplayName("void state: candidate mutates array differently → NOT equivalent")
void testVoidState_differentMutation_rejected() throws Exception {
createTestDb(originalDb);
createTestDb(candidateDb);
// Simulate: original sorts [3,1,2] -> [1,2,3].
// Bad optimization doesn't sort correctly: [3,1,2] is left unchanged.
byte[] origState = Serializer.serialize(new Object[]{new int[]{1, 2, 3}});
byte[] candState = Serializer.serialize(new Object[]{new int[]{3, 1, 2}});
insertRow(originalDb, "L1_1", 1, origState);
insertRow(candidateDb, "L1_1", 1, candState);
String json = Comparator.compareDatabases(originalDb.toString(), candidateDb.toString());
Map<String, Object> result = parseJson(json);
assertFalse((Boolean) result.get("equivalent"),
"Candidate produced wrong array — must be rejected");
// The mismatch must come from a real comparison, not from a skipped row.
assertEquals(1, ((Number) result.get("actualComparisons")).intValue());
}
// Serializes a two-element state (a receiver field map plus an int array) for
// each side. The arrays match but the receiver maps differ in "size", and the
// comparator is expected to report the states as not equivalent.
@Test
@DisplayName("void state: receiver + args both compared — wrong receiver state rejected")
void testVoidState_receiverAndArgs_wrongReceiverRejected() throws Exception {
createTestDb(originalDb);
createTestDb(candidateDb);
// Simulate: instance method sorter.sort(data)
// Post-call state is Object[]{receiver_fields_map, mutated_data}
// Original: receiver has size=3, data is [1,2,3]
// Candidate: receiver has size=0 (wrong), data is [1,2,3]
Map<String, Object> origReceiver = new HashMap<>();
origReceiver.put("size", 3);
origReceiver.put("sorted", true);
Map<String, Object> candReceiver = new HashMap<>();
// Only "size" differs between the two receiver maps.
candReceiver.put("size", 0);
candReceiver.put("sorted", true);
byte[] origState = Serializer.serialize(new Object[]{origReceiver, new int[]{1, 2, 3}});
byte[] candState = Serializer.serialize(new Object[]{candReceiver, new int[]{1, 2, 3}});
insertRow(originalDb, "L1_1", 1, origState);
insertRow(candidateDb, "L1_1", 1, candState);
String json = Comparator.compareDatabases(originalDb.toString(), candidateDb.toString());
Map<String, Object> result = parseJson(json);
assertFalse((Boolean) result.get("equivalent"),
"Receiver state differs (size 3 vs 0) — must be rejected even though args match");
// The mismatch must come from a real comparison, not from a skipped row.
assertEquals(1, ((Number) result.get("actualComparisons")).intValue());
}
// --- Helpers ---
private void createTestDb(Path dbPath) throws Exception {

View file

@ -17,7 +17,7 @@ import tomlkit
from codeflash.cli_cmds.console import logger, paneled_text
from codeflash.code_utils.config_parser import find_pyproject_toml, get_all_closest_config_files
from codeflash.lsp.helpers import is_LSP_enabled
from codeflash.lsp.helpers import is_LSP_enabled, is_subagent_mode
_INVALID_CHARS_NT = {"<", ">", ":", '"', "|", "?", "*"}
@ -471,6 +471,11 @@ def exit_with_message(message: str, *, error_on_exit: bool = False) -> None:
if is_LSP_enabled():
logger.error(message)
return
if is_subagent_mode():
from xml.sax.saxutils import escape
sys.stdout.write(f"<codeflash-error>{escape(message)}</codeflash-error>\n")
sys.exit(1 if error_on_exit else 0)
paneled_text(message, panel_args={"style": "red"})
sys.exit(1 if error_on_exit else 0)

View file

@ -195,7 +195,8 @@ def _find_all_functions_via_language_support(file_path: Path) -> dict[Path, list
try:
lang_support = get_language_support(file_path)
criteria = FunctionFilterCriteria(require_return=True)
require_return = lang_support.language != Language.JAVA
criteria = FunctionFilterCriteria(require_return=require_return)
functions[file_path] = lang_support.discover_functions(file_path, criteria)
except Exception as e:
logger.debug(f"Failed to discover functions in {file_path}: {e}")
@ -454,7 +455,8 @@ def find_all_functions_in_file(file_path: Path) -> dict[Path, list[FunctionToOpt
from codeflash.languages.base import FunctionFilterCriteria
lang_support = get_language_support(file_path)
criteria = FunctionFilterCriteria(require_return=True)
require_return = lang_support.language != Language.JAVA
criteria = FunctionFilterCriteria(require_return=require_return)
source = file_path.read_text(encoding="utf-8")
return {file_path: lang_support.discover_functions(source, file_path, criteria)}
except Exception as e:

View file

@ -44,7 +44,8 @@ gradle.projectsEvaluated {
'spotbugsMain', 'spotbugsTest',
'pmdMain', 'pmdTest',
'rat', 'japicmp',
'jarHell', 'thirdPartyAudit'
'jarHell', 'thirdPartyAudit',
'spotlessCheck', 'spotlessApply', 'spotlessJava', 'spotlessKotlin', 'spotlessScala'
]
}.configureEach {
enabled = false

View file

@ -203,6 +203,7 @@ def _generate_sqlite_write_code(
func_name: str,
test_method_name: str,
invocation_id: str = "",
verification_type: str = "function_call",
) -> list[str]:
"""Generate SQLite write code for a single function call.
@ -249,7 +250,7 @@ def _generate_sqlite_write_code(
f'{inner_indent} _cf_pstmt{id_pair}.setString(6, "{inv_id_str}");',
f"{inner_indent} _cf_pstmt{id_pair}.setLong(7, _cf_dur{id_pair});",
f"{inner_indent} _cf_pstmt{id_pair}.setBytes(8, _cf_serializedResult{id_pair});",
f'{inner_indent} _cf_pstmt{id_pair}.setString(9, "function_call");',
f'{inner_indent} _cf_pstmt{id_pair}.setString(9, "{verification_type}");',
f"{inner_indent} _cf_pstmt{id_pair}.executeUpdate();",
f"{inner_indent} }}",
f"{inner_indent} }}",
@ -337,22 +338,53 @@ def wrap_target_calls_with_treesitter(
orig_line = body_lines[line_idx]
line_indent_str = " " * (len(orig_line) - len(orig_line.lstrip()))
is_void = target_return_type == "void"
var_name = f"_cf_result{iter_id}_{call_counter}"
receiver = call.get("receiver", "this")
arg_texts: list[str] = call.get("arg_texts", [])
cast_type = _infer_array_cast_type(orig_line)
if not cast_type and target_return_type and target_return_type != "void":
if not cast_type and target_return_type and not is_void:
cast_type = target_return_type
var_with_cast = f"({cast_type}){var_name}" if cast_type else var_name
capture_stmt_with_decl = f"var {var_name} = {call['full_call']};"
capture_stmt_assign = f"{var_name} = {call['full_call']};"
if precise_call_timing:
serialize_stmt = f"_cf_serializedResult{iter_id}_{call_counter} = com.codeflash.Serializer.serialize((Object) {var_name});"
start_stmt = f"_cf_start{iter_id}_{call_counter} = System.nanoTime();"
end_stmt = f"_cf_end{iter_id}_{call_counter} = System.nanoTime();"
if is_void:
bare_call_stmt = f"{call['full_call']};"
# For void methods, serialize the post-call state to capture side effects.
# We always serialize the arguments (which are mutated in place).
# For instance methods, we also include the receiver to capture object state changes.
# For static methods, the receiver is a class name (not a value), so args only.
is_static_call = receiver != "this" and receiver[:1].isupper()
parts: list[str] = []
if not is_static_call:
parts.append(receiver)
parts.extend(arg_texts)
if parts:
serialize_target = f"new Object[]{{{', '.join(parts)}}}"
else:
serialize_target = "new Object[]{}"
if precise_call_timing:
serialize_stmt = f"_cf_serializedResult{iter_id}_{call_counter} = com.codeflash.Serializer.serialize({serialize_target});"
start_stmt = f"_cf_start{iter_id}_{call_counter} = System.nanoTime();"
end_stmt = f"_cf_end{iter_id}_{call_counter} = System.nanoTime();"
else:
serialize_stmt = (
f"_cf_serializedResult{iter_id} = com.codeflash.Serializer.serialize({serialize_target});"
)
start_stmt = f"_cf_start{iter_id} = System.nanoTime();"
end_stmt = f"_cf_end{iter_id} = System.nanoTime();"
else:
serialize_stmt = f"_cf_serializedResult{iter_id} = com.codeflash.Serializer.serialize((Object) {var_name});"
start_stmt = f"_cf_start{iter_id} = System.nanoTime();"
end_stmt = f"_cf_end{iter_id} = System.nanoTime();"
capture_stmt_with_decl = f"var {var_name} = {call['full_call']};"
capture_stmt_assign = f"{var_name} = {call['full_call']};"
if precise_call_timing:
serialize_stmt = f"_cf_serializedResult{iter_id}_{call_counter} = com.codeflash.Serializer.serialize((Object) {var_name});"
start_stmt = f"_cf_start{iter_id}_{call_counter} = System.nanoTime();"
end_stmt = f"_cf_end{iter_id}_{call_counter} = System.nanoTime();"
else:
serialize_stmt = (
f"_cf_serializedResult{iter_id} = com.codeflash.Serializer.serialize((Object) {var_name});"
)
start_stmt = f"_cf_start{iter_id} = System.nanoTime();"
end_stmt = f"_cf_end{iter_id} = System.nanoTime();"
if call["parent_type"] == "expression_statement":
es_start = call["_es_start_char"]
@ -360,31 +392,61 @@ def wrap_target_calls_with_treesitter(
if precise_call_timing:
# No indent on first line — body_text[:es_start] already has leading whitespace.
# Subsequent lines get line_indent_str.
var_decls = [
f"Object {var_name} = null;",
f"long _cf_end{iter_id}_{call_counter} = -1;",
f"long _cf_start{iter_id}_{call_counter} = 0;",
f"byte[] _cf_serializedResult{iter_id}_{call_counter} = null;",
]
if is_void:
var_decls = [
f"long _cf_end{iter_id}_{call_counter} = -1;",
f"long _cf_start{iter_id}_{call_counter} = 0;",
f"byte[] _cf_serializedResult{iter_id}_{call_counter} = null;",
]
else:
var_decls = [
f"Object {var_name} = null;",
f"long _cf_end{iter_id}_{call_counter} = -1;",
f"long _cf_start{iter_id}_{call_counter} = 0;",
f"byte[] _cf_serializedResult{iter_id}_{call_counter} = null;",
]
start_marker = f'System.out.println("!$######" + _cf_mod{iter_id} + ":" + _cf_cls{iter_id} + "." + _cf_test{iter_id} + ":" + _cf_fn{iter_id} + ":" + _cf_loop{iter_id} + ":{inv_id}" + "######$!");'
try_block = [
"try {",
f" {start_stmt}",
f" {capture_stmt_assign}",
f" {end_stmt}",
f" {serialize_stmt}",
]
if is_void:
try_block = [
"try {",
f" {start_stmt}",
f" {bare_call_stmt}",
f" {end_stmt}",
f" {serialize_stmt}",
]
else:
try_block = [
"try {",
f" {start_stmt}",
f" {capture_stmt_assign}",
f" {end_stmt}",
f" {serialize_stmt}",
]
finally_block = _generate_sqlite_write_code(
iter_id, call_counter, "", class_name, func_name, test_method_name, invocation_id=inv_id
iter_id,
call_counter,
"",
class_name,
func_name,
test_method_name,
invocation_id=inv_id,
verification_type="void_state" if is_void else "function_call",
)
all_lines = [*var_decls, start_marker, *try_block, *finally_block]
replacement = (
all_lines[0] + "\n" + "\n".join(f"{line_indent_str}{repl_line}" for repl_line in all_lines[1:])
)
elif is_void:
replacement = f"{bare_call_stmt} {serialize_stmt}"
else:
replacement = f"{capture_stmt_with_decl} {serialize_stmt}"
body_text = body_text[:es_start] + replacement + body_text[es_end:]
else:
if is_void:
# Void calls cannot be embedded in expressions in valid Java — skip instrumentation
logger.warning("Skipping instrumentation of embedded void call: %s", call["full_call"])
continue
# Embedded call: replace call with variable, then insert capture lines before the line
call_start = call["_call_start_char"]
call_end = call["_call_end_char"]
@ -451,6 +513,15 @@ def _collect_calls(
if parent_type == "expression_statement":
es_start = parent.start_byte - prefix_len
es_end = parent.end_byte - prefix_len
object_node = node.child_by_field_name("object")
receiver = analyzer.get_node_text(object_node, wrapper_bytes) if object_node else "this"
# Extract argument texts for void method serialization
args_node = node.child_by_field_name("arguments")
arg_texts: list[str] = []
if args_node:
for child in args_node.children:
if child.type not in ("(", ")", ","):
arg_texts.append(analyzer.get_node_text(child, wrapper_bytes))
out.append(
{
"start_byte": start,
@ -461,6 +532,8 @@ def _collect_calls(
"in_complex": _is_inside_complex_expression(node),
"es_start_byte": es_start,
"es_end_byte": es_end,
"receiver": receiver,
"arg_texts": arg_texts,
}
)
for child in node.children:

View file

@ -43,6 +43,8 @@ _MAVEN_VALIDATION_SKIP_FLAGS = [
"-Denforcer.skip=true",
"-Djapicmp.skip=true",
"-Derrorprone.skip=true",
"-Dspotless.check.skip=true",
"-Dspotless.apply.skip=true",
"-Dmaven.compiler.failOnWarning=false",
"-Dmaven.compiler.showWarnings=false",
]

View file

@ -189,6 +189,7 @@ class JavaAssertTransformer:
qualified_name: str | None = None,
analyzer: JavaAnalyzer | None = None,
mode: str = "capture",
target_return_type: str = "",
) -> None:
self.analyzer = analyzer or get_java_analyzer()
self.func_name = function_name
@ -196,6 +197,7 @@ class JavaAssertTransformer:
self.invocation_counter = 0
self._detected_framework: str | None = None
self.mode = mode # "capture" (default, instrumentation) or "strip" (clean display)
self.target_return_type = target_return_type
# Precompile the assignment-detection regex to avoid recompiling on each call.
self._assign_re = re.compile(r"(\w+(?:<[^>]+>)?)\s+(\w+)\s*=\s*$")
@ -1062,7 +1064,7 @@ class JavaAssertTransformer:
if not assertion.target_calls:
return ""
if self.mode == "strip":
if self.mode == "strip" or self.target_return_type == "void":
return self._generate_strip_replacement(assertion)
# Infer the return type from assertion context to avoid Object→primitive cast errors
@ -1244,7 +1246,9 @@ class JavaAssertTransformer:
return "".join(cur).rstrip()
def transform_java_assertions(source: str, function_name: str, qualified_name: str | None = None) -> str:
def transform_java_assertions(
source: str, function_name: str, qualified_name: str | None = None, target_return_type: str = ""
) -> str:
"""Transform Java test code by removing assertions and capturing function calls.
This is the main entry point for Java assertion transformation.
@ -1253,12 +1257,15 @@ def transform_java_assertions(source: str, function_name: str, qualified_name: s
source: The Java test source code.
function_name: Name of the function being tested.
qualified_name: Optional fully qualified name of the function.
target_return_type: Return type of the target function (e.g., "void", "int").
Returns:
Transformed source code with assertions replaced by capture statements.
"""
transformer = JavaAssertTransformer(function_name=function_name, qualified_name=qualified_name)
transformer = JavaAssertTransformer(
function_name=function_name, qualified_name=qualified_name, target_return_type=target_return_type
)
return transformer.transform(source)

View file

@ -2268,7 +2268,10 @@ class JavaScriptSupport:
source_without_ext = source_file_abs.with_suffix("")
# Use os.path.relpath to compute relative path from tests_root to source file
rel_path = os.path.relpath(str(source_without_ext), str(tests_root_abs))
# Replace backslashes with forward slashes — JavaScript import/require paths
# must use forward slashes. Backslashes are escape chars in JS strings
# (e.g. \t → tab, \n → newline) and would break imports on Windows.
rel_path = os.path.relpath(str(source_without_ext), str(tests_root_abs)).replace("\\", "/")
# For ESM, add .js extension (TypeScript convention)
# TypeScript requires imports to reference the OUTPUT file extension (.js),

View file

@ -369,7 +369,9 @@ def _create_runtime_jest_config(base_config_path: Path | None, project_root: Pat
runtime_config_path = config_dir / f"jest.codeflash.runtime.config{config_ext}"
test_dirs_js = ", ".join(f"'{d}'" for d in sorted(test_dirs))
# Normalize to forward slashes — backslashes in JS strings are escape chars
# (e.g. \t → tab, \n → newline) and would corrupt paths on Windows.
test_dirs_js = ", ".join(f"'{d.replace(chr(92), '/')}'" for d in sorted(test_dirs))
# In monorepos, add the root node_modules to moduleDirectories so Jest
# can resolve workspace packages that are hoisted to the monorepo root.
@ -382,6 +384,8 @@ def _create_runtime_jest_config(base_config_path: Path | None, project_root: Pat
else:
module_dirs_line_no_base = ""
project_root_posix = project_root.as_posix()
# TypeScript configs (.ts) cannot be required from CommonJS modules
# because Node.js cannot parse TypeScript syntax in require().
# When the base config is TypeScript, we create a standalone config
@ -403,7 +407,7 @@ module.exports = {{
else:
config_content = f"""// Auto-generated by codeflash - runtime config with test roots
module.exports = {{
roots: ['{project_root}', {test_dirs_js}],
roots: ['{project_root_posix}', {test_dirs_js}],
testMatch: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
{module_dirs_line_no_base}}};
"""

View file

@ -8,8 +8,7 @@ from __future__ import annotations
import os
import sys
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Callable
if "--subagent" in sys.argv:
os.environ["CODEFLASH_SUBAGENT_MODE"] = "true"
@ -17,19 +16,26 @@ if "--subagent" in sys.argv:
warnings.filterwarnings("ignore")
from codeflash.cli_cmds.cli import parse_args, process_pyproject_config
from codeflash.cli_cmds.console import paneled_text
from codeflash.code_utils import env_utils
from codeflash.code_utils.checkpoint import ask_should_use_checkpoint_get_functions
from codeflash.code_utils.config_parser import parse_config_file
from codeflash.code_utils.version_check import check_for_newer_minor_version
if TYPE_CHECKING:
from argparse import Namespace
def main() -> None:
"""Entry point for the codeflash command-line interface."""
# Fast path: --version exits before importing the full stack
if len(sys.argv) == 2 and sys.argv[1] == "--version":
from codeflash.version import __version__
print(f"Codeflash version {__version__}")
return
from pathlib import Path
from codeflash.cli_cmds.cli import parse_args, process_pyproject_config
from codeflash.code_utils import env_utils
from codeflash.code_utils.checkpoint import ask_should_use_checkpoint_get_functions
from codeflash.code_utils.config_parser import parse_config_file
from codeflash.code_utils.version_check import check_for_newer_minor_version
from codeflash.telemetry import posthog_cf
from codeflash.telemetry.sentry import init_sentry
@ -89,7 +95,7 @@ def main() -> None:
ask_run_end_to_end_test(args)
else:
# Check for first-run experience (no config exists)
loaded_args = _handle_config_loading(args)
loaded_args = _handle_config_loading(args, process_pyproject_config)
if loaded_args is None:
sys.exit(0)
args = loaded_args
@ -105,7 +111,9 @@ def main() -> None:
optimizer.run_with_args(args)
def _handle_config_loading(args: Namespace) -> Namespace | None:
def _handle_config_loading(
args: Namespace, process_pyproject_config: Callable[[Namespace], Namespace]
) -> Namespace | None:
"""Handle config loading with first-run experience support.
If no config exists and not in CI, triggers the first-run experience.
@ -113,6 +121,7 @@ def _handle_config_loading(args: Namespace) -> Namespace | None:
Args:
args: CLI args namespace.
process_pyproject_config: Config processing function.
Returns:
Updated args with config loaded, or None if user cancelled first-run.
@ -157,6 +166,7 @@ def print_codeflash_banner() -> None:
Renders the Codeflash ASCII logo inside a non-expanding panel titled with
https://codeflash.ai, using bold gold text for visual emphasis.
"""
from codeflash.cli_cmds.console import paneled_text
from codeflash.cli_cmds.console_constants import CODEFLASH_LOGO
paneled_text(

View file

@ -740,6 +740,7 @@ class VerificationType(str, Enum):
)
INIT_STATE_FTO = "init_state_fto" # Correctness verification for fto class instance attributes after init
INIT_STATE_HELPER = "init_state_helper" # Correctness verification for helper class instance attributes after init
VOID_STATE = "void_state" # Correctness verification for void methods (no return value)
@dataclass(frozen=True, slots=True)

View file

@ -1,15 +1,11 @@
from __future__ import annotations
import logging
from typing import Any
from typing import TYPE_CHECKING, Any
from posthog import Posthog
if TYPE_CHECKING:
from posthog import Posthog
from codeflash.api.cfapi import get_user_id
from codeflash.cli_cmds.console import logger
from codeflash.version import __version__
_posthog = None
_posthog: Posthog | None = None
def initialize_posthog(*, enabled: bool = True) -> None:
@ -20,6 +16,10 @@ def initialize_posthog(*, enabled: bool = True) -> None:
if not enabled:
return
import logging
from posthog import Posthog
global _posthog
_posthog = Posthog(project_api_key="phc_aUO790jHd7z1SXwsYCz8dRApxueplZlZWeDSpKc5hol", host="https://us.posthog.com")
_posthog.log.setLevel(logging.CRITICAL) # Suppress PostHog logging
@ -35,12 +35,18 @@ def ph(event: str, properties: dict[str, Any] | None = None) -> None:
if _posthog is None:
return
from codeflash.api.cfapi import get_user_id
from codeflash.lsp.helpers import is_subagent_mode
from codeflash.version import __version__
properties = properties or {}
properties.update({"cli_version": __version__})
properties.update({"cli_version": __version__, "subagent": is_subagent_mode()})
user_id = get_user_id()
if user_id:
_posthog.capture(distinct_id=user_id, event=event, properties=properties)
else:
from codeflash.cli_cmds.console import logger
logger.debug("Failed to log event to PostHog: User ID could not be retrieved.")

View file

@ -1,24 +1,25 @@
import logging
import sentry_sdk
from sentry_sdk.integrations.logging import LoggingIntegration
from sentry_sdk.integrations.stdlib import StdlibIntegration
def init_sentry(*, enabled: bool = False, exclude_errors: bool = False) -> None:
if enabled:
sentry_logging = LoggingIntegration(
level=logging.INFO, # Capture info and above as breadcrumbs
event_level=logging.CRITICAL # Send only fatal errors as events if exclude_errors is True
if exclude_errors
else logging.ERROR, # Otherwise, error logs will create sentry events
)
if not enabled:
return
sentry_sdk.init(
dsn="https://4b9a1902f9361b48c04376df6483bc96@o4506833230561280.ingest.sentry.io/4506833262477312",
integrations=[sentry_logging],
disabled_integrations=[StdlibIntegration],
traces_sample_rate=0,
profiles_sample_rate=0,
ignore_errors=[KeyboardInterrupt],
)
import logging
import sentry_sdk
from sentry_sdk.integrations.logging import LoggingIntegration
from sentry_sdk.integrations.stdlib import StdlibIntegration
sentry_logging = LoggingIntegration(
level=logging.INFO, # Capture info and above as breadcrumbs
event_level=logging.CRITICAL # Send only fatal errors as events if exclude_errors is True
if exclude_errors
else logging.ERROR, # Otherwise, error logs will create sentry events
)
sentry_sdk.init(
dsn="https://4b9a1902f9361b48c04376df6483bc96@o4506833230561280.ingest.sentry.io/4506833262477312",
integrations=[sentry_logging],
disabled_integrations=[StdlibIntegration],
traces_sample_rate=0,
profiles_sample_rate=0,
ignore_errors=[KeyboardInterrupt],
)

View file

@ -54,14 +54,17 @@ class JestCoverageUtils:
return CoverageData.create_empty(source_code_path, function_name, code_context)
# Find the file entry in coverage data
# Jest uses absolute paths as keys
# Jest/Vitest always writes coverage keys with forward slashes (POSIX paths),
# so we normalize our paths to POSIX for comparison — critical on Windows
# where Path.resolve() and str(Path) produce backslash paths.
file_coverage = None
source_path_str = str(source_code_path.resolve())
source_path_posix = source_code_path.resolve().as_posix()
source_relative_posix = source_code_path.as_posix()
for file_path, file_data in coverage_data.items():
# Match exact path or path ending with full relative path from src/
# Avoid matching files with same name in different directories (e.g., db/utils.ts vs utils/utils.ts)
if file_path == source_path_str or file_path.endswith(str(source_code_path)):
if file_path == source_path_posix or file_path.endswith(source_relative_posix):
file_coverage = file_data
break

View file

@ -3,20 +3,20 @@ title: "How Codeflash Works"
description: "Understand Codeflash's generate-and-verify approach to code optimization and correctness verification"
icon: "gear"
sidebarTitle: "How It Works"
keywords: ["architecture", "verification", "correctness", "testing", "optimization", "LLM", "benchmarking", "javascript", "typescript", "python"]
keywords: ["architecture", "verification", "correctness", "testing", "optimization", "LLM", "benchmarking", "javascript", "typescript", "python", "java"]
---
# How Codeflash Works
Codeflash follows a "generate and verify" approach to optimize code. It uses LLMs to generate optimizations, then rigorously verifies that those optimizations are indeed
faster and that they behave the same as the original code. The basic unit of optimization is a function—Codeflash tries to speed up the function while ensuring that it still behaves the same way. This way, if you merge the optimized code, it simply runs faster without breaking any functionality.
Codeflash supports **Python**, **JavaScript**, and **TypeScript** projects.
Codeflash supports **Python**, **JavaScript**, **TypeScript**, and **Java** projects.
## Analysis of your code
Codeflash scans your codebase to identify all available functions. It locates existing unit tests in your project and maps which functions they test. When optimizing a function, Codeflash runs these discovered tests to verify nothing has broken.
For Python, code analysis uses `libcst` and `jedi`. For JavaScript/TypeScript, it uses `tree-sitter` for AST parsing.
For Python, code analysis uses `libcst` and `jedi`. For JavaScript/TypeScript and Java, it uses `tree-sitter` for AST parsing.
#### What kind of functions can Codeflash optimize?
@ -25,7 +25,7 @@ Codeflash supports optimizing async functions in all supported languages.
#### Test Discovery
Codeflash discovers tests that directly call the target function in their test body. For Python, it finds pytest and unittest tests. For JavaScript/TypeScript, it finds Jest and Vitest test files.
Codeflash discovers tests that directly call the target function in their test body. For Python, it finds pytest and unittest tests. For JavaScript/TypeScript, it finds Jest and Vitest test files. For Java, it finds JUnit 5, JUnit 4, and TestNG test classes.
To discover tests that indirectly call the function, you can use the Codeflash Tracer. The Tracer analyzes your test suite and identifies all tests that eventually call a function.
@ -54,12 +54,12 @@ We recommend manually reviewing the optimized code since there might be importan
Codeflash generates two types of tests:
- **LLM Generated tests** - Codeflash uses LLMs to create several regression test cases that cover typical function usage, edge cases, and large-scale inputs to verify both correctness and performance. This works for Python, JavaScript, and TypeScript.
- **LLM Generated tests** - Codeflash uses LLMs to create several regression test cases that cover typical function usage, edge cases, and large-scale inputs to verify both correctness and performance. This works for Python, JavaScript, TypeScript, and Java.
- **Concolic coverage tests** - Codeflash uses state-of-the-art concolic testing with an SMT Solver (a theorem prover) to explore execution paths and generate function arguments. This aims to maximize code coverage for the function being optimized. Currently, this feature only supports Python (pytest).
## Code Execution
Codeflash runs tests for the target function on your machine. For Python, it uses pytest or unittest. For JavaScript/TypeScript, it uses Jest or Vitest. Running on your machine ensures access to your environment and dependencies, and provides accurate performance measurements since runtime varies by system.
Codeflash runs tests for the target function on your machine. For Python, it uses pytest or unittest. For JavaScript/TypeScript, it uses Jest or Vitest. For Java, it uses Maven Surefire or Gradle's test task. Running on your machine ensures access to your environment and dependencies, and provides accurate performance measurements since runtime varies by system.
#### Performance benchmarking

View file

@ -47,7 +47,38 @@ uv tool install codeflash
```
</Step>
<Step title="Initialize your project">
<Step title="Authenticate with Codeflash">
Codeflash uses cloud-hosted AI models. You need to authenticate before running any commands.
**Option A: Browser login (recommended)**
```bash
codeflash auth login
```
This opens your browser to sign in with your GitHub account. Your API key is saved automatically to your shell profile.
If you're on a remote server without a browser, a URL will be displayed that you can open on any device.
**Option B: API key**
1. Visit the [Codeflash Web App](https://app.codeflash.ai/) and sign up with your GitHub account (free tier available)
2. Navigate to the [API Key](https://app.codeflash.ai/app/apikeys) page to generate your key
3. Set it as an environment variable:
```bash
export CODEFLASH_API_KEY="your-api-key-here"
```
Add this to your shell profile (`~/.bashrc`, `~/.zshrc`) so it persists across sessions.
<Info>
If you skip this step, `codeflash init` will prompt you to authenticate interactively.
</Info>
</Step>
<Step title="Initialize your project (recommended)">
Navigate to your Java project root (where `pom.xml` or `build.gradle` is) and run:
@ -55,10 +86,33 @@ Navigate to your Java project root (where `pom.xml` or `build.gradle` is) and ru
codeflash init
```
This will:
- Detect your build tool (Maven/Gradle)
- Find your source and test directories
- Write Codeflash configuration to your `pom.xml` properties (Maven) or `gradle.properties` (Gradle)
The init command will:
1. **Auto-detect your project** — find your build tool, source root (e.g., `src/main/java`), test root (e.g., `src/test/java`), and test framework
2. **Confirm settings** — show the detected values and ask if you want to change anything
3. **Configure formatter** — let you set up a code formatter (e.g., Spotless, google-java-format)
4. **Install GitHub App** — offer to set up the [Codeflash GitHub App](https://github.com/apps/codeflash-ai/installations/select_target) for automatic PR creation (see next step)
5. **Install GitHub Actions** — offer to add a CI workflow for automated optimization on PRs
Only non-default settings are written to your `pom.xml` properties (Maven) or `gradle.properties` (Gradle). For standard layouts, no config changes are needed.
<Info>
**Can I skip init?** Yes. For standard Maven/Gradle projects, Codeflash auto-detects your project structure from `pom.xml` or `build.gradle` at runtime. If you're already authenticated and your project uses a standard layout (`src/main/java`, `src/test/java`), you can skip straight to optimizing.
Init is recommended because it also sets up the GitHub App and Actions workflow, and lets you override paths for non-standard project layouts (e.g., multi-module projects where source is under `client/src/`).
</Info>
</Step>
<Step title="Install the Codeflash GitHub App (recommended)">
To have Codeflash create pull requests with optimizations automatically, install the GitHub App:
[Install Codeflash GitHub App](https://github.com/apps/codeflash-ai/installations/select_target)
Select the repositories you want Codeflash to optimize. This allows the codeflash-ai bot to open PRs with optimization suggestions in your repository.
<Info>
If you prefer to try Codeflash locally first, you can skip this step and use the `--no-pr` flag to apply optimizations directly to your local files (see next step).
</Info>
</Step>
<Step title="Run your first optimization">
@ -69,6 +123,12 @@ Optimize a specific function:
codeflash --file src/main/java/com/example/Utils.java --function myMethod
```
If you installed the GitHub App, Codeflash will create a pull request with the optimization. If you haven't installed the app yet, or prefer to review changes locally first, add `--no-pr`:
```bash
codeflash --file src/main/java/com/example/Utils.java --function myMethod --no-pr
```
Or optimize all functions in your project:
```bash
@ -80,7 +140,7 @@ Codeflash will:
2. Generate tests and optimization candidates using AI
3. Verify correctness by running tests (JUnit 5, JUnit 4, or TestNG)
4. Benchmark performance improvements
5. Create a pull request with the optimization (if the GitHub App is installed)
5. Create a pull request with the optimization (or apply locally with `--no-pr`)
For advanced workflow tracing (profiling a running Java program), see [Trace & Optimize](/optimizing-with-codeflash/trace-and-optimize).

View file

@ -3,7 +3,7 @@ title: "Optimize Your Entire Codebase"
description: "Automatically optimize all codepaths in your project with Codeflash's comprehensive analysis"
icon: "database"
sidebarTitle: "Optimize Entire Codebase"
keywords: ["codebase optimization", "all functions", "batch optimization", "github app", "checkpoint", "recovery", "javascript", "typescript", "python"]
keywords: ["codebase optimization", "all functions", "batch optimization", "github app", "checkpoint", "recovery", "javascript", "typescript", "python", "java"]
---
# Optimize your entire codebase
@ -45,6 +45,11 @@ codeflash --all path/to/dir
codeflash optimize --trace-only --vitest ; codeflash --all
```
</Tab>
<Tab title="Java">
```bash
codeflash optimize --timeout 60 java -cp target/classes com.example.Main ; codeflash --all
```
</Tab>
</Tabs>
This runs your test suite, traces all the code covered by your tests, ensuring higher correctness guarantees

View file

@ -93,5 +93,7 @@ codeflash --file path/to/your/file.ts --function ClassName.methodName
```bash
codeflash --file src/main/java/com/example/Utils.java --function methodName
```
In Java, use just the method name — no `ClassName.` prefix is needed. Codeflash discovers the method by name within the specified file.
</Tab>
</Tabs>

View file

@ -60,12 +60,12 @@ codeflash optimize --language javascript script.js
To trace and optimize a running Java program, replace your `java` command with `codeflash optimize java`:
```bash
# JAR application
codeflash optimize java -jar target/my-app.jar --app-args
# Class with classpath
# Class with classpath (recommended — works with any compiled project)
codeflash optimize java -cp target/classes com.example.Main
# Executable JAR (requires maven-jar-plugin or equivalent with Main-Class manifest)
codeflash optimize java -jar target/my-app.jar --app-args
# Maven exec
codeflash optimize mvn exec:java -Dexec.mainClass="com.example.Main"
```
@ -73,7 +73,7 @@ codeflash optimize mvn exec:java -Dexec.mainClass="com.example.Main"
For long-running programs (servers, benchmarks), use `--timeout` to limit each tracing stage:
```bash
codeflash optimize --timeout 30 java -jar target/my-app.jar
codeflash optimize --timeout 30 java -cp target/classes com.example.Main
```
</Tab>
</Tabs>
@ -228,13 +228,15 @@ The Java tracer uses a **two-stage approach**: JFR (Java Flight Recorder) for ac
Replace your `java` command with `codeflash optimize java`:
```bash
# JAR application
codeflash optimize java -jar target/my-app.jar --app-args
# Class with classpath
# Class with classpath (recommended — works with any compiled project)
codeflash optimize java -cp target/classes com.example.Main
# Executable JAR (requires maven-jar-plugin or equivalent with Main-Class manifest)
codeflash optimize java -jar target/my-app.jar --app-args
```
The `-cp` approach works with any project after `mvn compile` or `gradle build`. The `-jar` approach requires your project to produce an executable JAR with a `Main-Class` entry in the manifest — this is not the default Maven behavior.
Codeflash will run your program twice (once for profiling, once for argument capture), generate JUnit replay tests, then optimize the most impactful functions.
2. **Long-running programs**
@ -242,7 +244,7 @@ The Java tracer uses a **two-stage approach**: JFR (Java Flight Recorder) for ac
For servers, benchmarks, or programs that don't terminate on their own, use `--timeout` to limit each tracing stage:
```bash
codeflash optimize --timeout 30 java -jar target/my-benchmark.jar
codeflash optimize --timeout 30 java -cp target/classes com.example.Main
```
Each stage runs for at most 30 seconds, then the program is terminated and captured data is processed.

View file

@ -0,0 +1,18 @@
import os
import pathlib
from end_to_end_test_utilities import TestConfig, run_codeflash_command, run_with_retries
def run_test(expected_improvement_pct: int) -> bool:
    """Run Codeflash end to end against the Java ``bubbleSortInPlace`` fixture.

    Args:
        expected_improvement_pct: Improvement threshold, forwarded unchanged to
            ``run_codeflash_command``.

    Returns:
        The result reported by ``run_codeflash_command`` for this run.
    """
    # The Java project lives in code_to_optimize/java under the third parent
    # directory of this file.
    base_dir = pathlib.Path(__file__).parent.parent.parent
    java_project_dir = (base_dir / "code_to_optimize" / "java").resolve()
    sorter_config = TestConfig(
        file_path="src/main/java/com/example/InPlaceSorter.java",
        function_name="bubbleSortInPlace",
        min_improvement_x=0.70,
    )
    return run_codeflash_command(java_project_dir, sorter_config, expected_improvement_pct)
if __name__ == "__main__":
    # Raise SystemExit directly rather than calling the `exit` builtin: `exit`
    # is injected by the `site` module for interactive sessions and is missing
    # when Python is started with -S. The process exit status is unchanged.
    raise SystemExit(run_with_retries(run_test, int(os.getenv("EXPECTED_IMPROVEMENT_PCT", 70))))

View file

@ -8,6 +8,7 @@ import pytest
from codeflash.code_utils.code_utils import (
cleanup_paths,
exit_with_message,
file_name_from_test_module_name,
file_path_from_module_name,
get_all_function_names,
@ -751,3 +752,33 @@ class MyClass:
"""
result = validate_python_code(code)
assert result == code
class TestExitWithMessageSubagent:
    """Checks what ``exit_with_message`` writes to stdout with subagent mode patched on and off."""

    @patch("codeflash.code_utils.code_utils.is_subagent_mode", return_value=True)
    def test_outputs_structured_xml_in_subagent_mode(
        self, _mock_subagent: MagicMock, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """Subagent mode: exit code is 1 and stdout holds the error tags plus the message."""
        with pytest.raises(SystemExit) as raised:
            exit_with_message("Something went wrong", error_on_exit=True)

        assert raised.value.code == 1
        stdout = capsys.readouterr().out
        for fragment in ("<codeflash-error>", "Something went wrong", "</codeflash-error>"):
            assert fragment in stdout

    @patch("codeflash.code_utils.code_utils.is_subagent_mode", return_value=True)
    def test_escapes_xml_special_chars(
        self, _mock_subagent: MagicMock, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """Subagent mode: ``<``, ``>`` and ``&`` from the message show up XML-escaped on stdout."""
        with pytest.raises(SystemExit):
            exit_with_message('File <foo> & "bar" not found', error_on_exit=True)

        stdout = capsys.readouterr().out
        assert "&lt;foo&gt;" in stdout
        assert "&amp;" in stdout

    @patch("codeflash.code_utils.code_utils.is_subagent_mode", return_value=False)
    @patch("codeflash.code_utils.code_utils.is_LSP_enabled", return_value=False)
    def test_no_xml_when_not_subagent(
        self, _mock_lsp: MagicMock, _mock_subagent: MagicMock, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """Subagent and LSP both off: no ``<codeflash-error>`` tag is written to stdout."""
        # Stacked @patch decorators inject their mocks bottom-up, so the LSP
        # mock arrives before the subagent mock.
        with pytest.raises(SystemExit):
            exit_with_message("Normal error", error_on_exit=True)

        stdout = capsys.readouterr().out
        assert "<codeflash-error>" not in stdout

View file

@ -11,7 +11,18 @@
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<checkstyle.skip>true</checkstyle.skip>
<disable.checks>true</disable.checks>
<spotbugs.skip>true</spotbugs.skip>
<pmd.skip>true</pmd.skip>
<rat.skip>true</rat.skip>
<enforcer.skip>true</enforcer.skip>
<japicmp.skip>true</japicmp.skip>
<checkstyle.failOnViolation>false</checkstyle.failOnViolation>
<checkstyle.failsOnError>false</checkstyle.failsOnError>
<maven-checkstyle-plugin.failsOnError>false</maven-checkstyle-plugin.failsOnError>
<maven-checkstyle-plugin.failOnViolation>false</maven-checkstyle-plugin.failOnViolation>
</properties>
<dependencies>
<dependency>
@ -62,6 +73,26 @@
</execution>
</executions>
</plugin>
</plugins>
<!-- codeflash-validation-skip -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<configuration>
<skip>true</skip>
<failOnViolation>false</failOnViolation>
<failsOnError>false</failsOnError>
</configuration>
</plugin>
<plugin>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<configuration><skip>true</skip></configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<configuration><skip>true</skip></configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -830,3 +830,28 @@ class TestGradleVersionUpdate:
content = build_file.read_text(encoding="utf-8")
# Should preserve the 8-space indent
assert f" testImplementation 'com.codeflash:codeflash-runtime:{CODEFLASH_RUNTIME_VERSION}'" in content
class TestValidationSkipFlags:
    """Verify that the Maven skip flags and the Gradle skip init script mention every expected plugin."""

    def test_maven_skip_flags_include_spotless(self):
        """Both Spotless skip properties appear among the Maven skip flags."""
        from codeflash.languages.java.maven_strategy import _MAVEN_VALIDATION_SKIP_FLAGS

        # Checks are substring matches against the space-joined flag list.
        joined_flags = " ".join(_MAVEN_VALIDATION_SKIP_FLAGS)
        for spotless_flag in ("-Dspotless.check.skip=true", "-Dspotless.apply.skip=true"):
            assert spotless_flag in joined_flags

    def test_maven_skip_flags_include_all_known_plugins(self):
        """Every known plugin name occurs somewhere in the Maven skip flags."""
        from codeflash.languages.java.maven_strategy import _MAVEN_VALIDATION_SKIP_FLAGS

        joined_flags = " ".join(_MAVEN_VALIDATION_SKIP_FLAGS)
        known_plugins = (
            "rat",
            "checkstyle",
            "spotbugs",
            "pmd",
            "enforcer",
            "japicmp",
            "errorprone",
            "spotless",
        )
        for plugin in known_plugins:
            assert plugin in joined_flags, f"Missing skip flag for {plugin}"

    def test_gradle_skip_script_includes_spotless(self):
        """The Gradle skip init script text contains the three Spotless names."""
        from codeflash.languages.java.gradle_strategy import _GRADLE_SKIP_VALIDATION_INIT_SCRIPT

        init_script = _GRADLE_SKIP_VALIDATION_INIT_SCRIPT
        for marker in ("spotlessCheck", "spotlessApply", "spotlessJava"):
            assert marker in init_script

View file

@ -22,7 +22,6 @@ os.environ["CODEFLASH_API_KEY"] = "cf-test-key"
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.languages.base import Language
from codeflash.languages.current import set_current_language
from codeflash.languages.java.maven_strategy import MavenStrategy
from codeflash.languages.java.discovery import discover_functions_from_source
from codeflash.languages.java.instrumentation import (
_add_behavior_instrumentation,
@ -34,6 +33,7 @@ from codeflash.languages.java.instrumentation import (
instrument_generated_java_test,
remove_instrumentation,
)
from codeflash.languages.java.maven_strategy import MavenStrategy
class TestInstrumentForBehavior:
@ -2177,7 +2177,7 @@ public class AccentTest {
# Skip all E2E tests if Maven is not available
requires_maven = pytest.mark.skipif(
MavenStrategy().find_executable(Path(".")) is None, reason="Maven not found - skipping execution tests"
MavenStrategy().find_executable(Path()) is None, reason="Maven not found - skipping execution tests"
)
@ -3485,3 +3485,444 @@ public class SpinWaitTest__perfonlyinstrumented {
assert math.isclose(duration, 100_000_000, rel_tol=0.15), (
f"Long spin measured {duration}ns, expected ~100_000_000ns (15% tolerance)"
)
class TestVoidMethodInstrumentation:
"""Tests for void method instrumentation — behavior mode captures receiver state."""
def test_behavior_mode_void_method_serializes_receiver(self, tmp_path: Path):
    """Void method instrumentation should serialize the receiver, not a return value.

    Writes a ``Sorter`` class with a void ``sort(int[])`` method and a JUnit
    test that calls it, instruments that test in ``"behavior"`` mode, and
    compares the result against the complete expected Java source.
    """
    # Java source containing the void method under test.
    source_file = (tmp_path / "Sorter.java").resolve()
    source_file.write_text(
        "public class Sorter {\n"
        " public void sort(int[] data) {\n"
        " java.util.Arrays.sort(data);\n"
        " }\n"
        "}\n",
        encoding="utf-8",
    )
    # Existing test that calls sorter.sort(data) as a bare statement (no assertion on a result).
    test_file = (tmp_path / "SorterTest.java").resolve()
    test_source = (
        "import org.junit.jupiter.api.Test;\n"
        "\n"
        "public class SorterTest {\n"
        " @Test\n"
        " public void testSort() {\n"
        " Sorter sorter = new Sorter();\n"
        " int[] data = {3, 1, 2};\n"
        " sorter.sort(data);\n"
        " }\n"
        "}\n"
    )
    test_file.write_text(test_source, encoding="utf-8")
    # Describe the target: the `sort` method on lines 2-4 of Sorter.java.
    func = FunctionToOptimize(
        function_name="sort",
        file_path=source_file,
        starting_line=2,
        ending_line=4,
        parents=[],
        is_method=True,
        language="java",
    )
    success, result = instrument_existing_test(
        test_string=test_source, function_to_optimize=func, mode="behavior", test_path=test_file
    )
    assert success is True
    # Expected output: the class gains a __perfinstrumented suffix, the sort call
    # sits between two System.nanoTime() reads inside a try block, the receiver and
    # argument are serialized together as `new Object[]{sorter, data}` after the call,
    # and the SQLite row is written with verification type "void_state".
    assert result == (
        "import org.junit.jupiter.api.Test;\n"
        "import java.sql.Connection;\n"
        "import java.sql.DriverManager;\n"
        "import java.sql.PreparedStatement;\n"
        "\n"
        '@SuppressWarnings("CheckReturnValue")\n'
        "public class SorterTest__perfinstrumented {\n"
        " @Test\n"
        " public void testSort() {\n"
        " // Codeflash behavior instrumentation\n"
        ' int _cf_loop1 = Integer.parseInt(System.getenv("CODEFLASH_LOOP_INDEX"));\n'
        " int _cf_iter1 = 1;\n"
        ' String _cf_mod1 = "SorterTest__perfinstrumented";\n'
        ' String _cf_cls1 = "SorterTest__perfinstrumented";\n'
        ' String _cf_fn1 = "sort";\n'
        ' String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");\n'
        ' String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");\n'
        ' if (_cf_testIteration1 == null) _cf_testIteration1 = "0";\n'
        ' String _cf_test1 = "testSort";\n'
        " Sorter sorter = new Sorter();\n"
        " int[] data = {3, 1, 2};\n"
        " long _cf_end1_1 = -1;\n"
        " long _cf_start1_1 = 0;\n"
        " byte[] _cf_serializedResult1_1 = null;\n"
        ' System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":L12_1" + "######$!");\n'
        " try {\n"
        " _cf_start1_1 = System.nanoTime();\n"
        " sorter.sort(data);\n"
        " _cf_end1_1 = System.nanoTime();\n"
        " _cf_serializedResult1_1 = com.codeflash.Serializer.serialize(new Object[]{sorter, data});\n"
        " } finally {\n"
        " long _cf_end1_1_finally = System.nanoTime();\n"
        " long _cf_dur1_1 = (_cf_end1_1 != -1 ? _cf_end1_1 : _cf_end1_1_finally) - _cf_start1_1;\n"
        ' System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "L12_1" + "######!");\n'
        " // Write to SQLite if output file is set\n"
        " if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {\n"
        " try {\n"
        ' Class.forName("org.sqlite.JDBC");\n'
        ' try (Connection _cf_conn1_1 = DriverManager.getConnection("jdbc:sqlite:" + _cf_outputFile1)) {\n'
        " try (java.sql.Statement _cf_stmt1_1 = _cf_conn1_1.createStatement()) {\n"
        ' _cf_stmt1_1.execute("CREATE TABLE IF NOT EXISTS test_results (" +\n'
        ' "test_module_path TEXT, test_class_name TEXT, test_function_name TEXT, " +\n'
        ' "function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " +\n'
        ' "runtime INTEGER, return_value BLOB, verification_type TEXT)");\n'
        " }\n"
        ' String _cf_sql1_1 = "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";\n'
        " try (PreparedStatement _cf_pstmt1_1 = _cf_conn1_1.prepareStatement(_cf_sql1_1)) {\n"
        " _cf_pstmt1_1.setString(1, _cf_mod1);\n"
        " _cf_pstmt1_1.setString(2, _cf_cls1);\n"
        " _cf_pstmt1_1.setString(3, _cf_test1);\n"
        " _cf_pstmt1_1.setString(4, _cf_fn1);\n"
        " _cf_pstmt1_1.setInt(5, _cf_loop1);\n"
        ' _cf_pstmt1_1.setString(6, "L12_1");\n'
        " _cf_pstmt1_1.setLong(7, _cf_dur1_1);\n"
        " _cf_pstmt1_1.setBytes(8, _cf_serializedResult1_1);\n"
        ' _cf_pstmt1_1.setString(9, "void_state");\n'
        " _cf_pstmt1_1.executeUpdate();\n"
        " }\n"
        " }\n"
        " } catch (Exception _cf_e1_1) {\n"
        ' System.err.println("CodeflashHelper: SQLite error: " + _cf_e1_1.getMessage());\n'
        " }\n"
        " }\n"
        " }\n"
        " }\n"
        "}\n"
    )
def test_behavior_mode_void_method_implicit_this_receiver(self, tmp_path: Path):
    """Behavior-mode instrumentation of a void call made without a receiver.

    The test body invokes ``clear()`` directly, so the expected output
    serializes ``new Object[]{this}`` and stores the row with the
    ``"void_state"`` verification type.
    """
    # Java source containing the void method under optimization.
    container_java = (
        "public class Container {\n"
        " public void clear() {\n"
        " // clears internal state\n"
        " }\n"
        "}\n"
    )
    java_source_path = (tmp_path / "Container.java").resolve()
    java_source_path.write_text(container_java, encoding="utf-8")

    # JUnit test that calls the method with an implicit receiver.
    junit_source = (
        "import org.junit.jupiter.api.Test;\n"
        "\n"
        "public class ContainerTest {\n"
        " @Test\n"
        " public void testClear() {\n"
        " clear();\n"
        " }\n"
        "}\n"
    )
    junit_path = (tmp_path / "ContainerTest.java").resolve()
    junit_path.write_text(junit_source, encoding="utf-8")

    target = FunctionToOptimize(
        function_name="clear",
        file_path=java_source_path,
        starting_line=2,
        ending_line=4,
        parents=[],
        is_method=True,
        language="java",
    )

    ok, instrumented = instrument_existing_test(
        test_string=junit_source,
        function_to_optimize=target,
        mode="behavior",
        test_path=junit_path,
    )

    # Exact text the instrumenter is expected to emit; compared verbatim.
    expected_output = (
        "import org.junit.jupiter.api.Test;\n"
        "import java.sql.Connection;\n"
        "import java.sql.DriverManager;\n"
        "import java.sql.PreparedStatement;\n"
        "\n"
        '@SuppressWarnings("CheckReturnValue")\n'
        "public class ContainerTest__perfinstrumented {\n"
        " @Test\n"
        " public void testClear() {\n"
        " // Codeflash behavior instrumentation\n"
        ' int _cf_loop1 = Integer.parseInt(System.getenv("CODEFLASH_LOOP_INDEX"));\n'
        " int _cf_iter1 = 1;\n"
        ' String _cf_mod1 = "ContainerTest__perfinstrumented";\n'
        ' String _cf_cls1 = "ContainerTest__perfinstrumented";\n'
        ' String _cf_fn1 = "clear";\n'
        ' String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");\n'
        ' String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");\n'
        ' if (_cf_testIteration1 == null) _cf_testIteration1 = "0";\n'
        ' String _cf_test1 = "testClear";\n'
        " long _cf_end1_1 = -1;\n"
        " long _cf_start1_1 = 0;\n"
        " byte[] _cf_serializedResult1_1 = null;\n"
        ' System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":L10_1" + "######$!");\n'
        " try {\n"
        " _cf_start1_1 = System.nanoTime();\n"
        " clear();\n"
        " _cf_end1_1 = System.nanoTime();\n"
        " _cf_serializedResult1_1 = com.codeflash.Serializer.serialize(new Object[]{this});\n"
        " } finally {\n"
        " long _cf_end1_1_finally = System.nanoTime();\n"
        " long _cf_dur1_1 = (_cf_end1_1 != -1 ? _cf_end1_1 : _cf_end1_1_finally) - _cf_start1_1;\n"
        ' System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "L10_1" + "######!");\n'
        " // Write to SQLite if output file is set\n"
        " if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {\n"
        " try {\n"
        ' Class.forName("org.sqlite.JDBC");\n'
        ' try (Connection _cf_conn1_1 = DriverManager.getConnection("jdbc:sqlite:" + _cf_outputFile1)) {\n'
        " try (java.sql.Statement _cf_stmt1_1 = _cf_conn1_1.createStatement()) {\n"
        ' _cf_stmt1_1.execute("CREATE TABLE IF NOT EXISTS test_results (" +\n'
        ' "test_module_path TEXT, test_class_name TEXT, test_function_name TEXT, " +\n'
        ' "function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " +\n'
        ' "runtime INTEGER, return_value BLOB, verification_type TEXT)");\n'
        " }\n"
        ' String _cf_sql1_1 = "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";\n'
        " try (PreparedStatement _cf_pstmt1_1 = _cf_conn1_1.prepareStatement(_cf_sql1_1)) {\n"
        " _cf_pstmt1_1.setString(1, _cf_mod1);\n"
        " _cf_pstmt1_1.setString(2, _cf_cls1);\n"
        " _cf_pstmt1_1.setString(3, _cf_test1);\n"
        " _cf_pstmt1_1.setString(4, _cf_fn1);\n"
        " _cf_pstmt1_1.setInt(5, _cf_loop1);\n"
        ' _cf_pstmt1_1.setString(6, "L10_1");\n'
        " _cf_pstmt1_1.setLong(7, _cf_dur1_1);\n"
        " _cf_pstmt1_1.setBytes(8, _cf_serializedResult1_1);\n"
        ' _cf_pstmt1_1.setString(9, "void_state");\n'
        " _cf_pstmt1_1.executeUpdate();\n"
        " }\n"
        " }\n"
        " } catch (Exception _cf_e1_1) {\n"
        ' System.err.println("CodeflashHelper: SQLite error: " + _cf_e1_1.getMessage());\n'
        " }\n"
        " }\n"
        " }\n"
        " }\n"
        "}\n"
    )

    assert ok is True
    assert instrumented == expected_output
def test_behavior_mode_non_void_still_captures_result(self, tmp_path: Path):
    """Behavior-mode instrumentation of a value-returning method.

    The expected output stores the call result in ``_cf_result1_1``,
    serializes that value (not the receiver), records the row with the
    ``"function_call"`` verification type, and rewrites the original
    assertion to read the captured result.
    """
    # Java source containing the non-void method under optimization.
    calculator_java = (
        "public class Calculator {\n"
        " public int add(int a, int b) {\n"
        " return a + b;\n"
        " }\n"
        "}\n"
    )
    java_source_path = (tmp_path / "Calculator.java").resolve()
    java_source_path.write_text(calculator_java, encoding="utf-8")

    # JUnit test whose assertion wraps the call being instrumented.
    junit_source = (
        "import org.junit.jupiter.api.Test;\n"
        "\n"
        "public class CalculatorTest {\n"
        " @Test\n"
        " public void testAdd() {\n"
        " Calculator calc = new Calculator();\n"
        " assertEquals(4, calc.add(2, 2));\n"
        " }\n"
        "}\n"
    )
    junit_path = (tmp_path / "CalculatorTest.java").resolve()
    junit_path.write_text(junit_source, encoding="utf-8")

    target = FunctionToOptimize(
        function_name="add",
        file_path=java_source_path,
        starting_line=2,
        ending_line=4,
        parents=[],
        is_method=True,
        language="java",
    )

    ok, instrumented = instrument_existing_test(
        test_string=junit_source,
        function_to_optimize=target,
        mode="behavior",
        test_path=junit_path,
    )

    # Exact text the instrumenter is expected to emit; compared verbatim.
    expected_output = (
        "import org.junit.jupiter.api.Test;\n"
        "import java.sql.Connection;\n"
        "import java.sql.DriverManager;\n"
        "import java.sql.PreparedStatement;\n"
        "\n"
        '@SuppressWarnings("CheckReturnValue")\n'
        "public class CalculatorTest__perfinstrumented {\n"
        " @Test\n"
        " public void testAdd() {\n"
        " // Codeflash behavior instrumentation\n"
        ' int _cf_loop1 = Integer.parseInt(System.getenv("CODEFLASH_LOOP_INDEX"));\n'
        " int _cf_iter1 = 1;\n"
        ' String _cf_mod1 = "CalculatorTest__perfinstrumented";\n'
        ' String _cf_cls1 = "CalculatorTest__perfinstrumented";\n'
        ' String _cf_fn1 = "add";\n'
        ' String _cf_outputFile1 = System.getenv("CODEFLASH_OUTPUT_FILE");\n'
        ' String _cf_testIteration1 = System.getenv("CODEFLASH_TEST_ITERATION");\n'
        ' if (_cf_testIteration1 == null) _cf_testIteration1 = "0";\n'
        ' String _cf_test1 = "testAdd";\n'
        " Calculator calc = new Calculator();\n"
        " Object _cf_result1_1 = null;\n"
        " long _cf_end1_1 = -1;\n"
        " long _cf_start1_1 = 0;\n"
        " byte[] _cf_serializedResult1_1 = null;\n"
        ' System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":L11_1" + "######$!");\n'
        " try {\n"
        " _cf_start1_1 = System.nanoTime();\n"
        " _cf_result1_1 = calc.add(2, 2);\n"
        " _cf_end1_1 = System.nanoTime();\n"
        " _cf_serializedResult1_1 = com.codeflash.Serializer.serialize((Object) _cf_result1_1);\n"
        " } finally {\n"
        " long _cf_end1_1_finally = System.nanoTime();\n"
        " long _cf_dur1_1 = (_cf_end1_1 != -1 ? _cf_end1_1 : _cf_end1_1_finally) - _cf_start1_1;\n"
        ' System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loop1 + ":" + "L11_1" + "######!");\n'
        " // Write to SQLite if output file is set\n"
        " if (_cf_outputFile1 != null && !_cf_outputFile1.isEmpty()) {\n"
        " try {\n"
        ' Class.forName("org.sqlite.JDBC");\n'
        ' try (Connection _cf_conn1_1 = DriverManager.getConnection("jdbc:sqlite:" + _cf_outputFile1)) {\n'
        " try (java.sql.Statement _cf_stmt1_1 = _cf_conn1_1.createStatement()) {\n"
        ' _cf_stmt1_1.execute("CREATE TABLE IF NOT EXISTS test_results (" +\n'
        ' "test_module_path TEXT, test_class_name TEXT, test_function_name TEXT, " +\n'
        ' "function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " +\n'
        ' "runtime INTEGER, return_value BLOB, verification_type TEXT)");\n'
        " }\n"
        ' String _cf_sql1_1 = "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";\n'
        " try (PreparedStatement _cf_pstmt1_1 = _cf_conn1_1.prepareStatement(_cf_sql1_1)) {\n"
        " _cf_pstmt1_1.setString(1, _cf_mod1);\n"
        " _cf_pstmt1_1.setString(2, _cf_cls1);\n"
        " _cf_pstmt1_1.setString(3, _cf_test1);\n"
        " _cf_pstmt1_1.setString(4, _cf_fn1);\n"
        " _cf_pstmt1_1.setInt(5, _cf_loop1);\n"
        ' _cf_pstmt1_1.setString(6, "L11_1");\n'
        " _cf_pstmt1_1.setLong(7, _cf_dur1_1);\n"
        " _cf_pstmt1_1.setBytes(8, _cf_serializedResult1_1);\n"
        ' _cf_pstmt1_1.setString(9, "function_call");\n'
        " _cf_pstmt1_1.executeUpdate();\n"
        " }\n"
        " }\n"
        " } catch (Exception _cf_e1_1) {\n"
        ' System.err.println("CodeflashHelper: SQLite error: " + _cf_e1_1.getMessage());\n'
        " }\n"
        " }\n"
        " }\n"
        " assertEquals(4, (int)_cf_result1_1);\n"
        " }\n"
        "}\n"
    )

    assert ok is True
    assert instrumented == expected_output
def test_void_discovery_with_require_return_false(self):
    """Discovery includes void methods only when ``require_return`` is False.

    With ``require_return=False`` both the void and the int-returning method
    are reported; with ``require_return=True`` the void method is filtered
    out and the int-returning one remains.
    """
    from codeflash.languages.base import FunctionFilterCriteria
    from codeflash.languages.java.discovery import discover_functions_from_source

    java_code = (
        "public class Example {\n"
        " public void doSomething() {\n"
        ' System.out.println("hello");\n'
        " }\n"
        "\n"
        " public int getValue() {\n"
        " return 42;\n"
        " }\n"
        "}\n"
    )

    def discovered_names(require_return):
        # Run discovery under the given filter and collect the method names.
        criteria = FunctionFilterCriteria(require_return=require_return)
        found = discover_functions_from_source(java_code, filter_criteria=criteria)
        return {fn.function_name for fn in found}

    names_without_filter = discovered_names(False)
    assert "doSomething" in names_without_filter
    assert "getValue" in names_without_filter

    names_with_filter = discovered_names(True)
    assert "doSomething" not in names_with_filter
    assert "getValue" in names_with_filter
def test_performance_mode_void_method_generates_valid_code(self, tmp_path: Path):
    """Performance-mode instrumentation of a void method call.

    The expected output wraps ``sorter.sort(data)`` in an inner timing loop
    that prints start and end markers around the call; no result capture or
    SQLite write appears in the expected text.
    """
    # Java source containing the void method under optimization.
    sorter_java = (
        "public class Sorter {\n"
        " public void sort(int[] data) {\n"
        " java.util.Arrays.sort(data);\n"
        " }\n"
        "}\n"
    )
    java_source_path = (tmp_path / "Sorter.java").resolve()
    java_source_path.write_text(sorter_java, encoding="utf-8")

    # JUnit test that calls the void method on an explicit receiver.
    junit_source = (
        "import org.junit.jupiter.api.Test;\n"
        "\n"
        "public class SorterTest {\n"
        " @Test\n"
        " public void testSort() {\n"
        " Sorter sorter = new Sorter();\n"
        " int[] data = {3, 1, 2};\n"
        " sorter.sort(data);\n"
        " }\n"
        "}\n"
    )
    junit_path = (tmp_path / "SorterTest.java").resolve()
    junit_path.write_text(junit_source, encoding="utf-8")

    target = FunctionToOptimize(
        function_name="sort",
        file_path=java_source_path,
        starting_line=2,
        ending_line=4,
        parents=[],
        is_method=True,
        language="java",
    )

    ok, instrumented = instrument_existing_test(
        test_string=junit_source,
        function_to_optimize=target,
        mode="performance",
        test_path=junit_path,
    )

    # Exact text the instrumenter is expected to emit; compared verbatim.
    expected_output = (
        "import org.junit.jupiter.api.Test;\n"
        "\n"
        '@SuppressWarnings("CheckReturnValue")\n'
        "public class SorterTest__perfonlyinstrumented {\n"
        " @Test\n"
        " public void testSort() {\n"
        " // Codeflash timing instrumentation with inner loop for JIT warmup\n"
        ' int _cf_outerLoop1 = Integer.parseInt(System.getenv("CODEFLASH_LOOP_INDEX"));\n'
        ' int _cf_maxInnerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "10"));\n'
        ' int _cf_innerIterations1 = Integer.parseInt(System.getenv().getOrDefault("CODEFLASH_INNER_ITERATIONS", "10"));\n'
        ' String _cf_mod1 = "SorterTest__perfonlyinstrumented";\n'
        ' String _cf_cls1 = "SorterTest__perfonlyinstrumented";\n'
        ' String _cf_test1 = "testSort";\n'
        ' String _cf_fn1 = "sort";\n'
        " \n"
        " Sorter sorter = new Sorter();\n"
        " int[] data = {3, 1, 2};\n"
        " for (int _cf_i1 = 0; _cf_i1 < _cf_innerIterations1; _cf_i1++) {\n"
        " int _cf_loopId1 = _cf_outerLoop1 * _cf_maxInnerIterations1 + _cf_i1;\n"
        ' System.out.println("!$######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loopId1 + ":" + "L9_1" + "######$!");\n'
        " long _cf_end1 = -1;\n"
        " long _cf_start1 = 0;\n"
        " try {\n"
        " _cf_start1 = System.nanoTime();\n"
        " sorter.sort(data);\n"
        " _cf_end1 = System.nanoTime();\n"
        " } finally {\n"
        " long _cf_end1_finally = System.nanoTime();\n"
        " long _cf_dur1 = (_cf_end1 != -1 ? _cf_end1 : _cf_end1_finally) - _cf_start1;\n"
        ' System.out.println("!######" + _cf_mod1 + ":" + _cf_cls1 + "." + _cf_test1 + ":" + _cf_fn1 + ":" + _cf_loopId1 + ":" + "L9_1" + ":" + _cf_dur1 + "######!");\n'
        " }\n"
        " }\n"
        " }\n"
        "}\n"
    )

    assert ok is True
    assert instrumented == expected_output

View file

@ -512,13 +512,16 @@ public class PreciseWaiterTest {
stddev_runtime = statistics.stdev(runtimes)
coefficient_of_variation = stddev_runtime / mean_runtime
# Target: 10ms (10,000,000 ns), allow <5% coefficient of variation
# (accounts for JIT warmup - first iteration is cold, subsequent are optimized)
# Target: 10ms (10,000,000 ns), allow <15% coefficient of variation.
# The first iteration per test method runs with cold JIT, and shared CI VMs
# (especially Windows) have ~15ms scheduler granularity that adds noise.
# 15% still catches instrumentation bugs (e.g., 0ms or 100ms outliers)
# while the ±5% mean check below validates timing accuracy.
expected_ns = 10_000_000
runtimes_ms = [r / 1_000_000 for r in runtimes]
assert coefficient_of_variation < 0.05, (
f"Timing variance too high: CV={coefficient_of_variation:.2%} (should be <5%). "
assert coefficient_of_variation < 0.15, (
f"Timing variance too high: CV={coefficient_of_variation:.2%} (should be <15%). "
f"Runtimes: {runtimes_ms} ms (mean={mean_runtime / 1_000_000:.3f}ms)"
)
@ -597,13 +600,16 @@ public class PreciseWaiterMultiTest {
stddev_runtime = statistics.stdev(runtimes)
coefficient_of_variation = stddev_runtime / mean_runtime
# Target: 10ms (10,000,000 ns), allow <5% coefficient of variation
# (accounts for JIT warmup - first iteration is cold, subsequent are optimized)
# Target: 10ms (10,000,000 ns), allow <15% coefficient of variation.
# The first iteration per test method runs with cold JIT, and shared CI VMs
# (especially Windows) have ~15ms scheduler granularity that adds noise.
# 15% still catches instrumentation bugs (e.g., 0ms or 100ms outliers)
# while the ±5% mean check below validates timing accuracy.
expected_ns = 10_000_000
runtimes_ms = [r / 1_000_000 for r in runtimes]
assert coefficient_of_variation < 0.05, (
f"Timing variance too high: CV={coefficient_of_variation:.2%} (should be <5%). "
assert coefficient_of_variation < 0.15, (
f"Timing variance too high: CV={coefficient_of_variation:.2%} (should be <15%). "
f"Runtimes: {runtimes_ms} ms (mean={mean_runtime / 1_000_000:.3f}ms)"
)

View file

@ -122,7 +122,7 @@ class TestJestRootsConfiguration:
runtime_configs = [f for f in get_created_config_files() if "codeflash.runtime" in f.name]
assert len(runtime_configs) == 1, f"Expected 1 runtime config, got {len(runtime_configs)}"
config_content = runtime_configs[0].read_text(encoding="utf-8")
assert str(external_path) in config_content, "Runtime config should contain external test directory"
assert external_path.as_posix() in config_content, "Runtime config should contain external test directory"
clear_created_config_files()

View file

@ -91,7 +91,7 @@ try {{
capture_output=True,
text=True,
cwd=project_path,
timeout=5,
timeout=30,
)
assert result.returncode == 0, (
@ -148,7 +148,7 @@ try {{
capture_output=True,
text=True,
cwd=project_path,
timeout=5,
timeout=30,
)
assert result.returncode == 0, f"JS config should load: {result.stderr}"