Merge remote-tracking branch 'origin/main' into fix-dependabot-vulns

This commit is contained in:
Kevin Turcios 2026-03-10 16:54:29 -06:00
commit 748094c7e0
12 changed files with 93 additions and 82 deletions

View file

@@ -198,12 +198,20 @@ jobs:
For each PR:
- If CI passes and the PR is mergeable → merge with `--squash --delete-branch`
- Close the PR as stale if ANY of these apply:
- If CI is failing:
  1. Check out the PR branch and inspect the failing tests
  2. Attempt to fix the failures (the optimization may have broken tests or introduced issues)
  3. If fixed: commit, push, and leave a comment explaining what was fixed
  4. If unfixable: close with `gh pr close <number> --comment "Closing: CI checks are failing — <describe the specific failures and why they can't be auto-fixed>." --delete-branch`
- Close the PR (without attempting fixes) if ANY of these apply:
  - Older than 7 days
  - Has merge conflicts (mergeable state is "CONFLICTING")
  - CI is failing
  - The optimized function no longer exists in the target file (check the diff)
  Close with: `gh pr close <number> --comment "Closing stale optimization PR." --delete-branch`
  Close with: `gh pr close <number> --comment "<reason>" --delete-branch`
  where <reason> explains WHY the PR is being closed. Examples:
  - "Closing: PR is older than 7 days without being merged."
  - "Closing: merge conflicts with the target branch."
  - "Closing: the optimized function no longer exists in the target file."
</step>
<verification>
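
A minimal sketch of the triage loop this step describes, driven through the GitHub CLI. The JSON field names follow `gh pr list --json`, the close/merge flags match the commands quoted above, and the CI-fixing branch is omitted for brevity:

import json
import subprocess
from datetime import datetime, timedelta, timezone

def gh(*args: str) -> str:
    # Thin wrapper over the GitHub CLI; raises if the command fails.
    return subprocess.run(["gh", *args], check=True, capture_output=True, text=True).stdout

for pr in json.loads(gh("pr", "list", "--json", "number,mergeable,createdAt")):
    number = str(pr["number"])
    created = datetime.fromisoformat(pr["createdAt"].replace("Z", "+00:00"))
    if pr["mergeable"] == "CONFLICTING":
        gh("pr", "close", number, "--comment", "Closing: merge conflicts with the target branch.", "--delete-branch")
    elif datetime.now(timezone.utc) - created > timedelta(days=7):
        gh("pr", "close", number, "--comment", "Closing: PR is older than 7 days without being merged.", "--delete-branch")
    elif pr["mergeable"] == "MERGEABLE":
        gh("pr", "merge", number, "--squash", "--delete-branch")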

View file

@@ -23,6 +23,7 @@ def parse_args() -> Namespace:
    args.yes = True
    args.no_pr = True
    args.worktree = True
    args.effort = "low"
    return process_and_validate_cmd_args(args)

View file

@@ -1,5 +1,6 @@
from __future__ import annotations
import contextlib
import logging
from collections import deque
from contextlib import contextmanager
@@ -408,6 +409,7 @@ def subagent_log_optimization_result(
    new_code: dict[Path, str],
    review: str,
    test_results: TestResults,
    project_root: Path | None = None,
) -> None:
    import sys
    from xml.sax.saxutils import escape
@@ -421,7 +423,11 @@ def subagent_log_optimization_result(
        old = original_code.get(path, "")
        new = new_code.get(path, "")
        if old != new:
            diff = unified_diff_strings(old, new, fromfile=str(path), tofile=str(path))
            display_path = str(path)
            if project_root is not None:
                with contextlib.suppress(ValueError):
                    display_path = str(path.relative_to(project_root))
            diff = unified_diff_strings(old, new, fromfile=display_path, tofile=display_path)
            if diff:
                diff_parts.append(diff)
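
In isolation, the display-path fallback added here behaves like the sketch below: Path.relative_to raises ValueError when the file lies outside project_root, and contextlib.suppress leaves the absolute path in place in that case (the helper name is illustrative):

import contextlib
from pathlib import Path

def display_path_for(path: Path, project_root: Path | None) -> str:
    display = str(path)
    if project_root is not None:
        with contextlib.suppress(ValueError):
            # relative_to() raises ValueError for paths outside project_root.
            display = str(path.relative_to(project_root))
    return display

assert display_path_for(Path("/repo/src/mod.py"), Path("/repo")) == "src/mod.py"
assert display_path_for(Path("/tmp/mod.py"), Path("/repo")) == "/tmp/mod.py"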
@@ -468,18 +474,17 @@ def subagent_log_optimization_result(
    if new_code[path] != original_code.get(path, ""):
        xml.append(f' <optimized-code file="{escape(str(path))}">{escape(new_code[path])}</optimized-code>')
    xml.append(" <action>")
    xml.append(" First, present a short summary: function name, performance improvement, and explanation.")
    xml.append(" If you are running inside an IDE (VS Code, Cursor, Windsurf, etc.):")
    xml.append(" For each optimized-code element, use the Write tool to write the optimized code to the file.")
    xml.append(" This will surface the diff in the IDE's native diff view for the user to accept or reject.")
    xml.append(" If you are running in a terminal without an IDE connection:")
    xml.append(" Present this optimization to the user using AskUserQuestion.")
    xml.append(" Show the function name, performance improvement, and explanation as the question text.")
    xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
    xml.append(" 1. Review the diff and optimized code yourself. Write a brief assessment (2-3 sentences) covering:")
    xml.append(" - Whether the optimization is correct and preserves behavior")
    xml.append(" - What technique was used (e.g. algorithmic improvement, caching, vectorization)")
    xml.append(" - Any risks or trade-offs (e.g. increased memory usage, reduced readability)")
    xml.append(" 2. Present this optimization to the user using AskUserQuestion.")
    xml.append(" Show the function name, performance improvement, and your review as the question text.")
    xml.append(" Provide two options: 'Apply' (write the optimized code to the file) and 'Reject' (do nothing).")
    xml.append(
        " Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
        " Put the full diff in the 'Apply' option's markdown preview so the user can review the exact changes."
    )
    xml.append(" If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
    xml.append(" 3. If the user chooses 'Apply', write the content from optimized-code to the corresponding file.")
    xml.append(" </action>")
    xml.append("</codeflash-optimization>")

View file

@@ -694,22 +694,9 @@ def get_js_codeflash_install_step(pkg_manager: JsPackageManager, *, is_dependency: bool) -> str:
        # Codeflash will be installed with other dependencies
        return ""
    # Need to install codeflash separately
    if pkg_manager == JsPackageManager.BUN:
        return """- name: 📥 Install Codeflash
  run: bun add -g codeflash"""
    if pkg_manager == JsPackageManager.PNPM:
        return """- name: 📥 Install Codeflash
  run: pnpm add -g codeflash"""
    if pkg_manager == JsPackageManager.YARN:
        return """- name: 📥 Install Codeflash
  run: yarn global add codeflash"""
    # NPM or UNKNOWN
    # Install codeflash via uv (Python + uv are set up in the workflow)
    return """- name: 📥 Install Codeflash
  run: npm install -g codeflash"""
  run: uv tool install codeflash"""
def get_js_codeflash_run_command(pkg_manager: JsPackageManager, *, is_dependency: bool) -> str:

View file

@@ -27,6 +27,12 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: 🐍 Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: 📦 Setup uv
        uses: astral-sh/setup-uv@v4
      {{ setup_runtime_steps }}
      - name: 📦 Install Dependencies
        run: {{ install_dependencies_command }}

View file

@@ -27,7 +27,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - {{ setup_python_dependency_manager }}
      - {{ setup_runtime_environment }}
      - name: 📦 Install Dependencies
        run: {{ install_dependencies_command }}
      - name: Codeflash Optimization
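
For context, {{ ... }} placeholders like these are typically expanded by plain string substitution before the workflow file is written out. A hypothetical renderer; the names and values below are illustrative, not this repo's actual implementation:

template = (
    "      - {{ setup_runtime_environment }}\n"
    "      - name: Install Dependencies\n"
    "        run: {{ install_dependencies_command }}\n"
)
values = {
    "setup_runtime_environment": "uses: actions/setup-node@v4",
    "install_dependencies_command": "npm ci",
}
rendered = template
for key, val in values.items():
    # Substitute each placeholder with its configured value.
    rendered = rendered.replace("{{ " + key + " }}", val)
print(rendered)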

View file

@@ -1018,16 +1018,18 @@ class FunctionOptimizer:
            runtimes_list.append(new_best_opt.runtime)
        if len(optimization_ids) > 1:
            future_ranking = self.executor.submit(
                ai_service_client.generate_ranking,
                diffs=diff_strs,
                optimization_ids=optimization_ids,
                speedups=speedups_list,
                trace_id=self.get_trace_id(exp_type),
                function_references=function_references,
            )
            concurrent.futures.wait([future_ranking])
            ranking = future_ranking.result()
            ranking = None
            if not is_subagent_mode():
                future_ranking = self.executor.submit(
                    ai_service_client.generate_ranking,
                    diffs=diff_strs,
                    optimization_ids=optimization_ids,
                    speedups=speedups_list,
                    trace_id=self.get_trace_id(exp_type),
                    function_references=function_references,
                )
                concurrent.futures.wait([future_ranking])
                ranking = future_ranking.result()
            if ranking:
                min_key = ranking[0]
            else:
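
The gating above reduces to a small pattern: skip the remote ranking call in subagent mode and fall back to a local choice. A self-contained model, where rank_remotely stands in for ai_service_client.generate_ranking and the max() fallback is illustrative (the real else branch falls outside this hunk):

from concurrent.futures import ThreadPoolExecutor

def rank_remotely(speedups: dict[str, float]) -> list[str]:
    # Stand-in for the AI service ranking call: best candidate first.
    return sorted(speedups, key=speedups.__getitem__, reverse=True)

def pick_best(speedups: dict[str, float], *, subagent_mode: bool) -> str:
    ranking = None
    if not subagent_mode:
        with ThreadPoolExecutor() as executor:
            ranking = executor.submit(rank_remotely, speedups).result()
    if ranking:
        return ranking[0]
    # Local fallback when no remote ranking was requested.
    return max(speedups, key=speedups.__getitem__)

assert pick_best({"opt-a": 1.2, "opt-b": 2.5}, subagent_mode=True) == "opt-b"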
@@ -2390,6 +2392,25 @@
        code_context: CodeOptimizationContext,
        function_references: str,
    ) -> None:
        if is_subagent_mode():
            subagent_log_optimization_result(
                function_name=explanation.function_name,
                file_path=explanation.file_path,
                perf_improvement_line=explanation.perf_improvement_line,
                original_runtime_ns=explanation.original_runtime_ns,
                best_runtime_ns=explanation.best_runtime_ns,
                raw_explanation=explanation.raw_explanation_message,
                original_code=original_code_combined,
                new_code=new_code_combined,
                review="",
                test_results=explanation.winning_behavior_test_results,
                project_root=self.project_root,
            )
            mark_optimization_success(
                trace_id=self.function_trace_id, is_optimization_found=best_optimization is not None
            )
            return
        coverage_message = (
            original_code_baseline.coverage_results.build_message()
            if original_code_baseline.coverage_results
@@ -2537,20 +2558,7 @@ class FunctionOptimizer:
        self.optimization_review = opt_review_result.review
        # Display the reviewer result to the user
        if is_subagent_mode():
            subagent_log_optimization_result(
                function_name=new_explanation.function_name,
                file_path=new_explanation.file_path,
                perf_improvement_line=new_explanation.perf_improvement_line,
                original_runtime_ns=new_explanation.original_runtime_ns,
                best_runtime_ns=new_explanation.best_runtime_ns,
                raw_explanation=new_explanation.raw_explanation_message,
                original_code=original_code_combined,
                new_code=new_code_combined,
                review=opt_review_result.review,
                test_results=new_explanation.winning_behavior_test_results,
            )
        elif opt_review_result.review:
        if opt_review_result.review:
            review_display = {
                "high": ("[bold green]High[/bold green]", "green", "Recommended to merge"),
                "medium": ("[bold yellow]Medium[/bold yellow]", "yellow", "Review recommended before merging"),
@@ -2667,12 +2675,15 @@ class FunctionOptimizer:
            logger.debug(
                f"[PIPELINE] Test file {idx}: behavior={tf.instrumented_behavior_file_path}, perf={tf.benchmarking_file_path}"
            )
        total_looping_time = (
            TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
        )
        behavioral_results, coverage_results = self.run_and_parse_tests(
            testing_type=TestingMode.BEHAVIOR,
            test_env=test_env,
            test_files=self.test_files,
            optimization_iteration=0,
            testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
            testing_time=total_looping_time,
            enable_coverage=True,
            code_context=code_context,
        )
@@ -2713,6 +2724,7 @@ class FunctionOptimizer:
            self.instrument_async_for_mode(TestingMode.PERFORMANCE)
        try:
            subagent = is_subagent_mode()
            benchmarking_results, _ = self.run_and_parse_tests(
                testing_type=TestingMode.PERFORMANCE,
                test_env=test_env,
@@ -2721,6 +2733,7 @@ class FunctionOptimizer:
                testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
                enable_coverage=False,
                code_context=code_context,
                **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
            )
            logger.debug(f"[BENCHMARK-DONE] Got {len(benchmarking_results.test_results)} benchmark results")
        finally:
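
The **({...} if subagent else {}) construction used here (and again in the candidate-benchmarking hunk below) is plain conditional keyword unpacking: unpacking an empty dict passes nothing, so the callee's defaults apply outside subagent mode. In isolation, with illustrative names and defaults:

def run_tests(*, testing_time: float, pytest_min_loops: int = 1, pytest_max_loops: int = 100_000) -> tuple[int, int]:
    # Echo the loop bounds so the effect of the unpacking is visible.
    return pytest_min_loops, pytest_max_loops

subagent = True
assert run_tests(testing_time=5.0, **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {})) == (3, 100)
assert run_tests(testing_time=5.0) == (1, 100_000)  # defaults apply when nothing is unpacked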
@@ -2871,6 +2884,10 @@ class FunctionOptimizer:
        try:
            self.instrument_capture(file_path_to_helper_classes)
            total_looping_time = (
                TOTAL_LOOPING_TIME_EFFECTIVE / 2 if is_subagent_mode() else TOTAL_LOOPING_TIME_EFFECTIVE
            )
            candidate_behavior_results, _ = self.run_and_parse_tests(
                testing_type=TestingMode.BEHAVIOR,
                test_env=test_env,
@@ -2911,6 +2928,7 @@ class FunctionOptimizer:
            self.instrument_async_for_mode(TestingMode.PERFORMANCE)
        try:
            subagent = is_subagent_mode()
            candidate_benchmarking_results, _ = self.run_and_parse_tests(
                testing_type=TestingMode.PERFORMANCE,
                test_env=test_env,
@@ -2918,6 +2936,7 @@ class FunctionOptimizer:
                optimization_iteration=optimization_candidate_index,
                testing_time=TOTAL_LOOPING_TIME_EFFECTIVE,
                enable_coverage=False,
                **({"pytest_min_loops": 3, "pytest_max_loops": 100} if subagent else {}),
            )
        finally:
            if self.function_to_optimize.is_async:

View file

@@ -1,2 +1,2 @@
# These version placeholders will be replaced by uv-dynamic-versioning during build.
__version__ = "0.20.1"
__version__ = "0.20.2"

View file

@@ -1,12 +1,12 @@
{
  "name": "codeflash",
  "version": "0.10.1",
  "version": "0.10.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "codeflash",
      "version": "0.10.1",
      "version": "0.10.2",
      "hasInstallScript": true,
      "license": "MIT",
      "dependencies": {

View file

@@ -1,6 +1,6 @@
{
  "name": "codeflash",
  "version": "0.10.1",
  "version": "0.10.2",
  "description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",
  "main": "runtime/index.js",
  "types": "runtime/index.d.ts",

View file

@@ -131,30 +131,13 @@ class TestGetJsCodeflashInstallStep:
        assert result == ""
    def test_npm_global_install(self) -> None:
        """Should generate npm global install when not a dependency."""
        result = get_js_codeflash_install_step(JsPackageManager.NPM, is_dependency=False)
    def test_uv_tool_install_when_not_dependency(self) -> None:
        """Should generate uv tool install when not a dependency, regardless of package manager."""
        for pkg_manager in (JsPackageManager.NPM, JsPackageManager.YARN, JsPackageManager.PNPM, JsPackageManager.BUN):
            result = get_js_codeflash_install_step(pkg_manager, is_dependency=False)
        assert "Install Codeflash" in result
        assert "npm install -g codeflash" in result
    def test_yarn_global_install(self) -> None:
        """Should generate yarn global install when not a dependency."""
        result = get_js_codeflash_install_step(JsPackageManager.YARN, is_dependency=False)
        assert "yarn global add codeflash" in result
    def test_pnpm_global_install(self) -> None:
        """Should generate pnpm global install when not a dependency."""
        result = get_js_codeflash_install_step(JsPackageManager.PNPM, is_dependency=False)
        assert "pnpm add -g codeflash" in result
    def test_bun_global_install(self) -> None:
        """Should generate bun global install when not a dependency."""
        result = get_js_codeflash_install_step(JsPackageManager.BUN, is_dependency=False)
        assert "bun add -g codeflash" in result
            assert "Install Codeflash" in result
            assert "uv tool install codeflash" in result
class TestGetJsCodeflashRunCommand:
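
An equivalent formulation of the consolidated test using pytest.mark.parametrize, which reports each package manager as its own test case instead of stopping at the first failing manager (assumes the same imports as the surrounding test module):

import pytest

@pytest.mark.parametrize(
    "pkg_manager",
    [JsPackageManager.NPM, JsPackageManager.YARN, JsPackageManager.PNPM, JsPackageManager.BUN],
)
def test_uv_tool_install(pkg_manager) -> None:
    # JsPackageManager and get_js_codeflash_install_step come from the module under test.
    result = get_js_codeflash_install_step(pkg_manager, is_dependency=False)
    assert "Install Codeflash" in result
    assert "uv tool install codeflash" in result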

View file

@@ -1,5 +1,6 @@
"""Tests for JavaScript/Jest test runner functionality."""
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock
@@ -896,6 +897,7 @@ class TestBundledJestReporter:
        reporter_args = [a for a in cmd if "--reporters=codeflash/jest-reporter" in a]
        assert len(reporter_args) == 1
    @pytest.mark.skipif(sys.platform == "win32", reason="Node.js subprocess pipe behavior unreliable on Windows CI")
    def test_reporter_produces_valid_junit_xml(self):
        """The reporter JS should produce JUnit XML parseable by junitparser."""
        import subprocess
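
For reference, "parseable by junitparser" means the reporter's output loads cleanly through junitparser's file API. A minimal check, with a hypothetical output file name:

from junitparser import JUnitXml

xml = JUnitXml.fromfile("jest-results.xml")
for suite in xml:
    for case in suite:
        # case.result is empty for passing cases (junitparser 2.x).
        print(case.name, case.result)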