fix: resolve all ruff lint errors across repo (#38)

* fix: resolve all ruff lint errors across repo

  Auto-fixed 31 errors (unused imports, formatting, simplifications).
  Manually fixed 14 remaining:
  - EXE001: removed shebangs from non-executable bench scripts
  - C417: replaced map(lambda) with generator expression
  - C901/PLR0915: extracted _write_and_instrument_tests from generate_ai_tests
  - C901/PLR0912: extracted _parse_toml_addopts and _ini_section_name from modify_addopts
  - RUF001/RUF002: replaced ambiguous Unicode chars (en dash, multiplication sign)
  - FBT002: made boolean params keyword-only in report functions
  - E402: moved `import re` to top of file in security reports

* fix: resolve pre-existing mypy errors across packages

  - _testgen.py: annotate `generated` as `str` to avoid no-any-return
  - _test_runner.py: use str() for TimeoutExpired stdout/stderr (bytes|str), remove unused type: ignore on proc.kill()
  - _candidate_eval.py: annotate `speedup` as `float` to avoid no-any-return from lazy-loaded performance_gain
2026-05-04 18:25:19 +00:00 · 2026-04-23 10:22:42 -05:00 · 2026-04-23 10:22:42 -05:00 · 3ee9c22c8e
commit 3ee9c22c8e
parent c249bcd0ce
24 changed files with 1706 additions and 1078 deletions
--- a/.codeflash/krrt7/plotly/plotly.py/bench/bench_split_multichar.py
+++ b/.codeflash/krrt7/plotly/plotly.py/bench/bench_split_multichar.py
@ -1,4 +1,3 @@
 #!/usr/bin/env python
 """Benchmark for split_multichar optimization (Target 6)"""
 import sys
@ -12,7 +11,7 @@ def split_multichar_original(ss, chars):
    if len(chars) == 0:
        return ss
    c = chars.pop()
-    ss = reduce(lambda x, y: x + y, map(lambda x: x.split(c), ss))
+    ss = reduce(lambda x, y: x + y, (x.split(c) for x in ss))
    return split_multichar_original(ss, chars)
@ -70,7 +69,7 @@ def main():
        # Verify results match
        if orig_result != opt_result:
-            print(f"  ERROR: Results don't match!")
+            print("  ERROR: Results don't match!")
            print(f"    Original: {orig_result}")
            print(f"    Optimized: {opt_result}")
            sys.exit(1)
@ -78,8 +77,8 @@ def main():
        # Report results
        speedup = orig_time / opt_time
        print(f"  Result: {opt_result}")
-        print(f"  Original:  {orig_time*1e6:.2f} µs")
+        print(f"  Original:  {orig_time * 1e6:.2f} µs")
-        print(f"  Optimized: {opt_time*1e6:.2f} µs")
+        print(f"  Optimized: {opt_time * 1e6:.2f} µs")
        print(f"  Speedup:   {speedup:.2f}x")
    print("\n" + "=" * 60)
--- a/.codeflash/krrt7/plotly/plotly.py/bench/bench_vals_equal.py
+++ b/.codeflash/krrt7/plotly/plotly.py/bench/bench_vals_equal.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python
 """Benchmark for _vals_equal optimization (Target 5)"""
 import sys
 import time
 import numpy as np
@ -13,20 +13,19 @@ def vals_equal_original(v1, v2):
        isinstance(v1, np.ndarray) or isinstance(v2, np.ndarray)
    ):
        return np.array_equal(v1, v2)
-    elif isinstance(v1, (list, tuple)):
+    if isinstance(v1, (list, tuple)):
        return (
            isinstance(v2, (list, tuple))
            and len(v1) == len(v2)
            and all(vals_equal_original(e1, e2) for e1, e2 in zip(v1, v2))
        )
-    elif isinstance(v1, dict):
+    if isinstance(v1, dict):
        return (
            isinstance(v2, dict)
            and set(v1.keys()) == set(v2.keys())
            and all(vals_equal_original(v1[k], v2[k]) for k in v1)
        )
-    else:
+    return v1 == v2
        return v1 == v2
 # Optimized implementation with early exits
@ -43,20 +42,19 @@ def vals_equal_optimized(v1, v2):
            return False
        # Now do element-wise comparison (np.array_equal handles dtype conversion)
        return np.array_equal(v1, v2)
-    elif isinstance(v1, (list, tuple)):
+    if isinstance(v1, (list, tuple)):
        return (
            isinstance(v2, (list, tuple))
            and len(v1) == len(v2)
            and all(vals_equal_optimized(e1, e2) for e1, e2 in zip(v1, v2))
        )
-    elif isinstance(v1, dict):
+    if isinstance(v1, dict):
        return (
            isinstance(v2, dict)
            and set(v1.keys()) == set(v2.keys())
            and all(vals_equal_optimized(v1[k], v2[k]) for k in v1)
        )
-    else:
+    return v1 == v2
        return v1 == v2
 def benchmark(func, v1, v2, iterations=10000):
@ -77,29 +75,28 @@ def main():
    # Test cases
    test_cases = [
-        ("Equal arrays (same data)",
+        (
-         np.array([1, 2, 3, 4, 5] * 100),
+            "Equal arrays (same data)",
-         np.array([1, 2, 3, 4, 5] * 100)),
+            np.array([1, 2, 3, 4, 5] * 100),
-
+            np.array([1, 2, 3, 4, 5] * 100),
-        ("Different shapes (early exit)",
+        ),
-         np.array([1, 2, 3, 4, 5] * 100),
+        (
-         np.array([1, 2, 3, 4] * 100)),
+            "Different shapes (early exit)",
-
+            np.array([1, 2, 3, 4, 5] * 100),
-        ("Different dtypes (but same values)",
+            np.array([1, 2, 3, 4] * 100),
-         np.array([1, 2, 3, 4, 5] * 100, dtype=np.int32),
+        ),
-         np.array([1, 2, 3, 4, 5] * 100, dtype=np.float64)),
+        (
-
+            "Different dtypes (but same values)",
-        ("Different values (late difference)",
+            np.array([1, 2, 3, 4, 5] * 100, dtype=np.int32),
-         np.array([1, 2, 3, 4, 5] * 100),
+            np.array([1, 2, 3, 4, 5] * 100, dtype=np.float64),
-         np.array([1, 2, 3, 4, 6] * 100)),
+        ),
-
+        (
-        ("Large equal arrays",
+            "Different values (late difference)",
-         np.arange(10000),
+            np.array([1, 2, 3, 4, 5] * 100),
-         np.arange(10000)),
+            np.array([1, 2, 3, 4, 6] * 100),
-
+        ),
-        ("Array vs scalar (type mismatch)",
+        ("Large equal arrays", np.arange(10000), np.arange(10000)),
-         np.array([1, 2, 3]),
+        ("Array vs scalar (type mismatch)", np.array([1, 2, 3]), [1, 2, 3]),
         [1, 2, 3]),
    ]
    for name, v1, v2 in test_cases:
@ -113,16 +110,16 @@ def main():
        # Verify results match
        if orig_result != opt_result:
-            print(f"  ERROR: Results don't match!")
+            print("  ERROR: Results don't match!")
            print(f"    Original: {orig_result}")
            print(f"    Optimized: {opt_result}")
            sys.exit(1)
        # Report results
-        speedup = orig_time / opt_time if opt_time > 0 else float('inf')
+        speedup = orig_time / opt_time if opt_time > 0 else float("inf")
        print(f"  Result: {orig_result}")
-        print(f"  Original:  {orig_time*1e6:.2f} µs")
+        print(f"  Original:  {orig_time * 1e6:.2f} µs")
-        print(f"  Optimized: {opt_time*1e6:.2f} µs")
+        print(f"  Optimized: {opt_time * 1e6:.2f} µs")
        print(f"  Speedup:   {speedup:.2f}x")
    print("\n" + "=" * 60)
--- a/packages/codeflash-api/tests/test_repair.py
+++ b/packages/codeflash-api/tests/test_repair.py
@ -11,9 +11,9 @@ from codeflash_api.repair._context import (
    is_valid_repair,
 )
 from codeflash_api.repair.schemas import (
    CodeRepairRequest,
    BehaviorDiff,
    BehaviorDiffScope,
    CodeRepairRequest,
 )
 # -------------------------------------------------------------------
--- a/packages/codeflash-python/src/codeflash_python/analysis/_discovery_worker.py
+++ b/packages/codeflash-python/src/codeflash_python/analysis/_discovery_worker.py
@ -57,7 +57,9 @@ class PytestCollectionPlugin:
        global pytest_rootdir, collected_tests
        collected_tests.extend(session.items)
-        pytest_rootdir = getattr(session.config, "rootdir", None) or getattr(session.config, "rootpath", None)
+        pytest_rootdir = getattr(session.config, "rootdir", None) or getattr(
            session.config, "rootpath", None
        )
        # Write results immediately since pytest.main() will exit after
        # this callback, not always with a success code.
@ -87,7 +89,8 @@ if __name__ == "__main__":
                tests_root,
                "-p",
                "no:logging",
-                "-o", "addopts=",
+                "-o",
                "addopts=",
                "--collect-only",
                "-m",
                "not skip",
--- a/packages/codeflash-python/src/codeflash_python/pipeline/_candidate_eval.py
+++ b/packages/codeflash-python/src/codeflash_python/pipeline/_candidate_eval.py
@ -168,7 +168,7 @@ async def run_tests_and_benchmark(  # noqa: PLR0913
            optimized_runtime,
        )
-    speedup = performance_gain(
+    speedup: float = performance_gain(
        original_runtime_ns=baseline.runtime,
        optimized_runtime_ns=optimized_runtime,
    )
--- a/packages/codeflash-python/src/codeflash_python/pipeline/_test_orchestrator.py
+++ b/packages/codeflash-python/src/codeflash_python/pipeline/_test_orchestrator.py
@ -362,9 +362,7 @@ def generate_ai_tests(  # noqa: PLR0913
        AIServiceError,
    )
    from ..test_discovery.models import TestType  # noqa: PLC0415
    from ..testing._testgen import generate_tests  # noqa: PLC0415
    from ..testing.models import TestFile  # noqa: PLC0415
    n_tests = 2  # matches original effort default
    testgen_source = code_context.testgen_context.markdown
@ -444,10 +442,7 @@ def generate_ai_tests(  # noqa: PLR0913
    pending: list[PendingTest] = []
    with ThreadPoolExecutor(max_workers=n_tests) as pool:
-        futures = {
+        futures = {pool.submit(_generate_one, i): i for i in range(n_tests)}
            pool.submit(_generate_one, i): i
            for i in range(n_tests)
        }
        for future in as_completed(futures):
            p = future.result()
            if p is not None:
@ -467,19 +462,24 @@ def generate_ai_tests(  # noqa: PLR0913
        fn_input=fn_input,
    )
-    # Phase 4: write files, instrument client-side, create TestFile objects.
+    return _write_and_instrument_tests(pending, func, tests_rootdir)
 def _write_and_instrument_tests(
    pending: list[PendingTest],
    func: FunctionToOptimize,
    tests_rootdir: Path,
 ) -> list[TestFile]:
    """Write generated tests to disk and instrument for behavior/perf capture."""
    from .._model import TestingMode  # noqa: PLC0415
    from ..test_discovery.models import TestType  # noqa: PLC0415
    from ..testing._instrumentation import (  # noqa: PLC0415
        inject_profiling_into_existing_test,
    )
    from ..testing.models import TestFile  # noqa: PLC0415
    test_file_objects: list[TestFile] = []
-    for (
+    for _idx, generated_source, test_path, test_perf_path in pending:
        _idx,
        generated_source,
        test_path,
        test_perf_path,
    ) in pending:
        test_path.write_text(generated_source, encoding="utf-8")
        ok_beh, beh_src = inject_profiling_into_existing_test(
@ -497,10 +497,10 @@ def generate_ai_tests(  # noqa: PLR0913
            mode=TestingMode.PERFORMANCE,
        )
-        beh_path: _Path | None = test_path.parent / (
+        beh_path: Path | None = test_path.parent / (
            test_path.stem + "__perfinstrumented" + test_path.suffix
        )
-        perf_path: _Path | None = test_perf_path
+        perf_path: Path | None = test_perf_path
        if ok_beh and beh_src is not None:
            beh_path.write_text(beh_src, encoding="utf-8")  # type: ignore[union-attr]
@ -694,9 +694,7 @@ def build_test_env(
        module_root = str(_Path(test_cfg.module_root))
        existing = env.get("PYTHONPATH", "")
        env["PYTHONPATH"] = (
-            f"{module_root}{os.pathsep}{existing}"
+            f"{module_root}{os.pathsep}{existing}" if existing else module_root
            if existing
            else module_root
        )
    return env
--- a/packages/codeflash-python/src/codeflash_python/test_discovery/discovery.py
+++ b/packages/codeflash-python/src/codeflash_python/test_discovery/discovery.py
@ -93,7 +93,9 @@ def discover_unit_tests(
            for func in funcs
        ]
    return discover_tests_pytest(
-        cfg, discover_only_these_tests, functions_to_optimize,
+        cfg,
        discover_only_these_tests,
        functions_to_optimize,
    )
@ -229,5 +231,3 @@ def discover_tests_pytest(  # noqa: C901, PLR0912, PLR0915
        functions_to_optimize,
    )
    return _count_results(function_to_tests)
--- a/packages/codeflash-python/src/codeflash_python/test_discovery/linking.py
+++ b/packages/codeflash-python/src/codeflash_python/test_discovery/linking.py
@ -62,7 +62,6 @@ def module_name_from_file_path(
    return relative_path.with_suffix("").as_posix().replace("/", ".")
 def add_test_entries(  # noqa: PLR0913
    function_to_test_map: dict[str, set[FunctionCalledInTest]],
    qualified_name: str,
--- a/packages/codeflash-python/src/codeflash_python/testing/_pytest_config.py
+++ b/packages/codeflash-python/src/codeflash_python/testing/_pytest_config.py
@ -85,85 +85,75 @@ def filter_args(addopts_args: list[str]) -> list[str]:
    return filtered_args
 def _parse_toml_addopts(
    content: str,
 ) -> tuple[list[str], tomlkit.TOMLDocument, bool] | None:
    """Parse addopts from pyproject.toml, return (args, doc, uses_ini_options)."""
    data = tomlkit.parse(content)
    pytest_section = data.get("tool", {}).get("pytest", {})
    original_addopts = pytest_section.get("ini_options", {}).get(
        "addopts", ""
    ) or pytest_section.get("addopts", "")
    if original_addopts == "":
        return None
    uses_ini_options = (
        "ini_options" in pytest_section
        and "addopts" in pytest_section.get("ini_options", {})
    )
    if isinstance(original_addopts, list):
        original_addopts = " ".join(original_addopts)
    addopts_args = original_addopts.replace("=", " ").split()
    return addopts_args, data, uses_ini_options
 def _ini_section_name(filename: str) -> str:
    """Return the config section name that holds pytest addopts."""
    if filename in {"pytest.ini", ".pytest.ini", "tox.ini"}:
        return "pytest"
    return "tool:pytest"
 def modify_addopts(
    config_file: Path,
 ) -> tuple[str, bool]:
    """Modify addopts in *config_file*, return (original_content, was_modified)."""
    file_type = config_file.suffix.lower()
    filename = config_file.name
    config = None
    if file_type not in {".toml", ".ini", ".cfg"} or not config_file.exists():
        return "", False
    with config_file.open(encoding="utf-8") as f:
        content = f.read()
    try:
        if filename == "pyproject.toml":
-            data = tomlkit.parse(content)
+            parsed = _parse_toml_addopts(content)
-            pytest_section = data.get("tool", {}).get("pytest", {})
+            if parsed is None:
            original_addopts = (
                pytest_section.get("ini_options", {}).get("addopts", "")
                or pytest_section.get("addopts", "")
            )
            if original_addopts == "":
                return content, False
-            uses_ini_options = "ini_options" in pytest_section and "addopts" in pytest_section.get("ini_options", {})
+            addopts_args, data, uses_ini_options = parsed
            if isinstance(original_addopts, list):
                original_addopts = " ".join(original_addopts)
            original_addopts = original_addopts.replace("=", " ")
            addopts_args = original_addopts.split()
        else:
            config = configparser.ConfigParser()
            config.read_string(content)
            section = _ini_section_name(filename)
            cfg_data: dict[str, dict[str, str]] = {
-                section: dict(config[section]) for section in config.sections()
+                s: dict(config[s]) for s in config.sections()
            }
-            if filename in {
+            original = cfg_data.get(section, {}).get("addopts", "")
-                "pytest.ini",
+            addopts_args = original.replace("=", " ").split()
                ".pytest.ini",
                "tox.ini",
            }:
                original_addopts = cfg_data.get(
                    "pytest",
                    {},
                ).get("addopts", "")
            else:
                original_addopts = cfg_data.get(
                    "tool:pytest",
                    {},
                ).get("addopts", "")
            original_addopts = original_addopts.replace("=", " ")
            addopts_args = original_addopts.split()
        new_addopts_args = filter_args(addopts_args)
        if new_addopts_args == addopts_args:
            return content, False
        new_value = " ".join(new_addopts_args)
        if file_type == ".toml":
            if uses_ini_options:
-                data["tool"]["pytest"]["ini_options"]["addopts"] = (  # type: ignore[index]
+                data["tool"]["pytest"]["ini_options"]["addopts"] = new_value  # type: ignore[index]
                    " ".join(new_addopts_args)
                )
            else:
-                data["tool"]["pytest"]["addopts"] = (  # type: ignore[index]
+                data["tool"]["pytest"]["addopts"] = new_value  # type: ignore[index]
                    " ".join(new_addopts_args)
                )
            with config_file.open("w", encoding="utf-8") as f:
                f.write(tomlkit.dumps(data))
                return content, True
-        if filename in {"pytest.ini", ".pytest.ini", "tox.ini"}:
+        section = _ini_section_name(filename)
-            config.set(  # type: ignore[union-attr]
+        config.set(section, "addopts", new_value)
                "pytest",
                "addopts",
                " ".join(new_addopts_args),
            )
            with config_file.open("w", encoding="utf-8") as f:
                config.write(f)  # type: ignore[union-attr]
                return content, True
        config.set(  # type: ignore[union-attr]
            "tool:pytest",
            "addopts",
            " ".join(new_addopts_args),
        )
        with config_file.open("w", encoding="utf-8") as f:
-            config.write(f)  # type: ignore[union-attr]
+            config.write(f)
            return content, True
    except Exception:  # noqa: BLE001
--- a/packages/codeflash-python/src/codeflash_python/testing/_test_runner.py
+++ b/packages/codeflash-python/src/codeflash_python/testing/_test_runner.py
@ -71,8 +71,8 @@ def execute_test_subprocess(
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=-1,
-            stdout=exc.stdout or "",
+            stdout=str(exc.stdout) if exc.stdout else "",
-            stderr=exc.stderr or "",
+            stderr=str(exc.stderr) if exc.stderr else "",
        )
@ -383,7 +383,7 @@ async def async_execute_test_subprocess(
            timeout,
            " ".join(cmd_list),
        )
-        proc.kill()  # type: ignore[union-attr]
+        proc.kill()
        return subprocess.CompletedProcess(
            args=cmd_list,
            returncode=-1,
--- a/packages/codeflash-python/src/codeflash_python/testing/_testgen.py
+++ b/packages/codeflash-python/src/codeflash_python/testing/_testgen.py
@ -204,7 +204,7 @@ def repair_generated_tests(
        data = client.post("/testgen_repair", payload)
    except (AIServiceError, AIServiceConnectionError):
        return None
-    generated = data.get("generated_tests", "")
+    generated: str = data.get("generated_tests", "")
    if not generated:
        return None
    return generated
@ -272,9 +272,7 @@ def merge_unit_tests(
    )
    for node in ast.iter_child_nodes(modified_ast):
-        if isinstance(node, ast.FunctionDef) and node.name.startswith(
+        if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
            "test_"
        ):
            node.name = node.name + "__inspired"
    unit_test_source_ast.body.extend(modified_ast.body)
--- a/packages/codeflash-python/src/codeflash_python/testing/_xml_parser.py
+++ b/packages/codeflash-python/src/codeflash_python/testing/_xml_parser.py
@ -95,7 +95,9 @@ def parse_test_xml(  # noqa: C901, PLR0912, PLR0915
            )
            log.debug(
                "XML parse: suite_file=%r, classname=%r, testcase=%r, tc_attribs=%s",
-                test_file_name, class_name, testcase.name,
+                test_file_name,
                class_name,
                testcase.name,
                dict(testcase._elem.attrib),  # noqa: SLF001
            )
@ -194,8 +196,14 @@ def parse_test_xml(  # noqa: C901, PLR0912, PLR0915
                    "Known instrumented: %s, known original: %s",
                    test_file_path,
                    class_name,
-                    [str(tf.instrumented_behavior_file_path) for tf in test_files.test_files],
+                    [
-                    [str(tf.original_file_path) for tf in test_files.test_files],
+                        str(tf.instrumented_behavior_file_path)
                        for tf in test_files.test_files
                    ],
                    [
                        str(tf.original_file_path)
                        for tf in test_files.test_files
                    ],
                )
                log.warning(
                    "Test type not found for %s, skipping.",
--- a/packages/codeflash-python/tests/test_baseline.py
+++ b/packages/codeflash-python/tests/test_baseline.py
@ -494,8 +494,12 @@ class TestEstablishOriginalCodeBaseline:
        return test_files, test_config, test_env
    @patch("codeflash_python.testing._parse_results.parse_test_results")
-    @patch("codeflash_python.testing._test_runner.async_run_line_profile_tests")
+    @patch(
-    @patch("codeflash_python.testing._test_runner.async_run_benchmarking_tests")
+        "codeflash_python.testing._test_runner.async_run_line_profile_tests"
    )
    @patch(
        "codeflash_python.testing._test_runner.async_run_benchmarking_tests"
    )
    @patch("codeflash_python.testing._test_runner.async_run_behavioral_tests")
    async def test_successful_baseline(
        self,
@ -551,8 +555,12 @@ class TestEstablishOriginalCodeBaseline:
        assert result.runtime > 0
    @patch("codeflash_python.testing._parse_results.parse_test_results")
-    @patch("codeflash_python.testing._test_runner.async_run_line_profile_tests")
+    @patch(
-    @patch("codeflash_python.testing._test_runner.async_run_benchmarking_tests")
+        "codeflash_python.testing._test_runner.async_run_line_profile_tests"
    )
    @patch(
        "codeflash_python.testing._test_runner.async_run_benchmarking_tests"
    )
    @patch("codeflash_python.testing._test_runner.async_run_behavioral_tests")
    async def test_empty_behavioral_returns_none(
        self,
@ -585,8 +593,12 @@ class TestEstablishOriginalCodeBaseline:
        assert result is None
    @patch("codeflash_python.testing._parse_results.parse_test_results")
-    @patch("codeflash_python.testing._test_runner.async_run_line_profile_tests")
+    @patch(
-    @patch("codeflash_python.testing._test_runner.async_run_benchmarking_tests")
+        "codeflash_python.testing._test_runner.async_run_line_profile_tests"
    )
    @patch(
        "codeflash_python.testing._test_runner.async_run_benchmarking_tests"
    )
    @patch("codeflash_python.testing._test_runner.async_run_behavioral_tests")
    async def test_zero_benchmark_runtime_returns_none(
        self,
@ -641,8 +653,12 @@ class TestEstablishOriginalCodeBaseline:
        assert result is None
    @patch("codeflash_python.testing._parse_results.parse_test_results")
-    @patch("codeflash_python.testing._test_runner.async_run_line_profile_tests")
+    @patch(
-    @patch("codeflash_python.testing._test_runner.async_run_benchmarking_tests")
+        "codeflash_python.testing._test_runner.async_run_line_profile_tests"
    )
    @patch(
        "codeflash_python.testing._test_runner.async_run_benchmarking_tests"
    )
    @patch("codeflash_python.testing._test_runner.async_run_behavioral_tests")
    async def test_precomputed_behavioral_skips_behavioral_run(
        self,
@ -689,8 +705,12 @@ class TestEstablishOriginalCodeBaseline:
        assert precomputed is result.behavior_test_results
    @patch("codeflash_python.testing._parse_results.parse_test_results")
-    @patch("codeflash_python.testing._test_runner.async_run_line_profile_tests")
+    @patch(
-    @patch("codeflash_python.testing._test_runner.async_run_benchmarking_tests")
+        "codeflash_python.testing._test_runner.async_run_line_profile_tests"
    )
    @patch(
        "codeflash_python.testing._test_runner.async_run_benchmarking_tests"
    )
    @patch("codeflash_python.testing._test_runner.async_run_behavioral_tests")
    async def test_failed_regression_in_functions_to_remove(
        self,
--- a/packages/codeflash-python/tests/test_eval_worktree.py
+++ b/packages/codeflash-python/tests/test_eval_worktree.py
@ -33,9 +33,7 @@ def _make_project(tmp_path: Path) -> tuple[Path, Path]:
 class TestCreateProjectOverlay:
    """create_project_overlay directory structure."""
-    def test_overlay_contains_candidate_code(
+    def test_overlay_contains_candidate_code(self, tmp_path: Path) -> None:
        self, tmp_path: Path
    ) -> None:
        """The target module file has the candidate code."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
@ -61,9 +59,7 @@ class TestCreateProjectOverlay:
        finally:
            cleanup_overlay(overlay)
-    def test_project_root_siblings_are_symlinked(
+    def test_project_root_siblings_are_symlinked(self, tmp_path: Path) -> None:
        self, tmp_path: Path
    ) -> None:
        """Files at project root level are symlinked."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "code")
@ -95,9 +91,7 @@ class TestCreateProjectOverlay:
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
-            assert not (
+            assert not (overlay / "src" / "mypkg" / "core.py").is_symlink()
                overlay / "src" / "mypkg" / "core.py"
            ).is_symlink()
        finally:
            cleanup_overlay(overlay)
@ -145,9 +139,7 @@ class TestCreateProjectOverlay:
        import sys
        root, mod = _make_project(tmp_path)
-        overlay = create_project_overlay(
+        overlay = create_project_overlay(mod, root, "VALUE = 42\n")
            mod, root, "VALUE = 42\n"
        )
        try:
            result = subprocess.run(
                [
--- a/packages/codeflash-python/tests/test_function_optimizer.py
+++ b/packages/codeflash-python/tests/test_function_optimizer.py
@ -156,16 +156,29 @@ class TestNoGenTests:
        fn_input.function.is_async = False
        with (
-            patch("codeflash_python.pipeline._test_orchestrator.generate_ai_tests") as mock_gen,
+            patch(
-            patch("codeflash_python.pipeline._test_orchestrator.instrument_tests_for_function", return_value=None),
+                "codeflash_python.pipeline._test_orchestrator.generate_ai_tests"
-            patch("codeflash_python.pipeline._test_orchestrator.generate_concolic_tests", return_value=({}, "", None)),
+            ) as mock_gen,
            patch(
                "codeflash_python.pipeline._test_orchestrator.instrument_tests_for_function",
                return_value=None,
            ),
            patch(
                "codeflash_python.pipeline._test_orchestrator.generate_concolic_tests",
                return_value=({}, "", None),
            ),
            patch(
                "codeflash_python.context.pipeline.get_code_optimization_context",
                return_value=MagicMock(),
            ),
-            patch("codeflash_python.pipeline._module_prep.resolve_python_function_ast", return_value=None),
+            patch(
                "codeflash_python.pipeline._module_prep.resolve_python_function_ast",
                return_value=None,
            ),
            patch(f"{_mod}.is_numerical_code", return_value=False),
-            patch("codeflash_python.verification._baseline.establish_original_code_baseline"),
+            patch(
                "codeflash_python.verification._baseline.establish_original_code_baseline"
            ),
        ):
            ctx = OptimizationContext(
                plugin=MagicMock(),
--- a/packages/codeflash-python/tests/test_ranking_boost.py
+++ b/packages/codeflash-python/tests/test_ranking_boost.py
@ -235,7 +235,8 @@ def test_trace_ranking_keeps_addressable_time_primary_over_test_count(
            return addressable_times[function.function_name]
    with patch(
-        "codeflash_python.analysis._function_ranking.FunctionRanker", FakeRanker
+        "codeflash_python.analysis._function_ranking.FunctionRanker",
        FakeRanker,
    ):
        ranked = rank_functions_globally(
            {project_root / "mod.py": funcs},
@ -288,7 +289,8 @@ def test_trace_ranking_uses_test_count_as_tiebreaker(
            return addressable_times[function.function_name]
    with patch(
-        "codeflash_python.analysis._function_ranking.FunctionRanker", FakeRanker
+        "codeflash_python.analysis._function_ranking.FunctionRanker",
        FakeRanker,
    ):
        ranked = rank_functions_globally(
            {project_root / "mod.py": funcs},
--- a/packages/codeflash-python/tests/test_testgen.py
+++ b/packages/codeflash-python/tests/test_testgen.py
@ -239,6 +239,7 @@ class TestModifyInspiredTests:
        assert 2 == len(import_list)
        assert all(isinstance(n, ast.ImportFrom) for n in import_list)
 class TestMergeUnitTests:
    """merge_unit_tests test merging."""
--- a/packages/codeflash-python/tests/test_verification.py
+++ b/packages/codeflash-python/tests/test_verification.py
@ -17,9 +17,9 @@ from codeflash_python.verification._verification import (
    performance_gain,
 )
 from codeflash_python.verification.models import (
    OptimizedCandidateResult,
    BehaviorDiff,
    BehaviorDiffScope,
    OptimizedCandidateResult,
 )
--- a/packages/github-app/tests/test_auth.py
+++ b/packages/github-app/tests/test_auth.py
@ -5,13 +5,13 @@ from __future__ import annotations
 import httpx
 import jwt as pyjwt
 import respx
 from helpers import WEBHOOK_SECRET
 from github_app.auth import (
    generate_jwt,
    get_installation_token,
    verify_signature,
 )
 from helpers import WEBHOOK_SECRET
 def test_generate_jwt_structure(mock_config):
--- a/packages/github-app/tests/test_config.py
+++ b/packages/github-app/tests/test_config.py
@ -7,9 +7,9 @@ from pathlib import Path
 from unittest.mock import patch
 import pytest
 from helpers import FAKE_RSA_PEM
 from github_app.config import Config, default_plugin_dir, load_private_key
 from helpers import FAKE_RSA_PEM
 def test_load_private_key_from_env():
--- a/reports/codeflash-ci-audit/app.py
+++ b/reports/codeflash-ci-audit/app.py
--- a/reports/unstructured-security/app.py
+++ b/reports/unstructured-security/app.py
@ -9,9 +9,10 @@ findings identified during the performance engagement:
 """
 import json
 import re
 from pathlib import Path
-from dash import Dash, clientside_callback, html, Input, Output
+from dash import Dash, Input, Output, clientside_callback, html
 from theme import (
    ACCENT,
    AMBER,
@ -20,15 +21,12 @@ from theme import (
    CARD,
    CARD_BG,
    CARD_BORDER,
    DARK,
    FONT,
    GRAY,
    GREEN,
    LIGHT_GRAY,
    LIGHT_GREEN,
    LIGHT_RED,
    MONO,
    PURPLE,
    RED,
    SLATE,
    WHITE,
@ -60,10 +58,10 @@ SEVERITY_BG = {
 }
 STATUS_COLORS = {
-    "fixed": "#34d399",        # emerald-400 (distinct from severity green)
+    "fixed": "#34d399",  # emerald-400 (distinct from severity green)
    "partially-fixed": "#c084fc",  # purple-400 (avoids orange/amber overlap)
-    "open": "#f87171",         # red-400 (open = still a problem)
+    "open": "#f87171",  # red-400 (open = still a problem)
-    "active": "#22d3ee",       # cyan-400 (distinct from severity blue)
+    "active": "#22d3ee",  # cyan-400 (distinct from severity blue)
 }
 _TAB_BTN_STYLE = {
@ -122,11 +120,11 @@ def card(children, **kw):
 SEVERITY_ICONS = {
-    "critical": "\u25cf",   # filled circle
+    "critical": "\u25cf",  # filled circle
-    "high": "\u25b2",       # filled triangle
+    "high": "\u25b2",  # filled triangle
-    "medium": "\u25c6",     # filled diamond
+    "medium": "\u25c6",  # filled diamond
-    "low": "\u25cb",        # open circle
+    "low": "\u25cb",  # open circle
-    "info": "\u2139",       # info symbol
+    "info": "\u2139",  # info symbol
 }
@ -170,8 +168,6 @@ def status_badge(status):
    )
 import re
 _REPO_URLS = {
    "core-product": "https://github.com/Unstructured-IO/core-product/pull",
    "github-workflows": "https://github.com/Unstructured-IO/github-workflows/pull",
@ -213,7 +209,9 @@ def _linkify_fixed_by(text, repo):
                )
            else:
                result.append(
-                    html.Span(part, style={"color": GREEN, "fontWeight": "600"})
+                    html.Span(
                        part, style={"color": GREEN, "fontWeight": "600"}
                    )
                )
        else:
            result.append(
@ -387,7 +385,11 @@ def finding_card(f):
                    severity_badge(sev),
                    status_badge(f["status"]),
                ],
-                style={"display": "flex", "gap": "8px", "alignItems": "center"},
+                style={
                    "display": "flex",
                    "gap": "8px",
                    "alignItems": "center",
                },
            ),
        ],
        style={
@ -592,7 +594,10 @@ _SEV_PRIORITY = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
 def _finding_sort_key(f):
-    return (_SEV_PRIORITY.get(f["severity"], 9), _CATEGORY_PRIORITY.get(f["category"], 9))
+    return (
        _SEV_PRIORITY.get(f["severity"], 9),
        _CATEGORY_PRIORITY.get(f["category"], 9),
    )
 _CRITICAL_HIGH = sorted(
@ -654,7 +659,10 @@ def _build_summary_tab():
                            " and ",
                            html.Span(
                                f"{SUMMARY['high']} high",
-                                style={"fontWeight": "700", "color": "#f97316"},
+                                style={
                                    "fontWeight": "700",
                                    "color": "#f97316",
                                },
                            ),
                            " severity issues. The lockfile-bypass pattern you fixed in core-product "
                            "(PR #1465) still persists in CI steps and Makefiles across the org. "
@ -738,7 +746,7 @@ def _build_summary_tab():
                        },
                    ),
                    html.Div(
-                        f"of findings remain unresolved",
+                        "of findings remain unresolved",
                        style={
                            "fontSize": "20px",
                            "fontWeight": "600",
@ -781,7 +789,7 @@ def _build_critical_high_tab():
        style={"display": "none"},
        children=[
            section(
-                f"Critical & High Findings",
+                "Critical & High Findings",
                f"{n} findings \u2014 supply chain, container, CI/CD, and secrets",
            ),
            *[finding_card(f) for f in _CRITICAL_HIGH],
@ -797,7 +805,7 @@ def _build_medium_low_tab():
        style={"display": "none"},
        children=[
            section(
-                f"Medium & Low Findings",
+                "Medium & Low Findings",
                f"{n} findings for planned remediation",
            ),
            *[finding_card(f) for f in _MEDIUM_LOW],
@ -999,7 +1007,9 @@ app.layout = html.Div(
                        "zIndex": "1",
                    },
                    children=[
-                        _hero_metric(str(SUMMARY["critical"]), "Critical", RED),
+                        _hero_metric(
                            str(SUMMARY["critical"]), "Critical", RED
                        ),
                        _hero_metric(str(SUMMARY["high"]), "High", "#f97316"),
                        _hero_metric(str(SUMMARY["medium"]), "Medium", AMBER),
                        _hero_metric(str(SUMMARY["low"]), "Low", BLUE),
@ -1027,10 +1037,30 @@ app.layout = html.Div(
                                "border": f"1px solid {CARD_BORDER}",
                            },
                            children=[
-                                html.Button("Summary", id="btn-summary", n_clicks=1, style=_TAB_BTN_ACTIVE),
+                                html.Button(
-                                html.Button(f"Critical & High ({len(_CRITICAL_HIGH)})", id="btn-crit-high", n_clicks=0, style=_TAB_BTN_STYLE),
+                                    "Summary",
-                                html.Button(f"Medium & Low ({len(_MEDIUM_LOW)})", id="btn-med-low", n_clicks=0, style=_TAB_BTN_STYLE),
+                                    id="btn-summary",
-                                html.Button("By Category", id="btn-category", n_clicks=0, style=_TAB_BTN_STYLE),
+                                    n_clicks=1,
                                    style=_TAB_BTN_ACTIVE,
                                ),
                                html.Button(
                                    f"Critical & High ({len(_CRITICAL_HIGH)})",
                                    id="btn-crit-high",
                                    n_clicks=0,
                                    style=_TAB_BTN_STYLE,
                                ),
                                html.Button(
                                    f"Medium & Low ({len(_MEDIUM_LOW)})",
                                    id="btn-med-low",
                                    n_clicks=0,
                                    style=_TAB_BTN_STYLE,
                                ),
                                html.Button(
                                    "By Category",
                                    id="btn-category",
                                    n_clicks=0,
                                    style=_TAB_BTN_STYLE,
                                ),
                            ],
                        ),
                    ],
--- a/reports/unstructured/engagement_report.py
+++ b/reports/unstructured/engagement_report.py
--- a/reports/unstructured/security_report.py
+++ b/reports/unstructured/security_report.py
@ -9,9 +9,10 @@ findings identified during the performance engagement:
 """
 import json
 import re
 from pathlib import Path
-from dash import Dash, clientside_callback, html, Input, Output
+from dash import Dash, Input, Output, clientside_callback, html
 from theme import (
    ACCENT,
    AMBER,
@ -20,15 +21,12 @@ from theme import (
    CARD,
    CARD_BG,
    CARD_BORDER,
    DARK,
    FONT,
    GRAY,
    GREEN,
    LIGHT_GRAY,
    LIGHT_GREEN,
    LIGHT_RED,
    MONO,
    PURPLE,
    RED,
    SLATE,
    WHITE,
@ -60,10 +58,10 @@ SEVERITY_BG = {
 }
 STATUS_COLORS = {
-    "fixed": "#34d399",        # emerald-400 (distinct from severity green)
+    "fixed": "#34d399",  # emerald-400 (distinct from severity green)
    "partially-fixed": "#c084fc",  # purple-400 (avoids orange/amber overlap)
-    "open": "#f87171",         # red-400 (open = still a problem)
+    "open": "#f87171",  # red-400 (open = still a problem)
-    "active": "#22d3ee",       # cyan-400 (distinct from severity blue)
+    "active": "#22d3ee",  # cyan-400 (distinct from severity blue)
 }
 _TAB_BTN_STYLE = {
@ -122,11 +120,11 @@ def card(children, **kw):
 SEVERITY_ICONS = {
-    "critical": "\u25cf",   # filled circle
+    "critical": "\u25cf",  # filled circle
-    "high": "\u25b2",       # filled triangle
+    "high": "\u25b2",  # filled triangle
-    "medium": "\u25c6",     # filled diamond
+    "medium": "\u25c6",  # filled diamond
-    "low": "\u25cb",        # open circle
+    "low": "\u25cb",  # open circle
-    "info": "\u2139",       # info symbol
+    "info": "\u2139",  # info symbol
 }
@ -170,8 +168,6 @@ def status_badge(status):
    )
 import re
 _REPO_URLS = {
    "core-product": "https://github.com/Unstructured-IO/core-product/pull",
    "github-workflows": "https://github.com/Unstructured-IO/github-workflows/pull",
@ -213,7 +209,9 @@ def _linkify_fixed_by(text, repo):
                )
            else:
                result.append(
-                    html.Span(part, style={"color": GREEN, "fontWeight": "600"})
+                    html.Span(
                        part, style={"color": GREEN, "fontWeight": "600"}
                    )
                )
        else:
            result.append(
@ -387,7 +385,11 @@ def finding_card(f):
                    severity_badge(sev),
                    status_badge(f["status"]),
                ],
-                style={"display": "flex", "gap": "8px", "alignItems": "center"},
+                style={
                    "display": "flex",
                    "gap": "8px",
                    "alignItems": "center",
                },
            ),
        ],
        style={
@ -592,7 +594,10 @@ _SEV_PRIORITY = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
 def _finding_sort_key(f):
-    return (_SEV_PRIORITY.get(f["severity"], 9), _CATEGORY_PRIORITY.get(f["category"], 9))
+    return (
        _SEV_PRIORITY.get(f["severity"], 9),
        _CATEGORY_PRIORITY.get(f["category"], 9),
    )
 _CRITICAL_HIGH = sorted(
@ -654,7 +659,10 @@ def _build_summary_tab():
                            " and ",
                            html.Span(
                                f"{SUMMARY['high']} high",
-                                style={"fontWeight": "700", "color": "#f97316"},
+                                style={
                                    "fontWeight": "700",
                                    "color": "#f97316",
                                },
                            ),
                            " severity issues. The lockfile-bypass pattern you fixed in core-product "
                            "(PR #1465) still persists in CI steps and Makefiles across the org. "
@ -738,7 +746,7 @@ def _build_summary_tab():
                        },
                    ),
                    html.Div(
-                        f"of findings remain unresolved",
+                        "of findings remain unresolved",
                        style={
                            "fontSize": "20px",
                            "fontWeight": "600",
@ -781,7 +789,7 @@ def _build_critical_high_tab():
        style={"display": "none"},
        children=[
            section(
-                f"Critical & High Findings",
+                "Critical & High Findings",
                f"{n} findings \u2014 supply chain, container, CI/CD, and secrets",
            ),
            *[finding_card(f) for f in _CRITICAL_HIGH],
@ -797,7 +805,7 @@ def _build_medium_low_tab():
        style={"display": "none"},
        children=[
            section(
-                f"Medium & Low Findings",
+                "Medium & Low Findings",
                f"{n} findings for planned remediation",
            ),
            *[finding_card(f) for f in _MEDIUM_LOW],
@ -999,7 +1007,9 @@ app.layout = html.Div(
                        "zIndex": "1",
                    },
                    children=[
-                        _hero_metric(str(SUMMARY["critical"]), "Critical", RED),
+                        _hero_metric(
                            str(SUMMARY["critical"]), "Critical", RED
                        ),
                        _hero_metric(str(SUMMARY["high"]), "High", "#f97316"),
                        _hero_metric(str(SUMMARY["medium"]), "Medium", AMBER),
                        _hero_metric(str(SUMMARY["low"]), "Low", BLUE),
@ -1027,10 +1037,30 @@ app.layout = html.Div(
                                "border": f"1px solid {CARD_BORDER}",
                            },
                            children=[
-                                html.Button("Summary", id="btn-summary", n_clicks=1, style=_TAB_BTN_ACTIVE),
+                                html.Button(
-                                html.Button(f"Critical & High ({len(_CRITICAL_HIGH)})", id="btn-crit-high", n_clicks=0, style=_TAB_BTN_STYLE),
+                                    "Summary",
-                                html.Button(f"Medium & Low ({len(_MEDIUM_LOW)})", id="btn-med-low", n_clicks=0, style=_TAB_BTN_STYLE),
+                                    id="btn-summary",
-                                html.Button("By Category", id="btn-category", n_clicks=0, style=_TAB_BTN_STYLE),
+                                    n_clicks=1,
                                    style=_TAB_BTN_ACTIVE,
                                ),
                                html.Button(
                                    f"Critical & High ({len(_CRITICAL_HIGH)})",
                                    id="btn-crit-high",
                                    n_clicks=0,
                                    style=_TAB_BTN_STYLE,
                                ),
                                html.Button(
                                    f"Medium & Low ({len(_MEDIUM_LOW)})",
                                    id="btn-med-low",
                                    n_clicks=0,
                                    style=_TAB_BTN_STYLE,
                                ),
                                html.Button(
                                    "By Category",
                                    id="btn-category",
                                    n_clicks=0,
                                    style=_TAB_BTN_STYLE,
                                ),
                            ],
                        ),
                    ],