Many fixes, concolic test coverage.

2026-05-04 18:25:17 +00:00 · 2024-11-07 11:33:10 -08:00 · 2024-11-07 11:33:10 -08:00 · c2a08cddb8
commit c2a08cddb8
parent 96db48c240
5 changed files with 194 additions and 103 deletions
--- a/code_to_optimize/typed_topological_sort.py
+++ b/code_to_optimize/typed_topological_sort.py
@ -0,0 +1,29 @@
+from collections import defaultdict
+
+
+class Graph:
+    def __init__(self, vertices: int) -> None:
+        self.graph: dict[int, list[int]] = defaultdict(list)
+        self.V: int = vertices  # No. of vertices
+
+    def addEdge(self, u: int, v: int) -> None:
+        self.graph[u].append(v)
+
+    def topologicalSortUtil(self, v: int, visited: list[bool], stack: list[int]) -> None:
+        visited[v] = True
+
+        for i in self.graph[v]:
+            if visited[i] == False:
+                self.topologicalSortUtil(i, visited, stack)
+
+        stack.insert(0, v)
+
+    def topologicalSort(self) -> list[int]:
+        visited: list[bool] = [False] * self.V
+        stack: list[int] = []
+
+        for i in range(self.V):
+            if visited[i] == False:
+                self.topologicalSortUtil(i, visited, stack)
+
+        return stack
--- a/codeflash/code_utils/static_analysis.py
+++ b/codeflash/code_utils/static_analysis.py
@ -80,7 +80,7 @@ def get_module_file_path(module_name: str, project_root: Path) -> Path | None:
    return None


-def analyze_imported_internal_modules(
+def analyze_imported_modules(
    code_str: str, module_file_path: Path, project_root: Path
 ) -> list[ImportedInternalModuleAnalysis]:
    """Statically finds and analyzes all imported internal modules."""
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@ -15,6 +15,11 @@ from codeflash.verification.test_results import TestResults, TestType
 # of the module is foo.eggs.


+class ValidCode(BaseModel, frozen=True):
+    source_code: str
+    normalized_code: str
+
+
 class DiffbehaviorReturnCode(IntEnum):
    NO_DIFFERENCES = 0
    COUNTER_EXAMPLES = 1
@ -79,7 +84,8 @@ class TestFiles(BaseModel):
        if test_file not in self.test_files:
            self.test_files.append(test_file)
        else:
-            raise ValueError("Test file already exists in the list")
+            msg = "Test file already exists in the list"
+            raise ValueError(msg)

    def get_by_original_file_path(self, file_path: Path) -> TestFile | None:
        return next((test_file for test_file in self.test_files if test_file.original_file_path == file_path), None)
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@ -2,6 +2,7 @@ from __future__ import annotations

 import concurrent.futures
 import os
+import re
 import shutil
 import subprocess
 import tempfile
@ -44,7 +45,7 @@ from codeflash.code_utils.formatter import format_code, sort_imports
 from codeflash.code_utils.git_utils import git_root_dir
 from codeflash.code_utils.instrument_existing_tests import inject_profiling_into_existing_test
 from codeflash.code_utils.remove_generated_tests import remove_functions_from_generated_tests
-from codeflash.code_utils.static_analysis import analyze_imported_internal_modules
+from codeflash.code_utils.static_analysis import analyze_imported_modules
 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.discovery.discover_unit_tests import discover_unit_tests
 from codeflash.discovery.functions_to_optimize import FunctionToOptimize, get_functions_to_optimize
@ -61,6 +62,7 @@ from codeflash.models.models import (
    OriginalCodeBaseline,
    TestFile,
    TestFiles,
+    ValidCode,
 )
 from codeflash.optimization.function_context import get_constrained_function_context_and_helper_functions
 from codeflash.result.create_pr import check_create_pr, existing_tests_source_for
@ -138,34 +140,49 @@ class Optimizer:
            logger.info(f"Discovered {num_discovered_tests} existing unit tests in {self.test_cfg.tests_root}")
            console.rule()
            ph("cli-optimize-discovered-tests", {"num_tests": num_discovered_tests})
+
            # TODO CROSSHAIR: Handle no git case.
            git_root = git_root_dir()

-            for path in file_to_funcs_to_optimize:
-                original_module_path = Path(path)
+            for original_module_path in file_to_funcs_to_optimize:
                logger.info(f"Examining file {original_module_path!s}…")
-                # TODO CROSSHAIR Check for IO errors with try block, factor out code extraction and validation.
-                original_code: str = original_module_path.read_text(encoding="utf8")  # TODO CROSSHAIR Parse, Validate

-                original_code_imported_module_analysis = analyze_imported_internal_modules(
-                    original_code, original_module_path, self.args.project_root
+                original_module_code: str = original_module_path.read_text(encoding="utf8")
+                imported_module_analyses = analyze_imported_modules(
+                    original_module_code, original_module_path, self.args.project_root
                )
-                imported_internal_module_information: dict[Path, dict[str, str]] = {}
-                for analysis in original_code_imported_module_analysis:
-                    # TODO CROSSHAIR Check for IO errors, factor out.
-                    imported_internal_module_information[analysis.file_path] = {
-                        "name": analysis.name,
-                        "full_name": analysis.full_name,
-                        # TODO CROSSHAIR Parse, Validate
-                        "code": analysis.file_path.read_text(encoding="utf8"),  # TODO CROSSHAIR Separate try block?
-                    }
-                    # TODO CROSSHAIR Parse/validate ast, norm. ast, libcst. New pydantic model for code.
+                callee_module_paths = {analysis.file_path for analysis in imported_module_analyses}

-                for function_to_optimize in file_to_funcs_to_optimize[path]:
-                    # TODO CROSSHAIR Factor out in git tools. Handle no git case.
-                    # TODO CROSSHAIR Pydantic model for worktree info: work_tree_root, work_trees. work_tree_branches
+                try:
+                    normalized_original_module_code = normalize_code(original_module_code)
+                except SyntaxError as e:
+                    logger.warning(f"Syntax error parsing code in {original_module_path}: {e}")
+                    continue
+                validated_original_code: dict[Path, ValidCode] = {
+                    original_module_path: ValidCode(
+                        source_code=original_module_code, normalized_code=normalized_original_module_code
+                    )
+                }
+
+                has_syntax_error = False
+                for analysis in imported_module_analyses:
+                    callee_original_code = analysis.file_path.read_text(encoding="utf8")
+                    try:
+                        normalized_callee_original_code = normalize_code(callee_original_code)
+                    except SyntaxError as e:
+                        logger.warning(f"Syntax error parsing code in {analysis.file_path}: {e}")
+                        has_syntax_error = True
+                        break
+                    validated_original_code[analysis.file_path] = ValidCode(
+                        source_code=callee_original_code, normalized_code=normalized_callee_original_code
+                    )
+                if has_syntax_error:
+                    continue
+
+                for function_to_optimize in file_to_funcs_to_optimize[original_module_path]:
                    worktree_root: Path = Path(tempfile.mkdtemp())
                    worktrees: list[Path] = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES + 1)]
+                    # TODO CROSSHAIR Handle no git case.
                    for worktree in worktrees:
                        subprocess.run(
                            ["git", "worktree", "add", "-d", worktree], cwd=self.args.module_root, check=True
@ -180,8 +197,8 @@ class Optimizer:
                    best_optimization = self.optimize_function(
                        function_to_optimize,
                        function_to_tests,
-                        original_code,
-                        imported_internal_module_information,
+                        callee_module_paths,
+                        validated_original_code,
                        worktree_root,
                        worktrees,
                        git_root,
@ -218,8 +235,8 @@ class Optimizer:
        self,
        function_to_optimize: FunctionToOptimize,
        function_to_tests: dict[str, list[FunctionCalledInTest]],
-        original_code: str,
-        imported_internal_module_information: dict[Path, dict[str, str]],
+        callee_module_paths: set[Path],
+        validated_original_code: dict[Path, ValidCode],
        worktree_root: Path,
        worktrees: list[Path],
        git_root: Path,
@ -230,7 +247,11 @@ class Optimizer:
        ph("cli-optimize-function-start", {"function_trace_id": function_trace_id})
        self.cleanup_leftover_test_return_values()
        file_name_from_test_module_name.cache_clear()
-        ctx_result = self.get_code_optimization_context(function_to_optimize, self.args.project_root, original_code)
+        ctx_result = self.get_code_optimization_context(
+            function_to_optimize,
+            self.args.project_root,
+            validated_original_code[function_to_optimize.file_path].source_code,
+        )
        if not is_successful(ctx_result):
            return Failure(ctx_result.failure())
        code_context: CodeOptimizationContext = ctx_result.unwrap()
@ -243,7 +264,7 @@ class Optimizer:

        code_print(code_context.code_to_optimize_with_helpers)

-        module_path = module_name_from_file_path(function_to_optimize.file_path, self.args.project_root)
+        original_module_path = module_name_from_file_path(function_to_optimize.file_path, self.args.project_root)

        for module_abspath in original_helper_code:
            code_context.code_to_optimize_with_helpers = add_needed_imports_from_module(
@ -265,7 +286,7 @@ class Optimizer:
                code_context.code_to_optimize_with_helpers,
                function_to_optimize,
                code_context.helper_functions,
-                Path(module_path),
+                Path(original_module_path),
                function_trace_id,
                run_experiment=should_run_experiment,
            )
@ -298,7 +319,7 @@ class Optimizer:
        function_to_optimize_qualified_name = function_to_optimize.qualified_name
        baseline_result = self.establish_original_code_baseline(
            function_to_optimize_qualified_name,
-            function_to_tests.get(module_path + "." + function_to_optimize_qualified_name, []),
+            function_to_tests.get(original_module_path + "." + function_to_optimize_qualified_name, []),
        )
        console.rule()
        if not is_successful(baseline_result):
@ -311,13 +332,14 @@ class Optimizer:
        original_code_baseline, test_functions_to_remove = baseline_result.unwrap()

        best_optimization = None
+
        for u, candidates in enumerate([optimizations_set.control, optimizations_set.experiment]):
            if candidates is None:
                continue

-            optimized_module_code_strings = {
+            initial_optimized_code = {
                candidate.optimization_id: replace_functions_and_add_imports(
-                    original_code,
+                    validated_original_code[function_to_optimize.file_path].source_code,
                    [function_to_optimize_qualified_name],
                    candidate.source_code,
                    function_to_optimize.file_path,
@ -328,78 +350,62 @@ class Optimizer:
                )
                for candidate in candidates
            }
-            callee_module_paths = {callee.file_path for callee in code_context.helper_functions}
-            optimized_callee_modules_code_strings = {
+            callee_original_code = {
+                module_path: validated_original_code[module_path].source_code for module_path in callee_module_paths
+            }
+            intermediate_original_code = {
+                candidate.optimization_id: callee_original_code for candidate in candidates
+            } | {
                candidate.optimization_id: {
-                    callee_module_path: replace_functions_and_add_imports(
-                        imported_internal_module_information[callee_module_path]["code"],
+                    function_to_optimize.file_path: initial_optimized_code[candidate.optimization_id]
+                }
+                for candidate in candidates
+            }
+            module_paths = callee_module_paths | {function_to_optimize.file_path}
+            optimized_code = {
+                candidate.optimization_id: {
+                    module_path: replace_functions_and_add_imports(
+                        intermediate_original_code[candidate.optimization_id][module_path],
                        (
                            [
                                callee.qualified_name
                                for callee in code_context.helper_functions
-                                if callee.file_path == callee_module_path and callee.jedi_definition.type != "class"
+                                if callee.file_path == module_path and callee.jedi_definition.type != "class"
                            ]
                        ),
                        candidate.source_code,
                        function_to_optimize.file_path,
-                        callee_module_path,
-                        code_context.preexisting_objects,
+                        module_path,
+                        [],
                        code_context.contextual_dunder_methods,
                        self.args.project_root,
                    )
-                    for callee_module_path in callee_module_paths
+                    for module_path in module_paths
                }
                for candidate in candidates
            }

-            normalized_original_code = normalize_code(original_code)
-            are_optimized_module_code_strings_zero_diff = {
-                candidate.optimization_id: normalize_code(optimized_module_code_strings[candidate.optimization_id])
-                == normalized_original_code
-                for candidate in candidates
-            }
-            normalized_callee_module_code_strings = {
-                callee_module_path: normalize_code(imported_internal_module_information[callee_module_path]["code"])
-                for callee_module_path in callee_module_paths
-            }
            are_optimized_callee_module_code_strings_zero_diff = {
                candidate.optimization_id: {
-                    callee_module_path: normalize_code(
-                        optimized_callee_modules_code_strings[candidate.optimization_id][callee_module_path]
-                    )
-                    == normalized_callee_module_code_strings[callee_module_path]
-                    for callee_module_path in callee_module_paths
+                    callee_module_path: normalize_code(optimized_code[candidate.optimization_id][callee_module_path])
+                    == validated_original_code[callee_module_path].normalized_code
+                    for callee_module_path in module_paths
                }
                for candidate in candidates
            }
-
            candidates_with_diffs = [
                candidate
                for candidate in candidates
-                if not (
-                    are_optimized_module_code_strings_zero_diff[candidate.optimization_id]
-                    and all(are_optimized_callee_module_code_strings_zero_diff[candidate.optimization_id].values())
-                )
+                if not all(are_optimized_callee_module_code_strings_zero_diff[candidate.optimization_id].values())
            ]

-            # TODO CROSSHAIR: refactor filtering + write as single function. Put under try/except block. TODO
-            #  Crosshair: Precalculate or factor out repeated function_to_optimize.file_path.relative_to(
-            #  self.args.module_root)
            for candidate, worktree in zip(candidates_with_diffs, worktrees[1:]):
-                if are_optimized_module_code_strings_zero_diff[candidate.optimization_id]:
-                    (worktree / function_to_optimize.file_path.relative_to(git_root)).write_text(
-                        optimized_module_code_strings[candidate.optimization_id], encoding="utf8"
-                    )
-                for callee_module_path in optimized_callee_modules_code_strings[candidate.optimization_id]:
-                    if are_optimized_callee_module_code_strings_zero_diff[candidate.optimization_id][
-                        callee_module_path
-                    ]:
-                        (worktree / callee_module_path.relative_to(git_root)).write_text(
-                            optimized_callee_modules_code_strings[candidate.optimization_id][callee_module_path],
-                            encoding="utf8",
+                for module_path in optimized_code[candidate.optimization_id]:
+                    if are_optimized_callee_module_code_strings_zero_diff[candidate.optimization_id][module_path]:
+                        (worktree / module_path.relative_to(git_root)).write_text(
+                            optimized_code[candidate.optimization_id][module_path], encoding="utf8"
                        )

-            # TODO Crosshair: Factor out relative path munging code, repeated.
            function_to_optimize_original_worktree_fqn = (
                str(worktrees[0].name / function_to_optimize.file_path.relative_to(git_root).with_suffix("")).replace(
                    "/", "."
@ -409,21 +415,25 @@ class Optimizer:
            )

            diffbehavior_results: dict[str, DiffbehaviorReturnCode] = {}
+            logger.info("Running concolic behavior correctness checking…")
+            console.rule()
            for candidate_index, candidate in enumerate(candidates_with_diffs, start=1):
                logger.info(f"Optimization candidate {candidate_index}/{len(candidates_with_diffs)}:")
                code_print(candidate.source_code)
+                function_to_optimize_optimized_worktree_fqn = (
+                    str(
+                        worktrees[candidate_index].name
+                        / function_to_optimize.file_path.relative_to(git_root).with_suffix("")
+                    ).replace("/", ".")
+                    + "."
+                    + function_to_optimize_qualified_name
+                )
                result = subprocess.run(
                    [
                        "crosshair",
                        "diffbehavior",
-                        "--max_uninteresting_iterations",
-                        "64",
-                        str(
-                            worktrees[candidate_index].name
-                            / function_to_optimize.file_path.relative_to(git_root).with_suffix("")
-                        ).replace("/", ".")
-                        + "."
-                        + function_to_optimize_qualified_name,
+                        "--max_uninteresting_iterations=128",
+                        function_to_optimize_optimized_worktree_fqn,
                        function_to_optimize_original_worktree_fqn,
                    ],
                    capture_output=True,
@ -431,20 +441,55 @@ class Optimizer:
                    cwd=worktree_root,
                    check=False,
                )
+                tests = subprocess.run(
+                    [
+                        "crosshair",
+                        "cover",
+                        "--example_output_format=pytest",
+                        "--max_uninteresting_iterations=128",
+                        function_to_optimize_optimized_worktree_fqn,
+                    ],
+                    capture_output=True,
+                    text=True,
+                    cwd=worktree_root,
+                    check=False,
+                )

                if result.returncode == DiffbehaviorReturnCode.ERROR:
                    diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.ERROR
-                    logger.info("Inconclusive results from concolic behavior correctness check.")
+                    logger.info("Inconclusive results from concolic behavior correctness checking.")
                    logger.warning(
                        f"Error running crosshair diffbehavior{': ' + result.stderr if result.stderr else '.'}"
                    )
                elif result.returncode == DiffbehaviorReturnCode.COUNTER_EXAMPLES:
-                    diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.COUNTER_EXAMPLES
-                    logger.info(f"Optimization candidate failed concolic behavior correctness check:\n{result.stdout}")
+                    split_counter_examples = re.split("(Given: )", result.stdout)[1:]
+                    joined_counter_examples = [
+                        "".join(map(str, split_counter_examples[i : i + 2]))
+                        for i in range(0, len(split_counter_examples), 2)
+                    ]
+                    concrete_counter_examples = "".join(
+                        [elt for elt in joined_counter_examples if not re.search(r" object at 0x[0-9a-fA-F]+", elt)]
+                    )
+                    if concrete_counter_examples:
+                        diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.COUNTER_EXAMPLES
+                        logger.info(
+                            f"Optimization candidate failed concolic behavior correctness "
+                            f"checking:\n{concrete_counter_examples}"
+                        )
+                        if result.stdout != concrete_counter_examples:
+                            objectid_counter_examples = "".join(
+                                [elt for elt in joined_counter_examples if re.search(r" object at 0x[0-9a-fA-F]+", elt)]
+                            )
+                            logger.warning(f"Counter-examples with object ID found:\n{objectid_counter_examples}")
+                    else:
+                        diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.ERROR
+                        logger.info("Inconclusive results from concolic behavior correctness checking.")
+                        console.rule()
+                        logger.warning(f"Counter-examples with object ID found:\n{result.stdout}")
                elif result.returncode == DiffbehaviorReturnCode.NO_DIFFERENCES:
                    diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.NO_DIFFERENCES
                    logger.info(
-                        f"Optimization candidate passed concolic behavior correctness check"
+                        f"Optimization candidate passed concolic behavior correctness checking"
                        f"{': ' + chr(10) + result.stdout.split(chr(10), 1)[0] if chr(10) in result.stdout else '.'}"
                    )
                    if result.stdout.endswith("All paths exhausted, functions are likely the same!\n"):
@ -452,8 +497,11 @@ class Optimizer:
                    else:
                        logger.warning("Consider increasing the --max_uninteresting_iterations option.")
                else:
-                    logger.info("Inconclusive results from concolic behavior correctness check.")
+                    logger.info("Inconclusive results from concolic behavior correctness checking.")
                    logger.error("Unknown return code running crosshair diffbehavior.")
+                console.rule()
+                logger.info(f"Tests generated through concolic coverage checking:\n{tests.stdout}")
+                console.rule()

            tests_in_file: list[FunctionCalledInTest] = function_to_tests.get(
                function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root), []
@ -463,7 +511,7 @@ class Optimizer:
                candidates=candidates_with_diffs,
                code_context=code_context,
                function_to_optimize=function_to_optimize,
-                original_code=original_code,
+                original_code=validated_original_code[function_to_optimize.file_path].source_code,
                original_code_baseline=original_code_baseline,
                original_helper_code=original_helper_code,
                function_trace_id=function_trace_id[:-4] + f"EXP{u}" if should_run_experiment else function_trace_id,
@ -503,7 +551,9 @@ class Optimizer:
                )

                new_code, new_helper_code = self.reformat_code_and_helpers(
-                    code_context.helper_functions, explanation.file_path, original_code
+                    code_context.helper_functions,
+                    explanation.file_path,
+                    validated_original_code[function_to_optimize.file_path].source_code,
                )

                existing_tests = existing_tests_source_for(
@ -513,7 +563,9 @@ class Optimizer:
                )

                original_code_combined = original_helper_code.copy()
-                original_code_combined[explanation.file_path] = original_code
+                original_code_combined[explanation.file_path] = validated_original_code[
+                    function_to_optimize.file_path
+                ].source_code
                new_code_combined = new_helper_code.copy()
                new_code_combined[explanation.file_path] = new_code
                if not self.args.no_pr:
@ -528,7 +580,11 @@ class Optimizer:
                        function_trace_id=function_trace_id,
                    )
                    if self.args.all or env_utils.get_pr_number():
-                        self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
+                        self.write_code_and_helpers(
+                            validated_original_code[function_to_optimize.file_path].source_code,
+                            original_helper_code,
+                            function_to_optimize.file_path,
+                        )
        for generated_test_path in generated_tests_paths:
            generated_test_path.unlink(missing_ok=True)
        for test_paths in instrumented_unittests_created_for_function:
@ -643,7 +699,6 @@ class Optimizer:
            logger.exception(f"Optimization interrupted: {e}")
            raise

-        # TODO Crosshair: Report on regression vs concolic, false negatives vs matches
        self.aiservice_client.log_results(
            function_trace_id=function_trace_id,
            speedup_ratio=speedup_ratios,
@ -843,7 +898,8 @@ class Optimizer:
                elif test_type == TestType.REPLAY_TEST:
                    replay_test_files_count += 1
                else:
-                    raise ValueError(f"Unexpected test type: {test_type}")
+                    msg = f"Unexpected test type: {test_type}"
+                    raise ValueError(msg)
                success, injected_test = inject_profiling_into_existing_test(
                    test_path=path_obj_test_file,
                    call_positions=positions,
@ -1200,14 +1256,14 @@ class Optimizer:
            logger.warning("Concolic behavior correctness check inconclusive.")
            console.rule()

-            if (total_candidate_timing := candidate_results.total_passed_runtime()) == 0:
-                logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
-                console.rule()
-            get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)
+        if (total_candidate_timing := candidate_results.total_passed_runtime()) == 0:
+            logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
+            console.rule()
+        get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)

-            get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
-            if not equal_results:
-                success = False
+        get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
+        if not equal_results:
+            success = False

        if not success:
            return Failure("Failed to run the optimization candidate.")
--- a/tests/test_static_analysis.py
+++ b/tests/test_static_analysis.py
@ -1,6 +1,6 @@
 from pathlib import Path

-from codeflash.code_utils.static_analysis import ImportedInternalModuleAnalysis, analyze_imported_internal_modules
+from codeflash.code_utils.static_analysis import ImportedInternalModuleAnalysis, analyze_imported_modules


 def test_analyze_imported_modules() -> None:
@ -36,5 +36,5 @@ def a_function():
            name="mymodule", full_name="tests.mymodule", file_path=project_root / Path("tests/mymodule.py")
        ),
    ]
-    actual_imported_module_analysis = analyze_imported_internal_modules(code_str, module_file_path, project_root)
+    actual_imported_module_analysis = analyze_imported_modules(code_str, module_file_path, project_root)
    assert set(actual_imported_module_analysis) == set(expected_imported_module_analysis)