codeflash-internal/cli/codeflash/optimization/optimizer.py

from __future__ import annotations
import ast
import concurrent.futures
import os
import shutil
import subprocess
import tempfile
import time
import uuid
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
import isort
import libcst as cst
from rich.console import Group
from rich.panel import Panel
from rich.syntax import Syntax
from rich.tree import Tree
from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient
from codeflash.cli_cmds.console import code_print, console, logger, progress_bar
from codeflash.code_utils import env_utils
from codeflash.code_utils.code_extractor import add_needed_imports_from_module, extract_code
from codeflash.code_utils.code_replacer import normalize_code, normalize_node, replace_function_definitions_in_module
from codeflash.code_utils.code_utils import (
cleanup_paths,
file_name_from_test_module_name,
get_run_tmp_file,
module_name_from_file_path,
)
from codeflash.code_utils.config_consts import (
INDIVIDUAL_TESTCASE_TIMEOUT,
N_CANDIDATES,
N_TESTS_TO_GENERATE,
TOTAL_LOOPING_TIME,
)
from codeflash.code_utils.formatter import format_code, sort_imports
from codeflash.code_utils.instrument_existing_tests import inject_profiling_into_existing_test
from codeflash.code_utils.remove_generated_tests import remove_functions_from_generated_tests
from codeflash.code_utils.static_analysis import analyze_imported_modules, get_first_top_level_function_or_method_ast
from codeflash.code_utils.time_utils import humanize_runtime
from codeflash.context import code_context_extractor
from codeflash.discovery.discover_unit_tests import discover_unit_tests
from codeflash.discovery.functions_to_optimize import FunctionToOptimize, get_functions_to_optimize
from codeflash.either import Failure, Success, is_successful
from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import (
BestOptimization,
CodeOptimizationContext,
FunctionCalledInTest,
FunctionParent,
GeneratedTests,
GeneratedTestsList,
OptimizationSet,
OptimizedCandidateResult,
OriginalCodeBaseline,
TestFile,
TestFiles,
TestingMode,
ValidCode,
)
from codeflash.optimization.function_context import get_constrained_function_context_and_helper_functions
from codeflash.result.create_pr import check_create_pr, existing_tests_source_for
from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic
from codeflash.result.explanation import Explanation
from codeflash.telemetry.posthog_cf import ph
from codeflash.verification.concolic_testing import generate_concolic_tests
from codeflash.verification.equivalence import compare_test_results
from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture
from codeflash.verification.parse_test_output import parse_test_results
from codeflash.verification.test_results import TestResults, TestType
from codeflash.verification.test_runner import run_behavioral_tests, run_benchmarking_tests
from codeflash.verification.verification_utils import TestConfig, get_test_file_path
from codeflash.verification.verifier import generate_tests
if TYPE_CHECKING:
from argparse import Namespace
from codeflash.either import Result
from codeflash.models.models import CoverageData, FunctionSource, OptimizedCandidate
class Optimizer:
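    """Drives the end-to-end optimization workflow: test generation, candidate evaluation, benchmarking, and reporting."""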
def __init__(self, args: Namespace) -> None:
self.args = args
self.test_cfg = TestConfig(
tests_root=args.tests_root,
tests_project_rootdir=args.test_project_root,
project_root_path=args.project_root,
test_framework=args.test_framework,
pytest_cmd=args.pytest_cmd,
)
self.aiservice_client = AiServiceClient()
self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None)
self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None
self.test_files = TestFiles(test_files=[])
def run(self) -> None:
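        """Discover optimizable functions and their existing tests, optimize each function in turn, and clean up instrumented test files afterwards."""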
ph("cli-optimize-run-start")
logger.info("Running optimizer.")
console.rule()
if not env_utils.ensure_codeflash_api_key():
return
file_to_funcs_to_optimize: dict[Path, list[FunctionToOptimize]]
num_optimizable_functions: int
(file_to_funcs_to_optimize, num_optimizable_functions) = get_functions_to_optimize(
optimize_all=self.args.all,
replay_test=self.args.replay_test,
file=self.args.file,
only_get_this_function=self.args.function,
test_cfg=self.test_cfg,
ignore_paths=self.args.ignore_paths,
project_root=self.args.project_root,
module_root=self.args.module_root,
)
optimizations_found: int = 0
function_iterator_count: int = 0
if self.args.test_framework == "pytest":
self.test_cfg.concolic_test_root_dir = Path(
tempfile.mkdtemp(dir=self.args.tests_root, prefix="codeflash_concolic_")
)
try:
ph("cli-optimize-functions-to-optimize", {"num_functions": num_optimizable_functions})
if num_optimizable_functions == 0:
logger.info("No functions found to optimize. Exiting…")
return
console.rule()
logger.info(f"Discovering existing unit tests in {self.test_cfg.tests_root}")
console.rule()
function_to_tests: dict[str, list[FunctionCalledInTest]] = discover_unit_tests(self.test_cfg)
            num_discovered_tests: int = sum(len(value) for value in function_to_tests.values())
console.rule()
logger.info(f"Discovered {num_discovered_tests} existing unit tests in {self.test_cfg.tests_root}")
console.rule()
ph("cli-optimize-discovered-tests", {"num_tests": num_discovered_tests})
for original_module_path in file_to_funcs_to_optimize:
logger.info(f"Examining file {original_module_path!s}")
console.rule()
original_module_code: str = original_module_path.read_text(encoding="utf8")
try:
original_module_ast = ast.parse(original_module_code)
except SyntaxError as e:
logger.warning(f"Syntax error parsing code in {original_module_path}: {e}")
logger.info("Skipping optimization due to file error.")
continue
normalized_original_module_code = ast.unparse(normalize_node(original_module_ast))
validated_original_code: dict[Path, ValidCode] = {
original_module_path: ValidCode(
source_code=original_module_code, normalized_code=normalized_original_module_code
)
}
imported_module_analyses = analyze_imported_modules(
original_module_code, original_module_path, self.args.project_root
)
has_syntax_error = False
for analysis in imported_module_analyses:
callee_original_code = analysis.file_path.read_text(encoding="utf8")
try:
normalized_callee_original_code = normalize_code(callee_original_code)
except SyntaxError as e:
logger.warning(f"Syntax error parsing code in callee module {analysis.file_path}: {e}")
logger.info("Skipping optimization due to helper file error.")
has_syntax_error = True
break
validated_original_code[analysis.file_path] = ValidCode(
source_code=callee_original_code, normalized_code=normalized_callee_original_code
)
if has_syntax_error:
continue
for function_to_optimize in file_to_funcs_to_optimize[original_module_path]:
function_iterator_count += 1
logger.info(
f"Optimizing function {function_iterator_count} of {num_optimizable_functions}: "
f"{function_to_optimize.qualified_name}"
)
if not (
function_to_optimize_ast := get_first_top_level_function_or_method_ast(
function_to_optimize.function_name, function_to_optimize.parents, original_module_ast
)
):
logger.info(
f"Function {function_to_optimize.qualified_name} not found in {original_module_path}.\n"
f"Skipping optimization."
)
continue
best_optimization = self.optimize_function(
function_to_optimize, function_to_optimize_ast, function_to_tests, validated_original_code
)
self.test_files = TestFiles(test_files=[])
if is_successful(best_optimization):
optimizations_found += 1
else:
logger.warning(best_optimization.failure())
console.rule()
continue
ph("cli-optimize-run-finished", {"optimizations_found": optimizations_found})
if optimizations_found == 0:
logger.info("❌ No optimizations found.")
elif self.args.all:
logger.info("✨ All functions have been optimized! ✨")
finally:
for test_file in self.test_files.get_by_type(TestType.GENERATED_REGRESSION).test_files:
test_file.instrumented_behavior_file_path.unlink(missing_ok=True)
test_file.benchmarking_file_path.unlink(missing_ok=True)
for test_file in self.test_files.get_by_type(TestType.EXISTING_UNIT_TEST).test_files:
test_file.instrumented_behavior_file_path.unlink(missing_ok=True)
test_file.benchmarking_file_path.unlink(missing_ok=True)
for test_file in self.test_files.get_by_type(TestType.CONCOLIC_COVERAGE_TEST).test_files:
test_file.instrumented_behavior_file_path.unlink(missing_ok=True)
if hasattr(get_run_tmp_file, "tmpdir"):
get_run_tmp_file.tmpdir.cleanup()
if self.test_cfg.concolic_test_root_dir:
shutil.rmtree(self.test_cfg.concolic_test_root_dir, ignore_errors=True)
def optimize_function(
self,
function_to_optimize: FunctionToOptimize,
function_to_optimize_ast: ast.FunctionDef,
function_to_tests: dict[str, list[FunctionCalledInTest]],
validated_original_code: dict[Path, ValidCode],
) -> Result[BestOptimization, str]:
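        """Generate tests and optimization candidates for a single function, verify and benchmark them against the original code, and apply and report the best optimization found."""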
should_run_experiment = self.experiment_id is not None
function_trace_id: str = str(uuid.uuid4())
logger.debug(f"Function Trace ID: {function_trace_id}")
ph("cli-optimize-function-start", {"function_trace_id": function_trace_id})
self.cleanup_leftover_test_return_values()
file_name_from_test_module_name.cache_clear()
ctx_result = self.get_code_optimization_context(
function_to_optimize,
self.args.project_root,
validated_original_code[function_to_optimize.file_path].source_code,
)
if not is_successful(ctx_result):
return Failure(ctx_result.failure())
code_context: CodeOptimizationContext = ctx_result.unwrap()
original_helper_code: dict[Path, str] = {}
helper_function_paths = {hf.file_path for hf in code_context.helper_functions}
for helper_function_path in helper_function_paths:
with helper_function_path.open(encoding="utf8") as f:
helper_code = f.read()
original_helper_code[helper_function_path] = helper_code
logger.info("Code to be optimized:")
code_print(code_context.read_writable_code)
original_module_path = module_name_from_file_path(function_to_optimize.file_path, self.args.project_root)
for module_abspath, helper_code_source in original_helper_code.items():
code_context.code_to_optimize_with_helpers = add_needed_imports_from_module(
helper_code_source,
code_context.code_to_optimize_with_helpers,
module_abspath,
function_to_optimize.file_path,
self.args.project_root,
)
generated_test_paths = [
get_test_file_path(
self.test_cfg.tests_root, function_to_optimize.function_name, test_index, test_type="unit"
)
for test_index in range(N_TESTS_TO_GENERATE)
]
generated_perf_test_paths = [
get_test_file_path(
self.test_cfg.tests_root, function_to_optimize.function_name, test_index, test_type="perf"
)
for test_index in range(N_TESTS_TO_GENERATE)
]
with progress_bar(
f"Generating new tests and optimizations for function {function_to_optimize.function_name}", transient=True
):
generated_results = self.generate_tests_and_optimizations(
code_to_optimize_with_helpers=code_context.code_to_optimize_with_helpers,
read_writable_code=code_context.read_writable_code,
read_only_context_code=code_context.read_only_context_code,
function_to_optimize=function_to_optimize,
helper_functions=code_context.helper_functions,
module_path=Path(original_module_path),
function_trace_id=function_trace_id,
generated_test_paths=generated_test_paths,
generated_perf_test_paths=generated_perf_test_paths,
function_to_optimize_ast=function_to_optimize_ast,
run_experiment=should_run_experiment,
)
if not is_successful(generated_results):
return Failure(generated_results.failure())
generated_tests: GeneratedTestsList
optimizations_set: OptimizationSet
generated_tests, function_to_concolic_tests, concolic_test_str, optimizations_set = generated_results.unwrap()
count_tests = len(generated_tests.generated_tests)
if concolic_test_str:
count_tests += 1
for i, generated_test in enumerate(generated_tests.generated_tests):
with generated_test.behavior_file_path.open("w", encoding="utf8") as f:
f.write(generated_test.instrumented_behavior_test_source)
with generated_test.perf_file_path.open("w", encoding="utf8") as f:
f.write(generated_test.instrumented_perf_test_source)
self.test_files.add(
TestFile(
instrumented_behavior_file_path=generated_test.behavior_file_path,
benchmarking_file_path=generated_test.perf_file_path,
original_file_path=None,
original_source=generated_test.generated_original_test_source,
test_type=TestType.GENERATED_REGRESSION,
tests_in_file=None, # This is currently unused. We can discover the tests in the file if needed.
)
)
logger.info(f"Generated test {i + 1}/{count_tests}:")
code_print(generated_test.generated_original_test_source)
if concolic_test_str:
logger.info(f"Generated test {count_tests}/{count_tests}:")
code_print(concolic_test_str)
function_to_optimize_qualified_name = function_to_optimize.qualified_name
function_to_all_tests = {
key: function_to_tests.get(key, []) + function_to_concolic_tests.get(key, [])
for key in set(function_to_tests) | set(function_to_concolic_tests)
}
instrumented_unittests_created_for_function = self.instrument_existing_tests(
function_to_optimize=function_to_optimize, function_to_tests=function_to_all_tests
)
        # Map each helper file path to the names of the classes that contain its helper methods (excluding the function to optimize itself)
file_path_to_helper_classes = defaultdict(set)
for function_source in code_context.helper_functions:
if (
function_source.qualified_name != function_to_optimize.qualified_name
and "." in function_source.qualified_name
):
file_path_to_helper_classes[function_source.file_path].add(function_source.qualified_name.split(".")[0])
baseline_result = self.establish_original_code_baseline( # this needs better typing
function_name=function_to_optimize_qualified_name,
function_file_path=function_to_optimize.file_path,
code_context=code_context,
function_to_optimize=function_to_optimize,
original_helper_code=original_helper_code,
file_path_to_helper_classes=file_path_to_helper_classes,
)
console.rule()
paths_to_cleanup = (
generated_test_paths + generated_perf_test_paths + list(instrumented_unittests_created_for_function)
)
if not is_successful(baseline_result):
cleanup_paths(paths_to_cleanup)
return Failure(baseline_result.failure())
original_code_baseline, test_functions_to_remove = baseline_result.unwrap()
if isinstance(original_code_baseline, OriginalCodeBaseline) and not coverage_critic(
original_code_baseline.coverage_results, self.args.test_framework
):
cleanup_paths(paths_to_cleanup)
return Failure("The threshold for test coverage was not met.")
best_optimization = None
for u, candidates in enumerate([optimizations_set.control, optimizations_set.experiment]):
if candidates is None:
continue
best_optimization = self.determine_best_candidate(
candidates=candidates,
code_context=code_context,
function_to_optimize=function_to_optimize,
original_code=validated_original_code[function_to_optimize.file_path].source_code,
original_code_baseline=original_code_baseline,
original_helper_code=original_helper_code,
function_trace_id=function_trace_id[:-4] + f"EXP{u}" if should_run_experiment else function_trace_id,
file_path_to_helper_classes=file_path_to_helper_classes,
)
ph("cli-optimize-function-finished", {"function_trace_id": function_trace_id})
generated_tests = remove_functions_from_generated_tests(
generated_tests=generated_tests, test_functions_to_remove=test_functions_to_remove
)
if best_optimization:
logger.info("Best candidate:")
code_print(best_optimization.candidate.source_code)
console.print(
Panel(
best_optimization.candidate.explanation, title="Best Candidate Explanation", border_style="blue"
)
)
explanation = Explanation(
raw_explanation_message=best_optimization.candidate.explanation,
winning_behavioral_test_results=best_optimization.winning_behavioral_test_results,
winning_benchmarking_test_results=best_optimization.winning_benchmarking_test_results,
original_runtime_ns=original_code_baseline.runtime,
best_runtime_ns=best_optimization.runtime,
function_name=function_to_optimize_qualified_name,
file_path=function_to_optimize.file_path,
)
self.log_successful_optimization(explanation, function_to_optimize, function_trace_id, generated_tests)
self.replace_function_and_helpers_with_optimized_code(
code_context=code_context,
function_to_optimize_file_path=explanation.file_path,
optimized_code=best_optimization.candidate.source_code,
qualified_function_name=function_to_optimize_qualified_name,
)
new_code, new_helper_code = self.reformat_code_and_helpers(
code_context.helper_functions,
explanation.file_path,
validated_original_code[function_to_optimize.file_path].source_code,
)
existing_tests = existing_tests_source_for(
function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root),
function_to_all_tests,
tests_root=self.test_cfg.tests_root,
)
original_code_combined = original_helper_code.copy()
original_code_combined[explanation.file_path] = validated_original_code[
function_to_optimize.file_path
].source_code
new_code_combined = new_helper_code.copy()
new_code_combined[explanation.file_path] = new_code
if not self.args.no_pr:
coverage_message = (
original_code_baseline.coverage_results.build_message()
if original_code_baseline.coverage_results
else "Coverage data not available"
)
generated_tests_str = "\n\n".join(
[test.generated_original_test_source for test in generated_tests.generated_tests]
)
if concolic_test_str:
generated_tests_str += "\n\n" + concolic_test_str
check_create_pr(
original_code=original_code_combined,
new_code=new_code_combined,
explanation=explanation,
existing_tests_source=existing_tests,
generated_original_test_source=generated_tests_str,
function_trace_id=function_trace_id,
coverage_message=coverage_message,
git_remote=self.args.git_remote,
)
if self.args.all or env_utils.get_pr_number():
self.write_code_and_helpers(
validated_original_code[function_to_optimize.file_path].source_code,
original_helper_code,
function_to_optimize.file_path,
)
for generated_test_path in generated_test_paths:
generated_test_path.unlink(missing_ok=True)
for generated_perf_test_path in generated_perf_test_paths:
generated_perf_test_path.unlink(missing_ok=True)
for test_paths in instrumented_unittests_created_for_function:
test_paths.unlink(missing_ok=True)
for fn in function_to_concolic_tests:
for test in function_to_concolic_tests[fn]:
if not test.tests_in_file.test_file.parent.exists():
logger.warning(
f"Concolic test directory {test.tests_in_file.test_file.parent} does not exist so could not be deleted."
)
shutil.rmtree(test.tests_in_file.test_file.parent, ignore_errors=True)
break # need to delete only one test directory
if not best_optimization:
return Failure(f"No best optimizations found for function {function_to_optimize.qualified_name}")
return Success(best_optimization)
def determine_best_candidate(
self,
*,
candidates: list[OptimizedCandidate],
code_context: CodeOptimizationContext,
function_to_optimize: FunctionToOptimize,
original_code: str,
original_code_baseline: OriginalCodeBaseline,
original_helper_code: dict[Path, str],
function_trace_id: str,
file_path_to_helper_classes: dict[Path, set[str]],
) -> BestOptimization | None:
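        """Swap each candidate into the codebase, run the behavioral and benchmarking tests against it, and return the fastest candidate that passes the correctness and speedup checks."""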
best_optimization: BestOptimization | None = None
best_runtime_until_now = original_code_baseline.runtime
speedup_ratios: dict[str, float | None] = {}
optimized_runtimes: dict[str, float | None] = {}
is_correct = {}
logger.info(
f"Determining best optimization candidate (out of {len(candidates)}) for "
f"{function_to_optimize.qualified_name}"
)
console.rule()
try:
for candidate_index, candidate in enumerate(candidates, start=1):
get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{candidate_index}.sqlite")).unlink(missing_ok=True)
logger.info(f"Optimization candidate {candidate_index}/{len(candidates)}:")
code_print(candidate.source_code)
try:
did_update = self.replace_function_and_helpers_with_optimized_code(
code_context=code_context,
function_to_optimize_file_path=function_to_optimize.file_path,
optimized_code=candidate.source_code,
qualified_function_name=function_to_optimize.qualified_name,
)
if not did_update:
logger.warning(
"No functions were replaced in the optimized code. Skipping optimization candidate."
)
console.rule()
continue
except (ValueError, SyntaxError, cst.ParserSyntaxError, AttributeError) as e:
logger.error(e)
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
continue
                # Run the candidate's tests (codeflash capture is instrumented inside run_optimized_candidate)
run_results = self.run_optimized_candidate(
optimization_candidate_index=candidate_index,
baseline_results=original_code_baseline,
function_to_optimize=function_to_optimize,
original_helper_code=original_helper_code,
file_path_to_helper_classes=file_path_to_helper_classes,
)
console.rule()
if not is_successful(run_results):
optimized_runtimes[candidate.optimization_id] = None
is_correct[candidate.optimization_id] = False
speedup_ratios[candidate.optimization_id] = None
else:
candidate_result: OptimizedCandidateResult = run_results.unwrap()
best_test_runtime = candidate_result.best_test_runtime
optimized_runtimes[candidate.optimization_id] = best_test_runtime
is_correct[candidate.optimization_id] = True
perf_gain = performance_gain(
original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=best_test_runtime
)
speedup_ratios[candidate.optimization_id] = perf_gain
tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
if speedup_critic(
candidate_result, original_code_baseline.runtime, best_runtime_until_now
) and quantity_of_tests_critic(candidate_result):
tree.add("This candidate is faster than the previous best candidate. 🚀")
tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}")
tree.add(
f"Best summed runtime: {humanize_runtime(candidate_result.best_test_runtime)} "
f"(measured over {candidate_result.max_loop_count} "
f"loop{'s' if candidate_result.max_loop_count > 1 else ''})"
)
tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%")
tree.add(f"Speedup ratio: {perf_gain + 1:.1f}X")
best_optimization = BestOptimization(
candidate=candidate,
helper_functions=code_context.helper_functions,
runtime=best_test_runtime,
winning_behavioral_test_results=candidate_result.behavior_test_results,
winning_benchmarking_test_results=candidate_result.benchmarking_test_results,
)
best_runtime_until_now = best_test_runtime
else:
tree.add(
f"Summed runtime: {humanize_runtime(best_test_runtime)} "
f"(measured over {candidate_result.max_loop_count} "
f"loop{'s' if candidate_result.max_loop_count > 1 else ''})"
)
tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%")
tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X")
console.print(tree)
console.rule()
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
except KeyboardInterrupt as e:
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
logger.exception(f"Optimization interrupted: {e}")
raise
self.aiservice_client.log_results(
function_trace_id=function_trace_id,
speedup_ratio=speedup_ratios,
original_runtime=original_code_baseline.runtime,
optimized_runtime=optimized_runtimes,
is_correct=is_correct,
)
return best_optimization
def log_successful_optimization(
self,
explanation: Explanation,
function_to_optimize: FunctionToOptimize,
function_trace_id: str,
generated_tests: GeneratedTestsList,
) -> None:
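        """Print the optimization summary (plus the validated tests when no PR will be opened) and record the result in telemetry."""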
explanation_panel = Panel(
f"⚡️ Optimization successful! 📄 {function_to_optimize.qualified_name} in {explanation.file_path}\n"
f"📈 {explanation.perf_improvement_line}\n"
f"Explanation: \n{explanation.to_console_string()}",
title="Optimization Summary",
border_style="green",
)
if self.args.no_pr:
tests_panel = Panel(
Syntax(
"\n".join([test.generated_original_test_source for test in generated_tests.generated_tests]),
"python",
line_numbers=True,
),
title="Validated Tests",
border_style="blue",
)
            console.print(Group(explanation_panel, tests_panel))
        else:
            console.print(explanation_panel)
ph(
"cli-optimize-success",
{
"function_trace_id": function_trace_id,
"speedup_x": explanation.speedup_x,
"speedup_pct": explanation.speedup_pct,
"best_runtime": explanation.best_runtime_ns,
"original_runtime": explanation.original_runtime_ns,
"winning_test_results": {
tt.to_name(): v
for tt, v in explanation.winning_behavioral_test_results.get_test_pass_fail_report_by_type().items()
},
},
)
@staticmethod
def write_code_and_helpers(original_code: str, original_helper_code: dict[Path, str], path: Path) -> None:
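        """Restore the given source code of the optimized file and write back each helper module's original code."""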
with path.open("w", encoding="utf8") as f:
f.write(original_code)
for module_abspath in original_helper_code:
with Path(module_abspath).open("w", encoding="utf8") as f:
f.write(original_helper_code[module_abspath])
def reformat_code_and_helpers(
self, helper_functions: list[FunctionSource], path: Path, original_code: str
) -> tuple[str, dict[Path, str]]:
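        """Run the configured formatter over the optimized file and its helper modules and return the new sources; import sorting is applied only when the original file was already isort-clean."""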
should_sort_imports = not self.args.disable_imports_sorting
if should_sort_imports and isort.code(original_code) != original_code:
should_sort_imports = False
new_code = format_code(self.args.formatter_cmds, path)
if should_sort_imports:
new_code = sort_imports(new_code)
new_helper_code: dict[Path, str] = {}
helper_functions_paths = {hf.file_path for hf in helper_functions}
for module_abspath in helper_functions_paths:
formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath)
if should_sort_imports:
formatted_helper_code = sort_imports(formatted_helper_code)
new_helper_code[module_abspath] = formatted_helper_code
return new_code, new_helper_code
def replace_function_and_helpers_with_optimized_code(
self,
code_context: CodeOptimizationContext,
function_to_optimize_file_path: Path,
optimized_code: str,
qualified_function_name: str,
) -> bool:
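        """Write the optimized definitions back into the target module and into each non-class helper's module; return True if any module was updated."""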
did_update = False
read_writable_functions_by_file_path = defaultdict(set)
read_writable_functions_by_file_path[function_to_optimize_file_path].add(qualified_function_name)
for helper_function in code_context.helper_functions:
if helper_function.jedi_definition.type != "class":
read_writable_functions_by_file_path[helper_function.file_path].add(helper_function.qualified_name)
for module_abspath, qualified_names in read_writable_functions_by_file_path.items():
did_update |= replace_function_definitions_in_module(
function_names=list(qualified_names),
optimized_code=optimized_code,
module_abspath=module_abspath,
preexisting_objects=code_context.preexisting_objects,
project_root_path=self.args.project_root,
)
return did_update
def get_code_optimization_context(
self, function_to_optimize: FunctionToOptimize, project_root: Path, original_source_code: str
) -> Result[CodeOptimizationContext, str]:
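        """Assemble the optimization context: the target function's code with helper code and needed imports, plus the read-writable code, read-only context, and helper functions from the code context extractor."""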
code_to_optimize, contextual_dunder_methods = extract_code([function_to_optimize])
if code_to_optimize is None:
return Failure("Could not find function to optimize.")
(helper_code, helper_functions, helper_dunder_methods) = get_constrained_function_context_and_helper_functions(
function_to_optimize, self.args.project_root, code_to_optimize
)
if function_to_optimize.parents:
function_class = function_to_optimize.parents[0].name
same_class_helper_methods = [
df
for df in helper_functions
if df.qualified_name.count(".") > 0 and df.qualified_name.split(".")[0] == function_class
]
optimizable_methods = [
FunctionToOptimize(
df.qualified_name.split(".")[-1],
df.file_path,
[FunctionParent(df.qualified_name.split(".")[0], "ClassDef")],
None,
None,
)
for df in same_class_helper_methods
] + [function_to_optimize]
dedup_optimizable_methods = []
added_methods = set()
for method in reversed(optimizable_methods):
if f"{method.file_path}.{method.qualified_name}" not in added_methods:
dedup_optimizable_methods.append(method)
added_methods.add(f"{method.file_path}.{method.qualified_name}")
if len(dedup_optimizable_methods) > 1:
code_to_optimize, contextual_dunder_methods = extract_code(list(reversed(dedup_optimizable_methods)))
if code_to_optimize is None:
return Failure("Could not find function to optimize.")
code_to_optimize_with_helpers = helper_code + "\n" + code_to_optimize
code_to_optimize_with_helpers_and_imports = add_needed_imports_from_module(
original_source_code,
code_to_optimize_with_helpers,
function_to_optimize.file_path,
function_to_optimize.file_path,
project_root,
helper_functions,
)
try:
new_code_ctx = code_context_extractor.get_code_optimization_context(function_to_optimize, project_root)
except ValueError as e:
return Failure(str(e))
return Success(
CodeOptimizationContext(
code_to_optimize_with_helpers=code_to_optimize_with_helpers_and_imports,
read_writable_code=new_code_ctx.read_writable_code,
read_only_context_code=new_code_ctx.read_only_context_code,
helper_functions=new_code_ctx.helper_functions, # only functions that are read writable
preexisting_objects=new_code_ctx.preexisting_objects,
)
)
@staticmethod
def cleanup_leftover_test_return_values() -> None:
# remove leftovers from previous run
get_run_tmp_file(Path("test_return_values_0.bin")).unlink(missing_ok=True)
get_run_tmp_file(Path("test_return_values_0.sqlite")).unlink(missing_ok=True)
def instrument_existing_tests(
self, function_to_optimize: FunctionToOptimize, function_to_tests: dict[str, list[FunctionCalledInTest]]
) -> set[Path]:
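        """Write behavior- and performance-instrumented copies of every existing, replay, and concolic test that invokes the function; return the paths of the new test files."""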
existing_test_files_count = 0
replay_test_files_count = 0
concolic_coverage_test_files_count = 0
unique_instrumented_test_files = set()
func_qualname = function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root)
if func_qualname not in function_to_tests:
logger.info(f"Did not find any pre-existing tests for '{func_qualname}', will only use generated tests.")
console.rule()
else:
test_file_invocation_positions = defaultdict(list[FunctionCalledInTest])
for tests_in_file in function_to_tests.get(func_qualname):
test_file_invocation_positions[
(tests_in_file.tests_in_file.test_file, tests_in_file.tests_in_file.test_type)
].append(tests_in_file)
for (test_file, test_type), tests_in_file_list in test_file_invocation_positions.items():
path_obj_test_file = Path(test_file)
if test_type == TestType.EXISTING_UNIT_TEST:
existing_test_files_count += 1
elif test_type == TestType.REPLAY_TEST:
replay_test_files_count += 1
elif test_type == TestType.CONCOLIC_COVERAGE_TEST:
concolic_coverage_test_files_count += 1
else:
msg = f"Unexpected test type: {test_type}"
raise ValueError(msg)
success, injected_behavior_test = inject_profiling_into_existing_test(
mode=TestingMode.BEHAVIOR,
test_path=path_obj_test_file,
call_positions=[test.position for test in tests_in_file_list],
function_to_optimize=function_to_optimize,
tests_project_root=self.test_cfg.tests_project_rootdir,
test_framework=self.args.test_framework,
)
if not success:
continue
success, injected_perf_test = inject_profiling_into_existing_test(
mode=TestingMode.PERFORMANCE,
test_path=path_obj_test_file,
call_positions=[test.position for test in tests_in_file_list],
function_to_optimize=function_to_optimize,
tests_project_root=self.test_cfg.tests_project_rootdir,
test_framework=self.args.test_framework,
)
if not success:
continue
# TODO: this naming logic should be moved to a function and made more standard
new_behavioral_test_path = Path(
f"{os.path.splitext(test_file)[0]}__perfinstrumented{os.path.splitext(test_file)[1]}"
)
new_perf_test_path = Path(
f"{os.path.splitext(test_file)[0]}__perfonlyinstrumented{os.path.splitext(test_file)[1]}"
)
if injected_behavior_test is not None:
with new_behavioral_test_path.open("w", encoding="utf8") as _f:
_f.write(injected_behavior_test)
else:
msg = "injected_behavior_test is None"
raise ValueError(msg)
if injected_perf_test is not None:
with new_perf_test_path.open("w", encoding="utf8") as _f:
_f.write(injected_perf_test)
unique_instrumented_test_files.add(new_behavioral_test_path)
unique_instrumented_test_files.add(new_perf_test_path)
if not self.test_files.get_by_original_file_path(path_obj_test_file):
self.test_files.add(
TestFile(
instrumented_behavior_file_path=new_behavioral_test_path,
benchmarking_file_path=new_perf_test_path,
original_source=None,
original_file_path=Path(test_file),
test_type=test_type,
tests_in_file=[t.tests_in_file for t in tests_in_file_list],
)
)
logger.info(
f"Discovered {existing_test_files_count} existing unit test file"
f"{'s' if existing_test_files_count != 1 else ''}, {replay_test_files_count} replay test file"
f"{'s' if replay_test_files_count != 1 else ''}, and "
f"{concolic_coverage_test_files_count} concolic coverage test file"
f"{'s' if concolic_coverage_test_files_count != 1 else ''} for {func_qualname}"
)
return unique_instrumented_test_files
def generate_tests_and_optimizations(
self,
code_to_optimize_with_helpers: str,
read_writable_code: str,
read_only_context_code: str,
function_to_optimize: FunctionToOptimize,
helper_functions: list[FunctionSource],
module_path: Path,
function_trace_id: str,
generated_test_paths: list[Path],
generated_perf_test_paths: list[Path],
function_to_optimize_ast: ast.FunctionDef,
run_experiment: bool = False,
    ) -> Result[tuple[GeneratedTestsList, dict[str, list[FunctionCalledInTest]], str, OptimizationSet], str]:
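        """Concurrently generate regression tests, concolic tests, and optimization candidates; fail if no candidates or no tests are produced."""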
assert len(generated_test_paths) == N_TESTS_TO_GENERATE
max_workers = N_TESTS_TO_GENERATE + 2 if not run_experiment else N_TESTS_TO_GENERATE + 3
console.rule()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit the test generation task as future
future_tests = self.generate_and_instrument_tests(
executor,
code_to_optimize_with_helpers,
function_to_optimize,
[definition.fully_qualified_name for definition in helper_functions],
module_path,
generated_test_paths,
generated_perf_test_paths,
(function_trace_id[:-4] + "EXP0" if run_experiment else function_trace_id),
)
future_optimization_candidates = executor.submit(
self.aiservice_client.optimize_python_code,
read_writable_code,
read_only_context_code,
function_trace_id[:-4] + "EXP0" if run_experiment else function_trace_id,
N_CANDIDATES,
ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
)
future_candidates_exp = None
future_concolic_tests = executor.submit(
generate_concolic_tests, self.test_cfg, self.args, function_to_optimize, function_to_optimize_ast
)
futures = [*future_tests, future_optimization_candidates, future_concolic_tests]
if run_experiment:
future_candidates_exp = executor.submit(
self.local_aiservice_client.optimize_python_code,
read_writable_code,
read_only_context_code,
function_trace_id[:-4] + "EXP1",
N_CANDIDATES,
ExperimentMetadata(id=self.experiment_id, group="experiment"),
)
futures.append(future_candidates_exp)
# Wait for all futures to complete
concurrent.futures.wait(futures)
# Retrieve results
candidates: list[OptimizedCandidate] = future_optimization_candidates.result()
if not candidates:
return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {function_to_optimize.function_name}")
candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None
# Process test generation results
tests: list[GeneratedTests] = []
for future in future_tests:
res = future.result()
if res:
(
generated_test_source,
instrumented_behavior_test_source,
instrumented_perf_test_source,
test_behavior_path,
test_perf_path,
) = res
tests.append(
GeneratedTests(
generated_original_test_source=generated_test_source,
instrumented_behavior_test_source=instrumented_behavior_test_source,
instrumented_perf_test_source=instrumented_perf_test_source,
behavior_file_path=test_behavior_path,
perf_file_path=test_perf_path,
)
)
if not tests:
logger.warning(f"Failed to generate and instrument tests for {function_to_optimize.function_name}")
return Failure(f"/!\\ NO TESTS GENERATED for {function_to_optimize.function_name}")
function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
logger.info(f"Generated {len(tests)} tests for {function_to_optimize.function_name}")
console.rule()
generated_tests = GeneratedTestsList(generated_tests=tests)
return Success(
(
generated_tests,
function_to_concolic_tests,
concolic_test_str,
OptimizationSet(control=candidates, experiment=candidates_experiment),
)
)
    def establish_original_code_baseline(
        self,
        function_name: str,
        function_file_path: Path,
        code_context: CodeOptimizationContext,
        function_to_optimize: FunctionToOptimize,
        original_helper_code: dict[Path, str],
        file_path_to_helper_classes: dict[Path, set[str]],
    ) -> Result[tuple[OriginalCodeBaseline, list[str]], str]:
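        """Instrument and run the behavioral and benchmarking tests against the unmodified code to capture its correctness results, coverage, and baseline runtime."""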
# For the original function - run the tests and get the runtime, plus coverage
with progress_bar(f"Establishing original code baseline for {function_name}"):
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
success = True
test_env = os.environ.copy()
test_env["CODEFLASH_TEST_ITERATION"] = "0"
test_env["CODEFLASH_TRACER_DISABLE"] = "1"
if "PYTHONPATH" not in test_env:
test_env["PYTHONPATH"] = str(self.args.project_root)
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)
coverage_results = None
original_fto_code = function_file_path.read_text("utf-8")
# Instrument codeflash capture
try:
instrument_codeflash_capture(
function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
)
behavioral_results, coverage_results = self.run_and_parse_tests(
testing_type=TestingMode.BEHAVIOR,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
testing_time=TOTAL_LOOPING_TIME,
enable_coverage=test_framework == "pytest",
function_name=function_name,
source_file=function_file_path,
code_context=code_context,
)
finally:
# Remove codeflash capture
self.write_code_and_helpers(original_fto_code, original_helper_code, function_to_optimize.file_path)
if test_framework == "pytest":
benchmarking_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
testing_time=TOTAL_LOOPING_TIME,
enable_coverage=False,
function_name=function_name,
source_file=function_file_path,
code_context=code_context,
)
else:
benchmarking_results = TestResults()
start_time: float = time.time()
for i in range(100):
if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME * 1.5:
# * 1.5 to give unittest a bit more time to run
break
test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
unittest_loop_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
testing_time=TOTAL_LOOPING_TIME,
enable_coverage=False,
function_name=function_name,
source_file=function_file_path,
code_context=code_context,
unittest_loop_index=i + 1,
)
benchmarking_results.merge(unittest_loop_results)
console.print(
TestResults.report_to_tree(
behavioral_results.get_test_pass_fail_report_by_type(),
title="Overall test results for original code",
)
)
console.rule()
total_timing = benchmarking_results.total_passed_runtime() # caution: doesn't handle the loop index
functions_to_remove = [
result.id.test_function_name
for result in behavioral_results
if (result.test_type == TestType.GENERATED_REGRESSION and not result.did_pass)
]
if not behavioral_results:
logger.warning(
f"Couldn't run any tests for original function {function_name}. SKIPPING OPTIMIZING THIS FUNCTION."
)
console.rule()
success = False
if total_timing == 0:
logger.warning(
"The overall summed benchmark runtime of the original function is 0, couldn't run tests."
)
console.rule()
success = False
if not total_timing:
logger.warning("Failed to run the tests for the original function, skipping optimization")
console.rule()
success = False
if not success:
return Failure("Failed to establish a baseline for the original code.")
loop_count = max([int(result.loop_index) for result in benchmarking_results.test_results])
logger.info(
f"Original code summed runtime measured over {loop_count} loop{'s' if loop_count > 1 else ''}: "
f"{humanize_runtime(total_timing)} per full loop"
)
console.rule()
logger.debug(f"Total original code runtime (ns): {total_timing}")
return Success(
(
OriginalCodeBaseline(
behavioral_test_results=behavioral_results,
benchmarking_test_results=benchmarking_results,
runtime=total_timing,
coverage_results=coverage_results,
),
functions_to_remove,
)
)
    def run_optimized_candidate(
        self,
        *,
        optimization_candidate_index: int,
        baseline_results: OriginalCodeBaseline,
        function_to_optimize: FunctionToOptimize,
        original_helper_code: dict[Path, str],
        file_path_to_helper_classes: dict[Path, set[str]],
    ) -> Result[OptimizedCandidateResult, str]:
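        """Run the behavioral and benchmarking tests against an optimization candidate and verify that its behavior matches the original baseline before reporting its runtime."""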
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
with progress_bar("Testing optimization candidate"):
test_env = os.environ.copy()
test_env["CODEFLASH_TEST_ITERATION"] = str(optimization_candidate_index)
test_env["CODEFLASH_TRACER_DISABLE"] = "1"
if "PYTHONPATH" not in test_env:
test_env["PYTHONPATH"] = str(self.args.project_root)
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)
            get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)
            get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
# Instrument codeflash capture
candidate_fto_code = Path(function_to_optimize.file_path).read_text("utf-8")
candidate_helper_code = {}
for module_abspath in original_helper_code:
candidate_helper_code[module_abspath] = Path(module_abspath).read_text("utf-8")
try:
instrument_codeflash_capture(
function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
)
candidate_behavior_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.BEHAVIOR,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
testing_time=TOTAL_LOOPING_TIME,
enable_coverage=False,
)
# Remove instrumentation
finally:
self.write_code_and_helpers(candidate_fto_code, candidate_helper_code, function_to_optimize.file_path)
console.print(
TestResults.report_to_tree(
candidate_behavior_results.get_test_pass_fail_report_by_type(),
title="Behavioral Test Results for candidate",
)
)
console.rule()
if compare_test_results(baseline_results.behavioral_test_results, candidate_behavior_results):
logger.info("Test results matched!")
console.rule()
else:
logger.info("Test results did not match the test results of the original code.")
console.rule()
return Failure("Test results did not match the test results of the original code.")
if test_framework == "pytest":
candidate_benchmarking_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
testing_time=TOTAL_LOOPING_TIME,
enable_coverage=False,
)
loop_count = (
max(all_loop_indices)
if (
all_loop_indices := {
result.loop_index for result in candidate_benchmarking_results.test_results
}
)
else 0
)
else:
candidate_benchmarking_results = TestResults()
start_time: float = time.time()
loop_count = 0
for i in range(100):
if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME * 1.5:
# * 1.5 to give unittest a bit more time to run
break
test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
unittest_loop_results, cov = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
testing_time=TOTAL_LOOPING_TIME,
unittest_loop_index=i + 1,
)
loop_count = i + 1
candidate_benchmarking_results.merge(unittest_loop_results)
if (total_candidate_timing := candidate_benchmarking_results.total_passed_runtime()) == 0:
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
console.rule()
logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}")
return Success(
OptimizedCandidateResult(
max_loop_count=loop_count,
best_test_runtime=total_candidate_timing,
behavior_test_results=candidate_behavior_results,
benchmarking_test_results=candidate_benchmarking_results,
optimization_candidate_index=optimization_candidate_index,
total_candidate_timing=total_candidate_timing,
)
)
def run_and_parse_tests(
self,
testing_type: TestingMode,
test_env: dict[str, str],
test_files: TestFiles,
optimization_iteration: int,
testing_time: float = TOTAL_LOOPING_TIME,
*,
enable_coverage: bool = False,
pytest_min_loops: int = 5,
pytest_max_loops: int = 100_000,
function_name: str | None = None,
source_file: Path | None = None,
code_context: CodeOptimizationContext | None = None,
unittest_loop_index: int | None = None,
) -> tuple[TestResults, CoverageData | None]:
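        """Execute the given test files in behavior or performance mode and parse the results (and coverage data when enabled)."""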
coverage_database_file = None
try:
if testing_type == TestingMode.BEHAVIOR:
result_file_path, run_result, coverage_database_file = run_behavioral_tests(
test_files,
test_framework=self.args.test_framework,
cwd=self.args.project_root,
test_env=test_env,
pytest_timeout=INDIVIDUAL_TESTCASE_TIMEOUT,
pytest_cmd=self.test_cfg.pytest_cmd,
verbose=True,
enable_coverage=enable_coverage,
)
elif testing_type == TestingMode.PERFORMANCE:
result_file_path, run_result = run_benchmarking_tests(
test_files,
cwd=self.args.project_root,
test_env=test_env,
pytest_timeout=INDIVIDUAL_TESTCASE_TIMEOUT,
pytest_cmd=self.test_cfg.pytest_cmd,
pytest_target_runtime_seconds=testing_time,
pytest_min_loops=pytest_min_loops,
pytest_max_loops=pytest_max_loops,
test_framework=self.args.test_framework,
)
else:
raise ValueError(f"Unexpected testing type: {testing_type}")
except subprocess.TimeoutExpired:
logger.exception(
f'Error running tests in {", ".join(str(f) for f in test_files.test_files)}.\nTimeout Error'
)
return TestResults(), None
if run_result.returncode != 0:
logger.debug(
f'Nonzero return code {run_result.returncode} when running tests in '
f'{", ".join([str(f.instrumented_behavior_file_path) for f in test_files.test_files])}.\n'
f"stdout: {run_result.stdout}\n"
f"stderr: {run_result.stderr}\n"
)
results, coverage_results = parse_test_results(
test_xml_path=result_file_path,
test_files=test_files,
test_config=self.test_cfg,
optimization_iteration=optimization_iteration,
run_result=run_result,
unittest_loop_index=unittest_loop_index,
function_name=function_name,
source_file=source_file,
code_context=code_context,
coverage_database_file=coverage_database_file,
)
return results, coverage_results
def generate_and_instrument_tests(
self,
executor: concurrent.futures.ThreadPoolExecutor,
source_code_being_tested: str,
function_to_optimize: FunctionToOptimize,
helper_function_names: list[str],
module_path: Path,
generated_test_paths: list[Path],
generated_perf_test_paths: list[Path],
function_trace_id: str,
) -> list[concurrent.futures.Future]:
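        """Submit one test-generation task per generated test path to the executor and return the resulting futures."""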
return [
executor.submit(
generate_tests,
self.aiservice_client,
source_code_being_tested,
function_to_optimize,
helper_function_names,
module_path,
self.test_cfg,
INDIVIDUAL_TESTCASE_TIMEOUT,
function_trace_id,
test_index,
test_path,
test_perf_path,
)
for test_index, (test_path, test_perf_path) in enumerate(
zip(generated_test_paths, generated_perf_test_paths)
)
]
def run_with_args(args: Namespace) -> None:
optimizer = Optimizer(args)
optimizer.run()