Merge pull request #634 from codeflash-ai/fix-methods-as-dependents

Fix extracting helper methods
This commit is contained in:
Saurabh Misra 2024-05-20 15:02:00 -07:00 committed by GitHub
commit b05db90e43
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 268 additions and 159 deletions

View file

@ -156,7 +156,7 @@ class AiServiceClient:
self,
source_code_being_tested: str,
function_to_optimize: FunctionToOptimize,
dependent_function_names: list[str],
helper_function_names: list[str],
module_path: str,
test_module_path: str,
test_framework: str,
@ -169,7 +169,7 @@ class AiServiceClient:
----------
- source_code_being_tested (str): The source code of the function being tested.
- function_to_optimize (FunctionToOptimize): The function to optimize.
- dependent_function_names (list[Source]): List of dependent function names.
- helper_function_names (list[Source]): List of helper function names.
- module_path (str): The module path where the function is located.
- test_module_path (str): The module path for the test code.
- test_framework (str): The test framework to use, e.g., "pytest".
@ -187,7 +187,7 @@ class AiServiceClient:
payload = {
"source_code_being_tested": source_code_being_tested,
"function_to_optimize": function_to_optimize,
"dependent_function_names": dependent_function_names,
"helper_function_names": helper_function_names,
"module_path": module_path,
"test_module_path": test_module_path,
"test_framework": test_framework,

View file

@ -193,9 +193,7 @@ def get_function_variables_definitions(
for name in names:
try:
definitions: list[Name] = script.goto(
line=name.line,
column=name.column,
definitions: list[Name] = name.goto(
follow_imports=True,
follow_builtin_imports=False,
)
@ -208,21 +206,22 @@ def get_function_variables_definitions(
definitions = []
if definitions:
# TODO: there can be multiple definitions, see how to handle such cases
definition_path = str(definitions[0].module_path)
definition = definitions[0]
definition_path = str(definition.module_path)
# The definition is part of this project and not defined within the original function
if (
definition_path.startswith(project_root_path + os.sep)
and not path_belongs_to_site_packages(definition_path)
and definitions[0].full_name
and not belongs_to_function(definitions[0], function_name)
and definition.full_name
and not belongs_to_function(definition, function_name)
):
source_code = get_code_no_skeleton(definition_path, definitions[0].name)
if source_code:
sources.append(
(
Source(name.full_name, definitions[0], source_code),
Source(definition.full_name, definition, source_code),
definition_path,
name.full_name.removeprefix(name.module_name + "."),
definition.full_name.removeprefix(name.module_name + "."),
),
)
annotation_sources = get_type_annotation_context(
@ -243,7 +242,7 @@ def get_function_variables_definitions(
MAX_PROMPT_TOKENS = 4096 # 128000 # gpt-4-128k
def get_constrained_function_context_and_dependent_functions(
def get_constrained_function_context_and_helper_functions(
function_to_optimize: FunctionToOptimize,
project_root_path: str,
code_to_optimize: str,
@ -251,7 +250,7 @@ def get_constrained_function_context_and_dependent_functions(
) -> tuple[str, list[tuple[Source, str, str]]]:
# TODO: Not just do static analysis, but also find the datatypes of function arguments by running the existing
# unittests and inspecting the arguments to resolve the real definitions and dependencies.
dependent_functions: list[tuple[Source, str, str]] = get_function_variables_definitions(
helper_functions: list[tuple[Source, str, str]] = get_function_variables_definitions(
function_to_optimize,
project_root_path,
)
@ -259,25 +258,25 @@ def get_constrained_function_context_and_dependent_functions(
code_to_optimize_tokens = tokenizer.encode(code_to_optimize)
if not function_to_optimize.parents:
dependent_functions_sources = [function[0].source_code for function in dependent_functions]
helper_functions_sources = [function[0].source_code for function in helper_functions]
else:
dependent_functions_sources = [
helper_functions_sources = [
function[0].source_code
for function in dependent_functions
for function in helper_functions
if not function[2].count(".") or function[2].split(".")[0] != function_to_optimize.parents[0].name
]
dependent_functions_tokens = [len(tokenizer.encode(function)) for function in dependent_functions_sources]
helper_functions_tokens = [len(tokenizer.encode(function)) for function in helper_functions_sources]
context_list = []
context_len = len(code_to_optimize_tokens)
logging.debug(f"ORIGINAL CODE TOKENS LENGTH: {context_len}")
logging.debug(f"ALL DEPENDENCIES TOKENS LENGTH: {sum(dependent_functions_tokens)}")
for function_source, source_len in zip(dependent_functions_sources, dependent_functions_tokens):
logging.debug(f"ALL DEPENDENCIES TOKENS LENGTH: {sum(helper_functions_tokens)}")
for function_source, source_len in zip(helper_functions_sources, helper_functions_tokens):
if context_len + source_len <= max_tokens:
context_list.append(function_source)
context_len += source_len
else:
break
logging.debug("FINAL OPTIMIZATION CONTEXT TOKENS LENGTH:", context_len)
dependent_code: str = "\n".join(context_list)
return dependent_code, dependent_functions
helper_code: str = "\n".join(context_list)
return helper_code, helper_functions

View file

@ -54,7 +54,7 @@ from codeflash.discovery.functions_to_optimize import (
from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.optimization.function_context import (
Source,
get_constrained_function_context_and_dependent_functions,
get_constrained_function_context_and_helper_functions,
)
from codeflash.result.create_pr import check_create_pr, existing_tests_source_for
from codeflash.result.explanation import Explanation
@ -71,7 +71,7 @@ from codeflash.verification.verifier import generate_tests
class OptimizationSet(BaseModel):
control: list[OptimizedCandidate]
experiment: list[OptimizedCandidate] | None
experiment: Optional[list[OptimizedCandidate]]
class OptimizedCandidateResult(BaseModel):
@ -83,7 +83,7 @@ class OptimizedCandidateResult(BaseModel):
class OriginalCodeBaseline(BaseModel):
generated_test_results: TestResults
existing_test_results: TestResults
overall_test_results: TestResults | None
overall_test_results: Optional[TestResults]
runtime: int
@ -94,15 +94,15 @@ class GeneratedTests(BaseModel):
class BestOptimization(BaseModel):
candidate: OptimizedCandidate
dependent_functions: list[tuple[Source, str, str]]
helper_functions: list[tuple[Source, str, str]]
runtime: int
winning_test_results: TestResults
class CodeOptimizationContext(BaseModel):
code_to_optimize_with_dependents: str
code_to_optimize_with_helpers: str
contextual_dunder_methods: set[tuple[str, str]]
dependent_functions: list[tuple[Source, str, str]]
helper_functions: list[tuple[Source, str, str]]
preexisting_functions: list[str]
@ -232,21 +232,21 @@ class Optimizer:
if not is_successful(ctx_result):
return Failure(ctx_result.failure())
code_context: CodeOptimizationContext = ctx_result.unwrap()
dependent_functions_by_module_abspath = defaultdict(set)
for _, module_abspath, qualified_name in code_context.dependent_functions:
dependent_functions_by_module_abspath[module_abspath].add(qualified_name)
original_dependent_code = {}
for module_abspath in dependent_functions_by_module_abspath:
helper_functions_by_module_abspath = defaultdict(set)
for _, module_abspath, qualified_name in code_context.helper_functions:
helper_functions_by_module_abspath[module_abspath].add(qualified_name)
original_helper_code = {}
for module_abspath in helper_functions_by_module_abspath:
with pathlib.Path(module_abspath).open(encoding="utf8") as f:
dependent_code = f.read()
original_dependent_code[module_abspath] = dependent_code
logging.info(f"Code to be optimized:\n{code_context.code_to_optimize_with_dependents}")
helper_code = f.read()
original_helper_code[module_abspath] = helper_code
logging.info(f"Code to be optimized:\n{code_context.code_to_optimize_with_helpers}")
module_path = module_name_from_file_path(function_to_optimize.file_path, self.args.project_root)
for module_abspath in original_dependent_code:
code_context.code_to_optimize_with_dependents = add_needed_imports_from_module(
original_dependent_code[module_abspath],
code_context.code_to_optimize_with_dependents,
for module_abspath in original_helper_code:
code_context.code_to_optimize_with_helpers = add_needed_imports_from_module(
original_helper_code[module_abspath],
code_context.code_to_optimize_with_helpers,
module_abspath,
function_to_optimize.file_path,
self.args.project_root,
@ -259,9 +259,9 @@ class Optimizer:
self.instrumented_unittests_created.update(instrumented_unittests_created_for_function)
generated_results = self.generate_tests_and_optimizations(
code_context.code_to_optimize_with_dependents,
code_context.code_to_optimize_with_helpers,
function_to_optimize,
code_context.dependent_functions,
code_context.helper_functions,
module_path,
function_trace_id,
run_experiment=should_run_experiment,
@ -308,13 +308,13 @@ class Optimizer:
best_optimization = self.determine_best_candidate(
candidates,
code_context,
dependent_functions_by_module_abspath,
helper_functions_by_module_abspath,
function_to_optimize,
generated_tests_path,
instrumented_unittests_created_for_function,
original_code,
original_code_baseline,
original_dependent_code,
original_helper_code,
function_trace_id[:-4] + f"EXP{u}" if should_run_experiment else function_trace_id,
tests_in_file,
)
@ -341,16 +341,16 @@ class Optimizer:
generated_tests,
)
self.replace_function_and_dependents_with_optimized_code(
self.replace_function_and_helpers_with_optimized_code(
code_context,
dependent_functions_by_module_abspath,
helper_functions_by_module_abspath,
explanation,
best_optimization.candidate.source_code,
function_to_optimize.qualified_name,
)
new_code, new_dependent_code = self.reformat_code_and_dependents(
dependent_functions_by_module_abspath,
new_code, new_helper_code = self.reformat_code_and_helpers(
helper_functions_by_module_abspath,
explanation.file_path,
original_code,
)
@ -361,9 +361,9 @@ class Optimizer:
tests_root=self.test_cfg.tests_root,
)
original_code_combined = original_dependent_code.copy()
original_code_combined = original_helper_code.copy()
original_code_combined[explanation.file_path] = original_code
new_code_combined = new_dependent_code.copy()
new_code_combined = new_helper_code.copy()
new_code_combined[explanation.file_path] = new_code
if not self.args.no_pr:
check_create_pr(
@ -378,11 +378,11 @@ class Optimizer:
# a) Error propagation, where error in one function can cause the next optimization to fail
# b) Performance estimates become unstable, as the runtime of an optimization might be
# dependent on the runtime of the previous optimization
self.write_code_and_dependents(
self.write_code_and_helpers(
original_code,
original_dependent_code,
original_helper_code,
function_to_optimize.file_path,
dependent_functions_by_module_abspath,
helper_functions_by_module_abspath,
)
# Delete all the generated tests to not cause any clutter.
pathlib.Path(generated_tests_path).unlink(missing_ok=True)
@ -396,13 +396,13 @@ class Optimizer:
self,
candidates: list[OptimizedCandidate],
code_context: CodeOptimizationContext,
dependent_functions_by_module_abspath: dict[str, set[str]],
helper_functions_by_module_abspath: dict[str, set[str]],
function_to_optimize: FunctionToOptimize,
generated_tests_path: str,
instrumented_unittests_created_for_function: set[str],
original_code: str,
original_code_baseline: OriginalCodeBaseline,
original_dependent_code: dict[str, str],
original_helper_code: dict[str, str],
function_trace_id: str,
only_run_this_test_function: list[TestsInFile] | None = None,
) -> BestOptimization | None:
@ -442,7 +442,7 @@ class Optimizer:
for (
module_abspath,
qualified_names,
) in dependent_functions_by_module_abspath.items():
) in helper_functions_by_module_abspath.items():
replace_function_definitions_in_module(
function_names=list(qualified_names),
optimized_code=candidate.source_code,
@ -459,11 +459,11 @@ class Optimizer:
AttributeError,
) as e:
logging.error(e) # noqa: TRY400
self.write_code_and_dependents(
self.write_code_and_helpers(
original_code,
original_dependent_code,
original_helper_code,
function_to_optimize.file_path,
dependent_functions_by_module_abspath,
helper_functions_by_module_abspath,
)
continue
@ -519,16 +519,16 @@ class Optimizer:
)
best_optimization = BestOptimization(
candidate=candidate,
dependent_functions=code_context.dependent_functions,
helper_functions=code_context.helper_functions,
runtime=best_test_runtime,
winning_test_results=candidate_result.best_test_results,
)
best_runtime_until_now = best_test_runtime
self.write_code_and_dependents(
self.write_code_and_helpers(
original_code,
original_dependent_code,
original_helper_code,
function_to_optimize.file_path,
dependent_functions_by_module_abspath,
helper_functions_by_module_abspath,
)
logging.info("----------------")
self.aiservice_client.log_results(
@ -571,22 +571,22 @@ class Optimizer:
},
)
def write_code_and_dependents(
def write_code_and_helpers(
self,
original_code: str,
original_dependent_code: dict[str, str],
original_helper_code: dict[str, str],
path: str,
dependent_functions_by_module_abspath: dict[str, set[str]],
helper_functions_by_module_abspath: dict[str, set[str]],
) -> None:
with pathlib.Path(path).open("w", encoding="utf8") as f:
f.write(original_code)
for module_abspath in dependent_functions_by_module_abspath:
for module_abspath in helper_functions_by_module_abspath:
with pathlib.Path(module_abspath).open("w", encoding="utf8") as f:
f.write(original_dependent_code[module_abspath])
f.write(original_helper_code[module_abspath])
def reformat_code_and_dependents(
def reformat_code_and_helpers(
self,
dependent_functions_by_module_abspath: dict[str, set[str]],
helper_functions_by_module_abspath: dict[str, set[str]],
path: str,
original_code: str,
) -> tuple[str, dict[str, str]]:
@ -600,21 +600,21 @@ class Optimizer:
should_sort_imports,
path,
)
new_dependent_code: dict[str, str] = {
new_helper_code: dict[str, str] = {
module_abspath: format_code(
self.args.formatter_cmd,
self.args.imports_sort_cmd,
should_sort_imports,
module_abspath,
)
for module_abspath in dependent_functions_by_module_abspath
for module_abspath in helper_functions_by_module_abspath
}
return new_code, new_dependent_code
return new_code, new_helper_code
def replace_function_and_dependents_with_optimized_code(
def replace_function_and_helpers_with_optimized_code(
self,
code_context: CodeOptimizationContext,
dependent_functions_by_module_abspath: dict[str, set[str]],
helper_functions_by_module_abspath: dict[str, set[str]],
explanation: Explanation,
optimized_code: str,
qualified_function_name: str,
@ -631,7 +631,7 @@ class Optimizer:
for (
module_abspath,
qualified_names,
) in dependent_functions_by_module_abspath.items():
) in helper_functions_by_module_abspath.items():
replace_function_definitions_in_module(
function_names=list(qualified_names),
optimized_code=optimized_code,
@ -657,29 +657,29 @@ class Optimizer:
if not success:
return Failure("Error in parsing the code, skipping optimization.")
(
dependent_code,
dependent_functions,
) = get_constrained_function_context_and_dependent_functions(
helper_code,
helper_functions,
) = get_constrained_function_context_and_helper_functions(
function_to_optimize,
self.args.project_root,
code_to_optimize,
)
if function_to_optimize.parents:
function_class = function_to_optimize.parents[0].name
dependent_methods = [
helper_methods = [
df
for df in dependent_functions
for df in helper_functions
if df[2].count(".") > 0 and df[2].split(".")[0] == function_class
]
optimizable_methods = [function_to_optimize] + [
FunctionToOptimize(
df[2].split(".")[0],
df[2].split(".")[-1],
"",
[FunctionParent(df[2].split(".")[0], "ClassDef")],
None,
None,
)
for df in dependent_methods
for df in helper_methods
]
if len(optimizable_methods) > 1:
code_to_optimize, contextual_dunder_methods = extract_code(
@ -687,23 +687,23 @@ class Optimizer:
)
if code_to_optimize is None:
return Failure("Could not find function to optimize.")
code_to_optimize_with_dependents = dependent_code + "\n" + code_to_optimize
code_to_optimize_with_helpers = helper_code + "\n" + code_to_optimize
code_to_optimize_with_dependents_and_imports = add_needed_imports_from_module(
code_to_optimize_with_helpers_and_imports = add_needed_imports_from_module(
original_source_code,
code_to_optimize_with_dependents,
code_to_optimize_with_helpers,
function_to_optimize.file_path,
function_to_optimize.file_path,
project_root,
)
preexisting_functions.extend(
[fn[0].full_name.split(".")[-1] for fn in dependent_functions],
[fn[0].full_name.split(".")[-1] for fn in helper_functions],
)
return Success(
CodeOptimizationContext(
code_to_optimize_with_dependents=code_to_optimize_with_dependents_and_imports,
code_to_optimize_with_helpers=code_to_optimize_with_helpers_and_imports,
contextual_dunder_methods=contextual_dunder_methods,
dependent_functions=dependent_functions,
helper_functions=helper_functions,
preexisting_functions=preexisting_functions,
),
)
@ -758,9 +758,9 @@ class Optimizer:
def generate_tests_and_optimizations(
self,
code_to_optimize_with_dependents: str,
code_to_optimize_with_helpers: str,
function_to_optimize: FunctionToOptimize,
dependent_functions: list[tuple[Source, str, str]],
helper_functions: list[tuple[Source, str, str]],
module_path: str,
function_trace_id: str,
run_experiment: bool = False,
@ -771,15 +771,15 @@ class Optimizer:
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_tests = executor.submit(
self.generate_and_instrument_tests,
code_to_optimize_with_dependents,
code_to_optimize_with_helpers,
function_to_optimize,
[definition[0].full_name for definition in dependent_functions],
[definition[0].full_name for definition in helper_functions],
module_path,
function_trace_id[:-4] + "EXP0" if run_experiment else function_trace_id,
)
future_optimization_candidates = executor.submit(
self.aiservice_client.optimize_python_code,
code_to_optimize_with_dependents,
code_to_optimize_with_helpers,
function_trace_id[:-4] + "EXP0" if run_experiment else function_trace_id,
N_CANDIDATES,
ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
@ -787,7 +787,7 @@ class Optimizer:
if run_experiment:
future_candidates_exp = executor.submit(
self.local_aiservice_client.optimize_python_code,
code_to_optimize_with_dependents,
code_to_optimize_with_helpers,
function_trace_id[:-4] + "EXP1",
N_CANDIDATES,
ExperimentMetadata(id=self.experiment_id, group="experiment"),
@ -1181,7 +1181,7 @@ class Optimizer:
self,
source_code_being_tested: str,
function_to_optimize: FunctionToOptimize,
dependent_function_names: list[str],
helper_function_names: list[str],
module_path: str,
function_trace_id: str,
) -> Union[Tuple[str, str], None]:
@ -1189,7 +1189,7 @@ class Optimizer:
self.aiservice_client,
source_code_being_tested=source_code_being_tested,
function_to_optimize=function_to_optimize,
dependent_function_names=dependent_function_names,
helper_function_names=helper_function_names,
module_path=module_path,
test_cfg=self.test_cfg,
test_timeout=INDIVIDUAL_TEST_TIMEOUT,

View file

@ -17,7 +17,7 @@ def generate_tests(
aiservice_client: AiServiceClient,
source_code_being_tested: str,
function_to_optimize: FunctionToOptimize,
dependent_function_names: list[str],
helper_function_names: list[str],
module_path: str,
test_cfg: TestConfig,
test_timeout: int,
@ -47,7 +47,7 @@ def generate_tests(
response = aiservice_client.generate_regression_tests(
source_code_being_tested=source_code_being_tested,
function_to_optimize=function_to_optimize,
dependent_function_names=dependent_function_names,
helper_function_names=helper_function_names,
module_path=module_path,
test_module_path=test_module_path,
test_framework=test_cfg.test_framework,

View file

@ -307,7 +307,7 @@ def blob(st):
"""
original_code_main = """import libcst as cst
from typing import Mandatory
from dependent import blob
from helper import blob
print("Au revoir")
@ -320,7 +320,7 @@ def other_function(st):
print("Salut monde")
"""
original_code_dependent = """import numpy as np
original_code_helper = """import numpy as np
print("Cool")
@ -336,7 +336,7 @@ print("Not cool")
from typing import Optional
import libcst as cst
from typing import Mandatory
from dependent import blob
from helper import blob
print("Au revoir")
@ -349,7 +349,7 @@ def other_function(st):
print("Salut monde")
"""
expected_dependent = """import libcst as cst
expected_helper = """import libcst as cst
from typing import Optional
import numpy as np
@ -372,14 +372,14 @@ print("Not cool")
)
assert new_main_code == expected_main
new_dependent_code: str = replace_functions_in_file(
original_code_dependent,
new_helper_code: str = replace_functions_in_file(
original_code_helper,
["blob"],
optim_code,
[],
set(),
)
assert new_dependent_code == expected_dependent
assert new_helper_code == expected_helper
def test_test_libcst_code_replacement7() -> None:

View file

@ -1,8 +1,12 @@
import pathlib
from argparse import Namespace
from dataclasses import dataclass
import pytest
from codeflash.discovery.functions_to_optimize import FunctionParent, FunctionToOptimize
from codeflash.optimization.function_context import get_function_variables_definitions
from codeflash.optimization.optimizer import Optimizer
from returns.pipeline import is_successful
def calculate_something(data):
@ -15,15 +19,12 @@ def simple_function_with_one_dep(data):
def test_simple_dependencies():
file_path = pathlib.Path(__file__).resolve()
dependent_functions = get_function_variables_definitions(
helper_functions = get_function_variables_definitions(
FunctionToOptimize("simple_function_with_one_dep", str(file_path), []),
str(file_path.parent.resolve()),
)
assert len(dependent_functions) == 1
assert (
dependent_functions[0][0].definition.full_name
== "test_function_dependencies.calculate_something"
)
assert len(helper_functions) == 1
assert helper_functions[0][0].definition.full_name == "test_function_dependencies.calculate_something"
def global_dependency_1(num):
@ -74,15 +75,15 @@ class C:
def test_multiple_classes_dependencies():
# TODO: Check if C.run only gets calculate_something_3 as dependency and likewise for other classes
file_path = pathlib.Path(__file__).resolve()
dependent_functions = get_function_variables_definitions(
helper_functions = get_function_variables_definitions(
FunctionToOptimize("run", str(file_path), [FunctionParent("C", "ClassDef")]),
str(file_path.parent.resolve()),
)
# assert len(dependent_functions) == 2
assert list(map(lambda x: x[0].full_name, dependent_functions)) == [
"test_function_dependencies.C.run.calculate_something_3",
"test_function_dependencies.C.run.global_dependency_3",
# assert len(helper_functions) == 2
assert list(map(lambda x: x[0].full_name, helper_functions)) == [
"test_function_dependencies.C.calculate_something_3",
"test_function_dependencies.global_dependency_3",
]
@ -95,15 +96,12 @@ def recursive_dependency_1(num):
def test_recursive_dependency():
file_path = pathlib.Path(__file__).resolve()
dependent_functions = get_function_variables_definitions(
helper_functions = get_function_variables_definitions(
FunctionToOptimize("recursive_dependency_1", str(file_path), []),
str(file_path.parent.resolve()),
)
assert len(dependent_functions) == 1
assert (
dependent_functions[0][0].definition.full_name
== "test_function_dependencies.calculate_something"
)
assert len(helper_functions) == 1
assert helper_functions[0][0].definition.full_name == "test_function_dependencies.calculate_something"
@dataclass
@ -121,13 +119,110 @@ def simple_function_with_one_dep_ann(data: MyData):
def test_simple_dependencies_ann():
file_path = pathlib.Path(__file__).resolve()
dependent_functions = get_function_variables_definitions(
helper_functions = get_function_variables_definitions(
FunctionToOptimize("simple_function_with_one_dep_ann", str(file_path), []),
str(file_path.parent.resolve()),
)
assert len(dependent_functions) == 2
assert dependent_functions[0][0].definition.full_name == "test_function_dependencies.MyData"
assert (
dependent_functions[1][0].definition.full_name
== "test_function_dependencies.calculate_something_ann"
assert len(helper_functions) == 2
assert helper_functions[0][0].definition.full_name == "test_function_dependencies.MyData"
assert helper_functions[1][0].definition.full_name == "test_function_dependencies.calculate_something_ann"
from collections import defaultdict
class Graph:
def __init__(self, vertices):
self.graph = defaultdict(list)
self.V = vertices # No. of vertices
def addEdge(self, u, v):
self.graph[u].append(v)
def topologicalSortUtil(self, v, visited, stack):
visited[v] = True
for i in self.graph[v]:
if visited[i] == False:
self.topologicalSortUtil(i, visited, stack)
stack.insert(0, v)
def topologicalSort(self):
visited = [False] * self.V
stack = []
for i in range(self.V):
if visited[i] == False:
self.topologicalSortUtil(i, visited, stack)
# Print contents of stack
return stack
def test_class_method_dependencies():
file_path = pathlib.Path(__file__).resolve()
opt = Optimizer(
Namespace(
project_root=str(file_path.parent.resolve()),
disable_telemetry=True,
tests_root="tests",
test_framework="pytest",
pytest_cmd="pytest",
experiment_id=None,
),
)
function_to_optimize = FunctionToOptimize(
function_name="topologicalSort",
file_path=str(file_path),
parents=[FunctionParent(name="Graph", type="ClassDef")],
starting_line=None,
ending_line=None,
)
with open(file_path) as f:
original_code = f.read()
ctx_result = opt.get_code_optimization_context(
function_to_optimize,
opt.args.project_root,
original_code,
)
if not is_successful(ctx_result):
pytest.fail()
code_context = ctx_result.unwrap()
# The code_context above should have the topologicalSortUtil function in it
assert len(code_context.helper_functions) == 1
assert (
code_context.helper_functions[0][0].definition.full_name
== "test_function_dependencies.Graph.topologicalSortUtil"
)
assert code_context.helper_functions[0][0].definition.name == "topologicalSortUtil"
assert code_context.helper_functions[0][2] == "Graph.topologicalSortUtil"
assert code_context.contextual_dunder_methods == {("Graph", "__init__")}
assert (
code_context.code_to_optimize_with_helpers
== """from collections import defaultdict
class Graph:
def __init__(self, vertices):
self.graph = defaultdict(list)
self.V = vertices # No. of vertices
def topologicalSort(self):
visited = [False] * self.V
stack = []
for i in range(self.V):
if visited[i] == False:
self.topologicalSortUtil(i, visited, stack)
# Print contents of stack
return stack
def topologicalSortUtil(self, v, visited, stack):
visited[v] = True
for i in self.graph[v]:
if visited[i] == False:
self.topologicalSortUtil(i, visited, stack)
stack.insert(0, v)
"""
)

View file

@ -7,7 +7,7 @@ from typing import List
from codeflash.code_utils.code_extractor import get_code
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.optimization.function_context import (
get_constrained_function_context_and_dependent_functions,
get_constrained_function_context_and_helper_functions,
)
@ -43,7 +43,7 @@ def function_to_optimize3(data: dict[CustomDataClass, list[CustomDataClass]]) ->
def test_function_context_includes_type_annotation() -> None:
file_path = pathlib.Path(__file__).resolve()
a, dependent_functions = get_constrained_function_context_and_dependent_functions(
a, helper_functions = get_constrained_function_context_and_helper_functions(
FunctionToOptimize("function_to_optimize", str(file_path), []),
str(file_path.parent.resolve()),
"""def function_to_optimize(data: CustomType):
@ -53,13 +53,13 @@ def test_function_context_includes_type_annotation() -> None:
1000,
)
assert len(dependent_functions) == 1
assert dependent_functions[0][0].full_name == "CustomType"
assert len(helper_functions) == 1
assert helper_functions[0][0].full_name == "CustomType"
def test_function_context_includes_type_annotation_dataclass() -> None:
file_path = pathlib.Path(__file__).resolve()
a, dependent_functions = get_constrained_function_context_and_dependent_functions(
a, helper_functions = get_constrained_function_context_and_helper_functions(
FunctionToOptimize("function_to_optimize2", str(file_path), []),
str(file_path.parent.resolve()),
"""def function_to_optimize2(data: CustomDataClass) -> CustomType:
@ -69,14 +69,14 @@ def test_function_context_includes_type_annotation_dataclass() -> None:
1000,
)
assert len(dependent_functions) == 2
assert dependent_functions[0][0].full_name == "CustomDataClass"
assert dependent_functions[1][0].full_name == "CustomType"
assert len(helper_functions) == 2
assert helper_functions[0][0].full_name == "CustomDataClass"
assert helper_functions[1][0].full_name == "CustomType"
def test_function_context_works_for_composite_types() -> None:
file_path = pathlib.Path(__file__).resolve()
a, dependent_functions = get_constrained_function_context_and_dependent_functions(
a, helper_functions = get_constrained_function_context_and_helper_functions(
FunctionToOptimize("function_to_optimize3", str(file_path), []),
str(file_path.parent.resolve()),
"""def function_to_optimize3(data: set[CustomDataClass[CustomDataClass, int]]) -> list[CustomType]:
@ -86,9 +86,9 @@ def test_function_context_works_for_composite_types() -> None:
1000,
)
assert len(dependent_functions) == 2
assert dependent_functions[0][0].full_name == "CustomDataClass"
assert dependent_functions[1][0].full_name == "CustomType"
assert len(helper_functions) == 2
assert helper_functions[0][0].full_name == "CustomDataClass"
assert helper_functions[1][0].full_name == "CustomType"
def test_function_context_custom_datatype() -> None:
@ -99,12 +99,12 @@ def test_function_context_custom_datatype() -> None:
)
assert code is not None
assert contextual_dunder_methods == set()
a, dependent_functions = get_constrained_function_context_and_dependent_functions(
a, helper_functions = get_constrained_function_context_and_helper_functions(
FunctionToOptimize("cosine_similarity", str(file_path), []),
str(project_path),
code,
1000,
)
assert len(dependent_functions) == 1
assert dependent_functions[0][0].full_name == "Matrix"
assert len(helper_functions) == 1
assert helper_functions[0][0].full_name == "Matrix"

View file

@ -19,7 +19,7 @@ class InjectPerfAndLogging(ast.NodeTransformer):
def __init__(
self,
function: FunctionToOptimize,
dependent_function_names: list[str],
helper_function_names: list[str],
test_module_path: str,
test_framework: str = "pytest",
test_timeout: int = 15,
@ -33,9 +33,7 @@ class InjectPerfAndLogging(ast.NodeTransformer):
self.random_test = False
if len(function.parents) == 1 and function.parents[0].type == "ClassDef":
self.class_name = function.top_level_parent_name
self.dependent_function_names = (
dependent_function_names # Other functional dependencies that were injected
)
self.helper_function_names = helper_function_names # Other functional dependencies that were injected
def visit_ImportFrom(self, node: ast.ImportFrom):
if any([name.name in [self.only_function_name, self.class_name] for name in node.names]):
@ -194,7 +192,7 @@ class InjectPerfAndLogging(ast.NodeTransformer):
node: ast.FunctionDef,
class_name: Optional[str] = None,
) -> Optional[ast.FunctionDef]:
if node.name == self.only_function_name or node.name in self.dependent_function_names:
if node.name == self.only_function_name or node.name in self.helper_function_names:
return None # Remove the re-definition of the function and its dependencies from the test generation code
elif node.name.startswith("test_"):
i: int = len(node.body) - 1

View file

@ -1,6 +1,7 @@
import ast
from aiservice.models.functions_to_optimize import FunctionToOptimize
from testgen.instrumentation.instrument_new_tests import (
InjectPerfAndLogging,
create_wrapper_function,
@ -11,7 +12,7 @@ from testgen.instrumentation.instrument_new_tests import (
def instrument_test_source(
test_source: str,
function_to_optimize: FunctionToOptimize,
dependent_function_names: list[str],
helper_function_names: list[str],
module_path: str,
test_module_path: str,
test_framework: str,
@ -20,7 +21,7 @@ def instrument_test_source(
module_node = ast.parse(test_source)
new_module_node = InjectPerfAndLogging(
function_to_optimize,
dependent_function_names=dependent_function_names,
helper_function_names=helper_function_names,
test_module_path=test_module_path,
test_framework=test_framework,
test_timeout=test_timeout,

View file

@ -75,7 +75,7 @@ def test_large_list():
result = instrument_test_source(
test_source=test_source,
function_to_optimize=function_to_optimize,
dependent_function_names=[],
helper_function_names=[],
module_path="code_to_optimize.bubble_sort",
test_module_path="code_to_optimize.tests.pytest.test_sorter__unit_test_0",
test_framework="pytest",
@ -161,7 +161,7 @@ def test_large_list():
result = instrument_test_source(
test_source=test_source,
function_to_optimize=function_to_optimize,
dependent_function_names=[],
helper_function_names=[],
module_path="code_to_optimize.bubble_sort_in_class",
test_module_path="tests.test_sorter__unit_test_0",
test_framework="pytest",
@ -233,7 +233,7 @@ def test_large_list_with_random_values():
result = instrument_test_source(
test_source=test_source,
function_to_optimize=function_to_optimize,
dependent_function_names=[],
helper_function_names=[],
module_path="code_to_optimize.bubble_sort",
test_module_path="code_to_optimize.tests.pytest.test_sorter__unit_test_0",
test_framework="pytest",
@ -297,7 +297,7 @@ def test_large_list_with_random_values():
result = instrument_test_source(
test_source=test_source,
function_to_optimize=function_to_optimize,
dependent_function_names=[],
helper_function_names=[],
module_path="code_to_optimize.bubble_sort",
test_module_path="code_to_optimize.tests.pytest.test_sorter__unit_test_0",
test_framework="pytest",
@ -333,7 +333,7 @@ def test_sort_parametrized(input, expected_output):
result = instrument_test_source(
test_source=test_source,
function_to_optimize=function_to_optimize,
dependent_function_names=[],
helper_function_names=[],
module_path="code_to_optimize.bubble_sort",
test_module_path="code_to_optimize.tests.pytest.test_sorter__unit_test_0",
test_framework="pytest",

View file

@ -8,14 +8,15 @@ from pathlib import Path
from typing import SupportsIndex
import isort
from ninja import NinjaAPI, Schema
from pydantic import model_validator
from aiservice.common_utils import parse_python_version
from aiservice.env_specific import create_openai_client, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL
from aiservice.models.functions_to_optimize import FunctionToOptimize
from authapp.auth import AuthBearer
from log_features.log_features import log_features
from ninja import NinjaAPI, Schema
from testgen.instrumentation.edit_generated_test import (
parse_module_to_cst,
replace_definition_with_import,
@ -306,7 +307,8 @@ To help unit test the function above, list diverse scenarios that the function s
class TestGenSchema(Schema):
source_code_being_tested: str
function_to_optimize: FunctionToOptimize
dependent_function_names: list[str]
helper_function_names: list[str] = None # This is the only one we should use
dependent_function_names: list[str] = None # Only for backwards compatibility
module_path: str
test_module_path: str
test_framework: str
@ -314,6 +316,19 @@ class TestGenSchema(Schema):
trace_id: str
python_version: str
@model_validator(mode="after")
def helper_function_names_validator(self):
    """Backfill `helper_function_names` from the deprecated `dependent_function_names`.

    Maintains backwards compatibility with older clients that still send
    `dependent_function_names`: exactly one of the two fields must be provided.
    After validation, only `helper_function_names` is populated.

    Returns:
        The validated model instance (pydantic "after" validators return self).

    Raises:
        ValueError: if neither field was supplied in the payload.
    """
    # To maintain backwards compatibility
    if self.dependent_function_names is None and self.helper_function_names is None:
        raise ValueError("either field 'helper_function_names' or 'dependent_function_names' is required")
    if self.helper_function_names is not None:
        return self
    # Legacy path: promote the deprecated field and clear it so downstream
    # code only ever reads helper_function_names.
    self.helper_function_names = self.dependent_function_names
    self.dependent_function_names = None
    return self
class TestGenResponseSchema(Schema):
generated_tests: str
@ -356,6 +371,7 @@ async def testgen(
return 400, TestGenErrorResponseSchema(
error="Invalid Python version, it should look like 3.x.x. We only support Python 3.9 and above.",
)
print("data.helper_function_names", data.helper_function_names)
try:
ast.parse(
data.source_code_being_tested,
@ -383,7 +399,7 @@ async def testgen(
instrumented_test_source = instrument_test_source(
test_source=generated_test_source,
function_to_optimize=data.function_to_optimize,
dependent_function_names=data.dependent_function_names,
helper_function_names=data.helper_function_names,
module_path=data.module_path,
test_module_path=data.test_module_path,
test_framework=data.test_framework,