Mirror of https://github.com/codeflash-ai/codeflash-internal.git, synced 2026-05-04 18:25:18 +00:00

Merge pull request #1164 from codeflash-ai/catch-one-more-exception-type

minor crash fix

Commit 8f607755a3
14 changed files with 331 additions and 188 deletions
.github/workflows/end-to-end-test-tracer-replay.yaml (vendored, new file, 34 lines)
@@ -0,0 +1,34 @@
name: end-to-end-test

on:
  pull_request:
  workflow_dispatch:

defaults:
  run:
    working-directory: ./cli

jobs:
  tracer-replay:
    runs-on: ubuntu-latest
    env:
      CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: 3.11.6
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install poetry
          poetry install --with dev
      - name: Run Codeflash to optimize code
        id: optimize_code
        run: |
          poetry env use python
          poetry run python tests/scripts/end_to_end_test_tracer_replay.py
@@ -0,0 +1,6 @@
[tool.codeflash]
disable-telemetry = true
formatter-cmds = ["ruff check --exit-zero --fix $file", "ruff format $file"]
module-root = "."
test-framework = "pytest"
tests-root = "tests"
@@ -0,0 +1,14 @@
def funcA(number):
    k = 0
    for i in range(number * 100):
        k += i
    # Simplify the for loop by using sum with a range object
    j = sum(range(number))

    # Use a generator expression directly in join for more efficiency
    return " ".join(str(i) for i in range(number))


if __name__ == "__main__":
    for i in range(10, 31, 10):
        funcA(10)
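The workload above deliberately mixes a redundant accumulation loop with the already-optimized sum/range and join forms. A minimal, self-contained sketch of why the sum form is the faster pattern; the sizes and repeat counts are illustrative only, not taken from this diff:

import timeit

# Compare an explicit accumulation loop against the built-in sum over a range.
loop_time = timeit.timeit("k = 0\nfor i in range(1000):\n    k += i", number=1000)
sum_time = timeit.timeit("sum(range(1000))", number=1000)
print(f"loop: {loop_time:.4f}s  sum: {sum_time:.4f}s")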
@@ -32,7 +32,7 @@ class TestFunction:


def discover_unit_tests(
cfg: TestConfig, discover_only_these_tests: list[str] | None = None
cfg: TestConfig, discover_only_these_tests: list[Path] | None = None
) -> dict[str, list[FunctionCalledInTest]]:
if cfg.test_framework == "pytest":
return discover_tests_pytest(cfg, discover_only_these_tests)

@@ -43,7 +43,7 @@ def discover_unit_tests(


def discover_tests_pytest(
cfg: TestConfig, discover_only_these_tests: list[str] | None = None
cfg: TestConfig, discover_only_these_tests: list[Path] | None = None
) -> dict[str, list[FunctionCalledInTest]]:
tests_root = cfg.tests_root
project_root = cfg.project_root_path
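The two hunks above change discover_only_these_tests from a list of strings to a list of Path objects. A hedged sketch of the new call shape; the config object and the test file name are placeholders, not values from this diff:

from pathlib import Path

# Hypothetical call shape after the change; "cfg" stands in for a TestConfig
# built elsewhere, and the file name is a placeholder.
replay_test = Path("tests/test_workload__replay_test_0.py")
function_to_tests = discover_unit_tests(cfg, discover_only_these_tests=[replay_test])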
@@ -81,10 +81,10 @@ class FunctionVisitor(cst.CSTVisitor):


class FunctionWithReturnStatement(ast.NodeVisitor):
def __init__(self, file_path: str) -> None:
def __init__(self, file_path: Path) -> None:
self.functions: list[FunctionToOptimize] = []
self.ast_path: list[FunctionParent] = []
self.file_path: str = file_path
self.file_path: Path = file_path

def visit_FunctionDef(self, node: FunctionDef) -> None:
# Check if the function has a return statement and add it to the list
@@ -188,7 +188,7 @@ def get_functions_to_optimize(
class_name = None
only_function_name = split_function[0]
found_function = None
for fn in functions.get(str(file), []):
for fn in functions.get(file, []):
if only_function_name == fn.function_name and (
class_name is None or class_name == fn.top_level_parent_name
):

@@ -196,7 +196,7 @@ def get_functions_to_optimize(
if found_function is None:
msg = f"Function {only_function_name} not found in file {file} or the function does not have a 'return' statement."
raise ValueError(msg)
functions[str(file)] = [found_function]
functions[file] = [found_function]
else:
logger.info("Finding all functions modified in the current git diff ...")
ph("cli-optimizing-git-diff")
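The switch from functions.get(str(file), []) to functions.get(file, []) matters because str and Path keys do not collide in a dict. A self-contained illustration of the lookup mismatch this avoids:

from pathlib import Path

# A dict keyed by Path objects is not reachable through the equivalent string,
# so mixing the two silently returns the default.
functions = {Path("module/util.py"): ["funcA"]}
print(functions.get("module/util.py", []))        # [] -- str key misses
print(functions.get(Path("module/util.py"), []))  # ['funcA'] -- Path key matches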
@@ -247,23 +247,23 @@ def get_all_files_and_functions(module_root_path: Path) -> dict[str, list[FunctionToOptimize]]:
return dict(files_list)


def find_all_functions_in_file(file_path: Path) -> dict[str, list[FunctionToOptimize]]:
functions: dict[str, list[FunctionToOptimize]] = {}
def find_all_functions_in_file(file_path: Path) -> dict[Path, list[FunctionToOptimize]]:
functions: dict[Path, list[FunctionToOptimize]] = {}
with file_path.open(encoding="utf8") as f:
try:
ast_module = ast.parse(f.read())
except Exception as e:
logger.exception(e)
return functions
function_name_visitor = FunctionWithReturnStatement(str(file_path))
function_name_visitor = FunctionWithReturnStatement(file_path)
function_name_visitor.visit(ast_module)
functions[str(file_path)] = function_name_visitor.functions
functions[file_path] = function_name_visitor.functions
return functions


def get_all_replay_test_functions(
replay_test: str, test_cfg: TestConfig, project_root_path: Path
) -> dict[str, list[FunctionToOptimize]]:
replay_test: Path, test_cfg: TestConfig, project_root_path: Path
) -> dict[Path, list[FunctionToOptimize]]:
function_tests = discover_unit_tests(test_cfg, discover_only_these_tests=[replay_test])
# Get the absolute file paths for each function, excluding class name if present
filtered_valid_functions = defaultdict(list)
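find_all_functions_in_file parses the file with ast and keeps only the functions that FunctionWithReturnStatement accepts. A standalone sketch of that discovery idea, simplified and not the repository's visitor:

import ast

# Parse a source snippet and keep the functions that contain a return statement.
source = "def a():\n    return 1\n\ndef b():\n    pass\n"
tree = ast.parse(source)
with_return = [
    node.name
    for node in ast.walk(tree)
    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    and any(isinstance(child, ast.Return) for child in ast.walk(node))
]
print(with_return)  # ['a']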
@@ -292,7 +292,7 @@ def get_all_replay_test_functions(
file_path = Path(project_root_path, *file_path_parts).with_suffix(".py")
file_to_functions_map[file_path].append((function, function_name, class_name))
for file_path, functions in file_to_functions_map.items():
all_valid_functions: dict[str, list[FunctionToOptimize]] = find_all_functions_in_file(file_path=file_path)
all_valid_functions: dict[Path, list[FunctionToOptimize]] = find_all_functions_in_file(file_path=file_path)
filtered_list = []
for function in functions:
function_name, function_name_only, class_name = function
@@ -407,7 +407,7 @@ def inspect_top_level_functions_or_methods(


def filter_functions(
modified_functions: dict[str, list[FunctionToOptimize]],
modified_functions: dict[Path, list[FunctionToOptimize]],
tests_root: Path,
ignore_paths: list[Path],
project_root: Path,
@@ -431,7 +431,8 @@ def filter_functions(
tests_root_str = str(tests_root)
module_root_str = str(module_root)
# We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
for file_path, functions in modified_functions.items():
for file_path_path, functions in modified_functions.items():
file_path = str(file_path_path)
if file_path.startswith(tests_root_str + os.sep):
test_functions_removed_count += len(functions)
continue
@@ -499,10 +500,11 @@ def filter_files_optimized(file_path: Path, tests_root: Path, ignore_paths: list
return False
if submodule_paths is None:
submodule_paths = ignored_submodule_paths(module_root)
return not (
file_path in submodule_paths
or any(file_path.is_relative_to(submodule_path) for submodule_path in submodule_paths)
)
if file_path in submodule_paths or any(
file_path.is_relative_to(submodule_path) for submodule_path in submodule_paths
):
return False
return True


def function_has_return_statement(function_node: FunctionDef | AsyncFunctionDef) -> bool:
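The rewritten tail of filter_files_optimized behaves the same as the old return not (...) form: a file is rejected when it equals or sits below an ignored submodule path. A self-contained sketch of that check; the paths are made up:

from pathlib import Path

submodule_paths = [Path("/repo/vendor").resolve()]
candidate = Path("/repo/vendor/pkg/mod.py").resolve()
# Rejected: the candidate is inside an ignored submodule.
rejected = candidate in submodule_paths or any(
    candidate.is_relative_to(p) for p in submodule_paths
)
print(not rejected)  # False -> the file would be filtered out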
@@ -44,6 +44,7 @@ from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import (
BestOptimization,
CodeOptimizationContext,
FunctionParent,
GeneratedTests,
GeneratedTestsList,
OptimizationSet,

@@ -51,9 +52,6 @@ from codeflash.models.models import (
OriginalCodeBaseline,
TestFile,
TestFiles,
OptimizedCandidate,
FunctionCalledInTest,
FunctionParent,
)
from codeflash.optimization.function_context import get_constrained_function_context_and_helper_functions
from codeflash.result.create_pr import check_create_pr, existing_tests_source_for
@@ -72,7 +70,7 @@ if TYPE_CHECKING:

from returns.result import Result

from codeflash.models.models import FunctionSource
from codeflash.models.models import FunctionCalledInTest, FunctionSource, OptimizedCandidate


class Optimizer:
@@ -245,7 +243,6 @@ class Optimizer:

baseline_result = self.establish_original_code_baseline(
function_to_optimize.qualified_name,
generated_tests_paths,
function_to_tests.get(module_path + "." + function_to_optimize.qualified_name, []),
)
console.rule()
@@ -410,6 +407,7 @@ class Optimizer:
original_test_results=original_code_baseline.overall_test_results,
tests_in_file=only_run_this_test_function,
)
console.rule()
if not is_successful(run_results):
optimized_runtimes[candidate.optimization_id] = None
is_correct[candidate.optimization_id] = False
@@ -635,7 +633,8 @@ class Optimizer:
def instrument_existing_tests(
self, function_to_optimize: FunctionToOptimize, function_to_tests: dict[str, list[FunctionCalledInTest]]
) -> set[Path]:
relevant_test_files_count = 0
existing_test_files_count = 0
replay_test_files_count = 0
unique_instrumented_test_files = set()

func_qualname = function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root)
@@ -644,10 +643,17 @@ class Optimizer:
else:
test_file_invocation_positions = defaultdict(list)
for tests_in_file in function_to_tests.get(func_qualname):
test_file_invocation_positions[tests_in_file.tests_in_file.test_file].append(tests_in_file.position)
for test_file, positions in test_file_invocation_positions.items():
test_file_invocation_positions[
(tests_in_file.tests_in_file.test_file, tests_in_file.tests_in_file.test_type)
].append(tests_in_file.position)
for (test_file, test_type), positions in test_file_invocation_positions.items():
path_obj_test_file = Path(test_file)
relevant_test_files_count += 1
if test_type == TestType.EXISTING_UNIT_TEST:
existing_test_files_count += 1
elif test_type == TestType.REPLAY_TEST:
replay_test_files_count += 1
else:
raise ValueError(f"Unexpected test type: {test_type}")
success, injected_test = inject_profiling_into_existing_test(
test_path=path_obj_test_file,
call_positions=positions,
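The grouping above now keys invocation positions by the pair (test file, test type), so existing unit tests and replay tests for the same function are counted and instrumented separately. A self-contained sketch of that bucketing; the file names and type labels are illustrative strings, not the repository's enum values:

from collections import defaultdict

positions_by_key = defaultdict(list)
for test_file, test_type, position in [
    ("tests/test_a.py", "EXISTING_UNIT_TEST", 3),
    ("tests/replay_a.py", "REPLAY_TEST", 7),
    ("tests/test_a.py", "EXISTING_UNIT_TEST", 9),
]:
    positions_by_key[(test_file, test_type)].append(position)
print(dict(positions_by_key))
# {('tests/test_a.py', 'EXISTING_UNIT_TEST'): [3, 9], ('tests/replay_a.py', 'REPLAY_TEST'): [7]}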
@@ -674,12 +680,13 @@ class Optimizer:
instrumented_file_path=new_test_path,
original_source=None,
original_file_path=Path(test_file),
test_type=TestType.EXISTING_UNIT_TEST,
test_type=test_type,
)
)
logger.info(
f"Discovered {relevant_test_files_count} existing unit test file"
f"{'s' if relevant_test_files_count != 1 else ''} for {func_qualname}"
f"Discovered {existing_test_files_count} existing unit test file"
f"{'s' if existing_test_files_count != 1 else ''} and {replay_test_files_count} replay test file"
f"{'s' if replay_test_files_count != 1 else ''} for {func_qualname}"
)
return unique_instrumented_test_files
@@ -756,7 +763,7 @@ class Optimizer:
return Success((generated_tests, OptimizationSet(control=candidates, experiment=candidates_experiment)))

def establish_original_code_baseline(
self, function_name: str, generated_tests_paths: list[Path], tests_in_file: list[FunctionCalledInTest]
self, function_name: str, tests_in_file: list[FunctionCalledInTest]
) -> Result[tuple[OriginalCodeBaseline, list[str]], str]:
# For the original function - run the tests and get the runtime
@@ -772,32 +779,31 @@ class Optimizer:
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)

first_test_types = []
first_test_functions = []
only_run_these_test_functions_for_test_files: dict[str, str] = {}

for test_file in self.test_files.get_by_type(TestType.EXISTING_UNIT_TEST).test_files:
# Replay tests can have hundreds of test functions and running them can be very slow,
# so we only run the test functions that are relevant to the function we are optimizing
for test_file in self.test_files.get_by_type(TestType.REPLAY_TEST).test_files:
relevant_tests_in_file = [
test_in_file
for test_in_file in tests_in_file
if test_in_file.tests_in_file.test_file == test_file.original_file_path
]
is_replay_test = (
first_test_type := relevant_tests_in_file[0].tests_in_file.test_type
) == TestType.REPLAY_TEST
first_test_types.append(first_test_type)
first_test_functions.append(
relevant_tests_in_file[0].tests_in_file.test_function if is_replay_test else None
)
if is_replay_test and len(relevant_tests_in_file) > 1:
logger.warning(f"Multiple tests found for the replay test {test_file}. Should not happen")
first_test_functions.extend([None] * len(generated_tests_paths))
only_run_these_test_functions_for_test_files[test_file.instrumented_file_path] = relevant_tests_in_file[
0
].tests_in_file.test_function

if len(relevant_tests_in_file) > 1:
logger.warning(
f"Multiple tests found in the replay test {test_file} for {function_name}. Should not happen"
)

if test_framework == "pytest":
unittest_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
test_functions=first_test_functions,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
else:
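The baseline run now passes a mapping rather than a positional list of test functions: each instrumented replay-test file is paired with the single test function that exercises the function being optimized, so only that test is run. A hedged sketch of what the dict might hold; the path and function name are placeholders:

only_run_these_test_functions_for_test_files = {
    "/tmp/codeflash/test_workload__replay_test_0.py": "test_funcA",  # placeholder entry
}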
@@ -811,7 +817,7 @@ class Optimizer:
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
test_functions=first_test_functions,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
unittest_results.merge(unittest_loop_results)
@@ -887,120 +893,111 @@ class Optimizer:
) -> Result[OptimizedCandidateResult, str]:
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]

instrumented_unittests_created_for_function = self.test_files.get_by_type(TestType.EXISTING_UNIT_TEST)
generated_tests_paths = self.test_files.get_by_type(TestType.GENERATED_REGRESSION)
with progress_bar("Testing optimization candidate"):
success = True

success = True
test_env = os.environ.copy()
test_env["CODEFLASH_TEST_ITERATION"] = str(optimization_candidate_index)
test_env["CODEFLASH_TRACER_DISABLE"] = "1"
if "PYTHONPATH" not in test_env:
test_env["PYTHONPATH"] = str(self.args.project_root)
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)

test_env = os.environ.copy()
test_env["CODEFLASH_TEST_ITERATION"] = str(optimization_candidate_index)
test_env["CODEFLASH_TRACER_DISABLE"] = "1"
if "PYTHONPATH" not in test_env:
test_env["PYTHONPATH"] = str(self.args.project_root)
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)

first_test_types = []
first_test_functions = []
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
only_run_these_test_functions_for_test_files: dict[str, str] = {}
# Replay tests can have hundreds of test functions and running them can be very slow,
# so we only run the test functions that are relevant to the function we are optimizing
for test_file in self.test_files.get_by_type(TestType.REPLAY_TEST).test_files:
relevant_tests_in_file = [
test_in_file
for test_in_file in tests_in_file
if test_in_file.tests_in_file.test_file == test_file.original_file_path
]
only_run_these_test_functions_for_test_files[test_file.instrumented_file_path] = relevant_tests_in_file[
0
].tests_in_file.test_function

for test_file in instrumented_unittests_created_for_function:
relevant_tests_in_file = [
test_in_file
for test_in_file in tests_in_file
if test_in_file.tests_in_file.test_file == test_file.original_file_path
]
is_replay_test = (
first_test_type := relevant_tests_in_file[0].tests_in_file.test_type
) == TestType.REPLAY_TEST
first_test_types.append(first_test_type)
first_test_functions.append(
relevant_tests_in_file[0].tests_in_file.test_function if is_replay_test else None
)
if is_replay_test and len(relevant_tests_in_file) > 1:
logger.warning(
f"Multiple tests found for the replay test {test_file.original_file_path}. Should not happen"
)
first_test_functions.extend([None] * len(generated_tests_paths))
if test_framework == "pytest":
candidate_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
test_functions=first_test_functions,
testing_time=TOTAL_LOOPING_TIME,
)
loop_count = (
max(all_loop_indices)
if (all_loop_indices := {result.loop_index for result in candidate_results.test_results})
else 0
)
else:
candidate_results = TestResults()
start_time: float = time.time()
loop_count = 0
for i in range(100):
if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME:
break
test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
candidate_loop_results = self.run_and_parse_tests(
if test_framework == "pytest":
candidate_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
test_functions=first_test_functions,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
loop_count = i + 1
candidate_results.merge(candidate_loop_results)
loop_count = (
max(all_loop_indices)
if (all_loop_indices := {result.loop_index for result in candidate_results.test_results})
else 0
)
else:
candidate_results = TestResults()
start_time: float = time.time()
loop_count = 0
for i in range(100):
if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME:
break
test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
candidate_loop_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
loop_count = i + 1
candidate_results.merge(candidate_loop_results)

initial_loop_candidate_results = TestResults(
test_results=[result for result in candidate_results.test_results if result.loop_index == 1]
)

console.print(
TestResults.report_to_tree(
initial_loop_candidate_results.get_test_pass_fail_report_by_type(),
title="Overall initial loop test results for candidate",
initial_loop_candidate_results = TestResults(
test_results=[result for result in candidate_results.test_results if result.loop_index == 1]
)
)
console.rule()

initial_loop_original_test_results = TestResults(
test_results=[result for result in original_test_results.test_results if result.loop_index == 1]
)

if compare_test_results(initial_loop_original_test_results, initial_loop_candidate_results):
logger.info("Test results matched!")
console.rule()
equal_results = True
else:
logger.info("Test results did not match the test results of the original code.")
console.rule()
success = False
equal_results = False

if (total_candidate_timing := candidate_results.total_passed_runtime()) == 0:
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
console.rule()
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)

get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
if not equal_results:
success = False

if not success:
return Failure("Failed to run the optimized candidate.")

return Success(
OptimizedCandidateResult(
max_loop_count=loop_count,
best_test_runtime=total_candidate_timing,
test_results=candidate_results,
optimization_candidate_index=optimization_candidate_index,
total_candidate_timing=total_candidate_timing,
console.print(
TestResults.report_to_tree(
initial_loop_candidate_results.get_test_pass_fail_report_by_type(),
title="Overall initial loop test results for candidate",
)
)
console.rule()

initial_loop_original_test_results = TestResults(
test_results=[result for result in original_test_results.test_results if result.loop_index == 1]
)

if compare_test_results(initial_loop_original_test_results, initial_loop_candidate_results):
logger.info("Test results matched!")
console.rule()
equal_results = True
else:
logger.info("Test results did not match the test results of the original code.")
console.rule()
success = False
equal_results = False

if (total_candidate_timing := candidate_results.total_passed_runtime()) == 0:
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
console.rule()
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)

get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
if not equal_results:
success = False

if not success:
return Failure("Failed to run the optimized candidate.")

return Success(
OptimizedCandidateResult(
max_loop_count=loop_count,
best_test_runtime=total_candidate_timing,
test_results=candidate_results,
optimization_candidate_index=optimization_candidate_index,
total_candidate_timing=total_candidate_timing,
)
)

def run_and_parse_tests(
self,
@@ -27,7 +27,7 @@ from copy import copy
from io import StringIO
from pathlib import Path
from types import FrameType
from typing import Any, ClassVar, List, Optional
from typing import Any, ClassVar, List

import dill
import isort
@@ -42,6 +42,7 @@ from codeflash.tracing.tracing_utils import FunctionModules
from codeflash.verification.verification_utils import get_test_file_path


# Debug this file by simply adding print statements. This file is not meant to be debugged by the debugger.
class Tracer:
"""Use this class as a 'with' context manager to trace a function call,
input arguments, and profiling info.
@@ -50,11 +51,11 @@ class Tracer:
def __init__(
self,
output: str = "codeflash.trace",
functions: Optional[List[str]] = None,
functions: list[str] | None = None,
disable: bool = False,
config_file_path: Path | None = None,
max_function_count: int = 256,
timeout: Optional[int] = None,  # seconds
timeout: int | None = None,  # seconds
) -> None:
""":param output: The path to the output trace file
:param functions: List of functions to trace. If None, trace all functions
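The constructor shown above, together with the class docstring, suggests the usual entry point is a with-block. A hedged usage sketch; the traced workload call and the timeout value are placeholders, not taken from this diff:

from codeflash.tracer import Tracer

with Tracer(output="codeflash.trace", max_function_count=256, timeout=60):
    run_my_workload()  # placeholder for the code being traced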
@@ -91,7 +92,8 @@ class Tracer:
}
self.max_function_count = max_function_count
self.config, found_config_path = parse_config_file(config_file_path)
self.project_root = project_root_from_module_root(self.config["module_root"], found_config_path)
self.project_root = project_root_from_module_root(Path(self.config["module_root"]), found_config_path)
print("project_root", self.project_root)
self.ignored_functions = {"<listcomp>", "<genexpr>", "<dictcomp>", "<setcomp>", "<lambda>", "<module>"}

self.file_being_called_from: str = str(Path(sys._getframe().f_back.f_code.co_filename).name).replace(".", "_")
@@ -160,7 +162,7 @@ class Tracer:
remapped_callers = [{"key": k, "value": v} for k, v in callers.items()]
cur.execute(
"INSERT INTO pstats VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
(Path(func[0]).resolve(), func[1], func[2], func[3], cc, nc, tt, ct, json.dumps(remapped_callers)),
(str(Path(func[0]).resolve()), func[1], func[2], func[3], cc, nc, tt, ct, json.dumps(remapped_callers)),
)
self.con.commit()
@@ -177,7 +179,7 @@ class Tracer:
function
for function in self.function_modules
if self.function_count[
function.file_name
str(function.file_name)
+ ":"
+ (function.class_name + ":" if function.class_name else "")
+ function.function_name
@@ -193,14 +195,17 @@ class Tracer:
)
function_path = "_".join(self.functions) if self.functions else self.file_being_called_from
test_file_path = get_test_file_path(
test_dir=self.config["tests_root"], function_name=function_path, test_type="replay"
test_dir=Path(self.config["tests_root"]), function_name=function_path, test_type="replay"
)
replay_test = isort.code(replay_test)
with open(test_file_path, "w", encoding="utf8") as file:
file.write(replay_test)

console.print(
f"Codeflash: Traced {self.trace_count} function calls successfully and replay test created at - {test_file_path}"
f"Codeflash: Traced {self.trace_count} function calls successfully and replay test created at - {test_file_path}",
crop=False,
soft_wrap=False,
overflow="ignore",
)

def tracer_logic(self, frame: FrameType, event: str):
@@ -212,12 +217,12 @@ class Tracer:
console.print(f"Codeflash: Timeout reached! Stopping tracing at {self.timeout} seconds.")
return
code = frame.f_code
file_name = code.co_filename
file_name = Path(code.co_filename).resolve()
# TODO : It currently doesn't log the last return call from the first function

if code.co_name in self.ignored_functions:
return
if not Path(file_name).exists():
if not file_name.exists():
return
if self.functions:
if code.co_name not in self.functions:
@@ -236,7 +241,6 @@ class Tracer:
except:
# someone can override the getattr method and raise an exception. I'm looking at you wrapt
return
file_name = Path(file_name).resolve()
function_qualified_name = f"{file_name}:{(class_name + ':' if class_name else '')}{code.co_name}"
if function_qualified_name in self.ignored_qualified_functions:
return
@@ -250,9 +254,9 @@ class Tracer:
self.function_count[function_qualified_name] = 0
file_valid = filter_files_optimized(
file_path=file_name,
tests_root=self.config["tests_root"],
ignore_paths=self.config["ignore_paths"],
module_root=self.config["module_root"],
tests_root=Path(self.config["tests_root"]),
ignore_paths=[Path(p) for p in self.config["ignore_paths"]],
module_root=Path(self.config["module_root"]),
)
if not file_valid:
# we don't want to trace this function because it cannot be optimized
@@ -279,7 +283,7 @@ class Tracer:
sys.setrecursionlimit(10000)
# We do not pickle self for __init__ to avoid recursion errors, and instead instantiate its class
# directly with the rest of the arguments in the replay tests. We copy the arguments to avoid memory
# leaks, bad references or side-effects when unpickling.
# leaks, bad references or side effects when unpickling.
arguments = dict(arguments.items())
if class_name and code.co_name == "__init__":
del arguments["self"]
@@ -297,7 +301,16 @@ class Tracer:
return
cur.execute(
"INSERT INTO function_calls VALUES(?, ?, ?, ?, ?, ?, ?, ?)",
(event, code.co_name, class_name, file_name, frame.f_lineno, frame.f_back.__hash__(), t_ns, local_vars),
(
event,
code.co_name,
class_name,
str(file_name),
frame.f_lineno,
frame.f_back.__hash__(),
t_ns,
local_vars,
),
)
self.trace_count += 1
self.next_insert -= 1
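file_name is now a Path, and the insert above stringifies it because sqlite3 only binds the basic types (None, int, float, str, bytes) unless an adapter is registered. A self-contained sketch of the failure and the str() fix:

import sqlite3
from pathlib import Path

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE calls (file TEXT)")
try:
    con.execute("INSERT INTO calls VALUES (?)", (Path("workload.py"),))
except sqlite3.Error as e:
    print("Path object rejected:", e)
con.execute("INSERT INTO calls VALUES (?)", (str(Path("workload.py")),))
print(con.execute("SELECT file FROM calls").fetchall())  # [('workload.py',)]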
@@ -596,7 +609,7 @@ def main():
globs = {"run_module": runpy.run_module, "modname": unknown_args[0]}
else:
progname = unknown_args[0]
sys.path.insert(0, str(Path(progname).parent))
sys.path.insert(0, str(Path(progname).resolve().parent))
with io.open_code(progname) as fp:
code = compile(fp.read(), progname, "exec")
spec = importlib.machinery.ModuleSpec(name="__main__", loader=None, origin=progname)
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Optional

from pydantic import dataclasses

@@ -6,7 +7,7 @@ from pydantic import dataclasses
@dataclasses.dataclass
class FunctionModules:
function_name: str
file_name: str
file_name: Path
module_name: str
class_name: Optional[str] = None
line_no: Optional[int] = None
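FunctionModules now carries its file name as a Path, so the pydantic dataclass validates the field instead of passing a raw string through. A hedged construction sketch; the argument values are placeholders:

from pathlib import Path

from codeflash.tracing.tracing_utils import FunctionModules

fm = FunctionModules(
    function_name="funcA",            # placeholder values throughout
    file_name=Path("workload.py"),
    module_name="workload",
)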
@@ -72,7 +72,7 @@ def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, tes
test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
try:
test_pickle = pickle.loads(test_pickle_bin) if loop_index == 1 else None
except (AttributeError, ModuleNotFoundError, IndexError) as e:
except Exception as e:
logger.exception(f"Failed to load pickle file. Exception: {e}")
return test_results
assert test_type is not None, f"Test type not found for {test_file_path}"
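This appears to be the change the branch name refers to: unpickling recorded return values can fail with more exception types than the three previously listed, so the parser now catches broadly and skips that result instead of crashing. A self-contained sketch of the pattern, using a deliberately truncated payload:

import pickle

payload = pickle.dumps({"result": 42})[:-3]  # corrupt the payload on purpose
try:
    value = pickle.loads(payload)
except Exception as e:  # broad catch, mirroring the change above
    print(f"Failed to load pickle file. Exception: {e}")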
@@ -115,7 +115,7 @@ def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, tes
loop_index = val[4]
try:
ret_val = (pickle.loads(val[7]) if loop_index == 1 else None,)
except (AttributeError, ModuleNotFoundError, IndexError):
except Exception:
continue
test_results.add(
function_test_invocation=FunctionTestInvocation(
@@ -184,14 +184,7 @@ def parse_test_xml(
return test_results

test_class_path = testcase.classname
try:
test_function = testcase.name.split("[", 1)[0] if "[" in testcase.name else testcase.name
except ValueError as e:
xml_content = test_xml_file_path.read_text(encoding="utf-8")
logger.exception(
f"Failed to parse test function name from {testcase.name} in {xml_content} Exception:{e}"
)
raise
test_function = testcase.name.split("[", 1)[0] if "[" in testcase.name else testcase.name
if test_file_name is None:
if test_class_path:
# TODO : This might not be true if the test is organized under a class
@@ -222,7 +215,7 @@ def parse_test_xml(
continue
timed_out = False
if test_config.test_framework == "pytest":
loop_index = int(testcase.name.split("[ ", 1)[1][:-2]) if "[" in testcase.name else 1
loop_index = int(testcase.name.split("[ ")[-1][:-2]) if "[" in testcase.name else 1
if len(testcase.result) > 1:
logger.warning(f"!!!!!Multiple results for {testcase.name} in {test_xml_file_path}!!!")
if len(testcase.result) == 1:
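The loop index parsing now takes the last "[ " chunk of the test id rather than the first, which holds up when the test name itself contains brackets (for example from parametrization). A self-contained check of the new expression; the test ids are made up:

for name in ["test_funcA[ 3 ]", "test_param[x][ 12 ]", "test_plain"]:
    loop_index = int(name.split("[ ")[-1][:-2]) if "[" in name else 1
    print(name, "->", loop_index)
# test_funcA[ 3 ] -> 3
# test_param[x][ 12 ] -> 12
# test_plain -> 1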
@@ -25,13 +25,6 @@ def run_tests(
pytest_max_loops: int = 100_000,
) -> tuple[Path, subprocess.CompletedProcess]:
assert test_framework in ["pytest", "unittest"]
# TODO: Make this work for replay tests
for i, test_file in enumerate(test_paths):
if (
only_run_these_test_functions and test_file.test_type == TestType.REPLAY_TEST
):  # "__replay_test" in test_path:
# TODO: This might not work for replay tests
test_paths[i] = str(test_file.instrumented_file_path) + "::" + only_run_these_test_functions

if test_framework == "pytest":
result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
@@ -51,7 +44,14 @@ def run_tests(
"--codeflash_loops_scope=session",
]

test_files = [str(file.instrumented_file_path) for file in test_paths.test_files]
test_files = []
for file in test_paths.test_files:
if file.test_type == TestType.REPLAY_TEST:
test_files.append(
str(file.instrumented_file_path) + "::" + only_run_these_test_functions[file.instrumented_file_path]
)
else:
test_files.append(str(file.instrumented_file_path))

results = subprocess.run(
pytest_cmd_list + test_files + pytest_args,
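With the per-file mapping in place, replay test files are narrowed to a single function using pytest's standard file::function node-id syntax, while other files still run in full. A hedged sketch of the selection list that loop builds; the paths and names are placeholders:

test_files = [
    "/tmp/codeflash/test_workload__replay_test_0.py::test_funcA",  # replay test, one function
    "/tmp/codeflash/test_existing_unit_0.py",                      # existing unit test, whole file
]
# These entries are appended to the pytest command line built above.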
@@ -27,7 +27,7 @@ def main():
improvement_pct = int(re.search(r"📈 ([\d,]+)% improvement", stdout).group(1).replace(",", ""))
improvement_x = float(improvement_pct) / 100

assert improvement_pct > 5, f"Performance improvement percentage was {improvement_pct}, which was not above 10%"
assert improvement_pct > 10, f"Performance improvement percentage was {improvement_pct}, which was not above 10%"
assert improvement_x > 0.1, f"Performance improvement rate was {improvement_x}x, which was not above 0.1x"

# Check for the line indicating the number of discovered existing unit tests
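The raised threshold still relies on the same regular expression to pull the improvement percentage out of the CLI output. A self-contained check of that extraction on a made-up output line:

import re

stdout = "⚡️ Optimization successful! 📈 1,234% improvement"  # made-up example line
improvement_pct = int(re.search(r"📈 ([\d,]+)% improvement", stdout).group(1).replace(",", ""))
print(improvement_pct)  # 1234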
cli/tests/scripts/end_to_end_test_tracer_replay.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import os
import pathlib
import re
import subprocess


def main():
    cwd = (
        pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "simple_tracer_e2e"
    ).resolve()
    print("cwd", cwd)
    command = ["python", "-m", "codeflash.tracer", "-o", "codeflash.trace", "workload.py"]
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, cwd=str(cwd), env=os.environ.copy()
    )
    output = []

    for line in process.stdout:
        print(line, end="")  # Print each line in real-time
        output.append(line)  # Store each line in the output variable
    return_code = process.wait()
    stdout = "".join(output)
    assert return_code == 0, f"The codeflash command returned exit code {return_code} instead of 0"
    functions_traced = re.search(r"Traced (\d+) function calls successfully and replay test created at - (.*)$", stdout)
    assert functions_traced, "Failed to find any traced functions or replay test"
    assert int(functions_traced.group(1)) == 3, "Failed to find the correct number of traced functions"
    replay_test_path = pathlib.Path(functions_traced.group(2))
    assert replay_test_path, "Failed to find the replay test file path"
    assert replay_test_path.exists(), f"Replay test file does not exist at - {replay_test_path}"

    command = ["python", "../../../codeflash/main.py", "--replay-test", str(replay_test_path), "--no-pr"]
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, cwd=str(cwd), env=os.environ.copy()
    )
    output = []

    for line in process.stdout:
        print(line, end="")  # Print each line in real-time
        output.append(line)  # Store each line in the output variable
    return_code = process.wait()
    stdout = "".join(output)
    assert return_code == 0, f"The codeflash command returned exit code {return_code} instead of 0"

    improvement_pct = int(re.search(r"📈 ([\d,]+)% improvement", stdout).group(1).replace(",", ""))
    improvement_x = float(improvement_pct) / 100

    assert improvement_pct > 10, f"Performance improvement percentage was {improvement_pct}, which was not above 10%"
    assert improvement_x > 0.1, f"Performance improvement rate was {improvement_x}x, which was not above 0.1x"

    # Check for the line indicating the number of discovered existing unit tests
    unit_test_search = re.search(r"Discovered (\d+) existing unit tests", stdout)
    num_unit_tests = int(unit_test_search.group(1))
    assert num_unit_tests == 1, f"Could not find 1 existing unit test, found {num_unit_tests} instead"

    # check if the replay test was correctly run for the original code
    m = re.search(r"Replay Tests - Passed: (\d+), Failed: (\d+)", stdout)
    assert m, "Failed to run replay tests"

    passed, failed = int(m.group(1)), int(m.group(2))

    assert passed > 0, f"Expected >0 passed replay tests, found {passed}"
    assert failed == 0, f"Expected 0 failed replay tests, found {failed}"


if __name__ == "__main__":
    main()
@@ -2,6 +2,7 @@ import tempfile
from pathlib import Path

from codeflash.discovery.functions_to_optimize import (
filter_files_optimized,
find_all_functions_in_file,
get_functions_to_optimize,
inspect_top_level_functions_or_methods,
@@ -19,7 +20,7 @@ def test_function_eligible_for_optimization() -> None:
f.write(function)
f.flush()
functions_found = find_all_functions_in_file(Path(f.name))
assert functions_found[f.name][0].function_name == "test_function_eligible_for_optimization"
assert functions_found[Path(f.name)][0].function_name == "test_function_eligible_for_optimization"

# Has no return statement
function = """def test_function_not_eligible_for_optimization():
@@ -31,7 +32,7 @@ def test_function_eligible_for_optimization() -> None:
f.write(function)
f.flush()
functions_found = find_all_functions_in_file(Path(f.name))
assert len(functions_found[f.name]) == 0
assert len(functions_found[Path(f.name)]) == 0


def test_find_top_level_function_or_method():
@@ -149,3 +150,19 @@ def functionA():
for file in functions:
assert functions[file][0].qualified_name == "functionA"
assert functions[file][0].function_name == "functionA"


def test_filter_files_optimized():
tests_root = Path("tests").resolve()
module_root = Path().resolve()
ignore_paths = []

file_path_test = Path("tests/test_function_discovery.py").resolve()
file_path_same_level = Path("file.py").resolve()
file_path_different_level = Path("src/file.py").resolve()
file_path_above_level = Path("../file.py").resolve()

assert not filter_files_optimized(file_path_test, tests_root, ignore_paths, module_root)
assert filter_files_optimized(file_path_same_level, tests_root, ignore_paths, module_root)
assert filter_files_optimized(file_path_different_level, tests_root, ignore_paths, module_root)
assert not filter_files_optimized(file_path_above_level, tests_root, ignore_paths, module_root)