Merge pull request #1164 from codeflash-ai/catch-one-more-exception-type

Minor crash fix: catch broader exception types when unpickling test results, alongside str-to-Path refactors and better replay-test handling.
This commit is contained in:
Saurabh Misra 2024-11-01 18:13:41 -07:00 committed by GitHub
commit 8f607755a3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 331 additions and 188 deletions

View file

@ -0,0 +1,34 @@
name: end-to-end-test
on:
pull_request:
workflow_dispatch:
defaults:
run:
working-directory: ./cli
jobs:
tracer-replay:
runs-on: ubuntu-latest
env:
CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: 3.11.6
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
poetry install --with dev
- name: Run Codeflash to optimize code
id: optimize_code
run: |
poetry env use python
poetry run python tests/scripts/end_to_end_test_tracer_replay.py
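
This new workflow traces a sample workload and replays it end to end on every pull request. Below is a rough local approximation of what the job executes, assuming the same cli/ working directory and a CODEFLASH_API_KEY already in the environment; the runner function is illustrative, not part of this PR.

import os
import subprocess
import sys

def run_e2e_locally(cli_dir: str = "./cli") -> int:
    # Mirrors the workflow steps above; CODEFLASH_API_KEY must already be
    # exported, as the workflow provides via repository secrets.
    env = os.environ.copy()
    if "CODEFLASH_API_KEY" not in env:
        sys.exit("CODEFLASH_API_KEY must be set, mirroring the workflow secret")
    steps = [
        ["poetry", "install", "--with", "dev"],
        ["poetry", "env", "use", "python"],
        ["poetry", "run", "python", "tests/scripts/end_to_end_test_tracer_replay.py"],
    ]
    for cmd in steps:
        result = subprocess.run(cmd, cwd=cli_dir, env=env)
        if result.returncode != 0:
            return result.returncode
    return 0

if __name__ == "__main__":
    sys.exit(run_e2e_locally())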

View file

@ -0,0 +1,6 @@
[tool.codeflash]
disable-telemetry = true
formatter-cmds = ["ruff check --exit-zero --fix $file", "ruff format $file"]
module-root = "."
test-framework = "pytest"
tests-root = "tests"
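
This pyproject.toml configures codeflash for the new end-to-end fixture directory. A minimal sketch of how such a [tool.codeflash] table can be read with the standard library, assuming Python 3.11+; the loader name here is hypothetical (the real code path is parse_config_file, seen in the Tracer diff below).

import tomllib
from pathlib import Path

def load_codeflash_config(pyproject: Path) -> dict:
    # Hypothetical reader; key names come from the TOML above.
    with pyproject.open("rb") as f:
        data = tomllib.load(f)
    cfg = data["tool"]["codeflash"]
    # Path-typed settings, matching this commit's str -> Path migration.
    cfg["module-root"] = Path(cfg["module-root"])
    cfg["tests-root"] = Path(cfg["tests-root"])
    return cfg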

View file

@ -0,0 +1,14 @@
def funcA(number):
k = 0
for i in range(number * 100):
k += i
# Simplify the for loop by using sum with a range object
j = sum(range(number))
# Use a generator expression directly in join for more efficiency
return " ".join(str(i) for i in range(number))
if __name__ == "__main__":
for i in range(10, 31, 10):
funcA(10)
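
This workload is deliberately wasteful: the k-accumulation loop is dead work and j is never used, so the tracer/optimizer pipeline has something measurable to improve. A sketch of the before/after contrast the e2e test expects to observe; the optimized variant is written here only for illustration, not as the optimizer's actual output.

import timeit

def funcA_original(number):
    k = 0
    for i in range(number * 100):  # dead work: k is never read
        k += i
    j = sum(range(number))  # also unused
    return " ".join(str(i) for i in range(number))

def funcA_optimized(number):
    # Same observable result without the dead loop.
    return " ".join(map(str, range(number)))

if __name__ == "__main__":
    assert funcA_original(100) == funcA_optimized(100)
    print("original :", timeit.timeit(lambda: funcA_original(100), number=1000))
    print("optimized:", timeit.timeit(lambda: funcA_optimized(100), number=1000))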

View file

@ -32,7 +32,7 @@ class TestFunction:
def discover_unit_tests(
cfg: TestConfig, discover_only_these_tests: list[str] | None = None
cfg: TestConfig, discover_only_these_tests: list[Path] | None = None
) -> dict[str, list[FunctionCalledInTest]]:
if cfg.test_framework == "pytest":
return discover_tests_pytest(cfg, discover_only_these_tests)
@ -43,7 +43,7 @@ def discover_unit_tests(
def discover_tests_pytest(
cfg: TestConfig, discover_only_these_tests: list[str] | None = None
cfg: TestConfig, discover_only_these_tests: list[Path] | None = None
) -> dict[str, list[FunctionCalledInTest]]:
tests_root = cfg.tests_root
project_root = cfg.project_root_path
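
Both discovery entry points now accept Path objects for discover_only_these_tests, matching the replay-test path produced by the tracer. A caller-side sketch; the call itself is commented out because it needs a real TestConfig, and the path is illustrative.

from pathlib import Path

replay_test = Path("tests/test_workload__replay_test_0.py").resolve()
# function_to_tests = discover_unit_tests(cfg, discover_only_these_tests=[replay_test])
# function_to_tests maps qualified function names to the tests invoking them:
# dict[str, list[FunctionCalledInTest]]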

View file

@ -81,10 +81,10 @@ class FunctionVisitor(cst.CSTVisitor):
class FunctionWithReturnStatement(ast.NodeVisitor):
def __init__(self, file_path: str) -> None:
def __init__(self, file_path: Path) -> None:
self.functions: list[FunctionToOptimize] = []
self.ast_path: list[FunctionParent] = []
self.file_path: str = file_path
self.file_path: Path = file_path
def visit_FunctionDef(self, node: FunctionDef) -> None:
# Check if the function has a return statement and add it to the list
@ -188,7 +188,7 @@ def get_functions_to_optimize(
class_name = None
only_function_name = split_function[0]
found_function = None
for fn in functions.get(str(file), []):
for fn in functions.get(file, []):
if only_function_name == fn.function_name and (
class_name is None or class_name == fn.top_level_parent_name
):
@ -196,7 +196,7 @@ def get_functions_to_optimize(
if found_function is None:
msg = f"Function {only_function_name} not found in file {file} or the function does not have a 'return' statement."
raise ValueError(msg)
functions[str(file)] = [found_function]
functions[file] = [found_function]
else:
logger.info("Finding all functions modified in the current git diff ...")
ph("cli-optimizing-git-diff")
@ -247,23 +247,23 @@ def get_all_files_and_functions(module_root_path: Path) -> dict[str, list[Functi
return dict(files_list)
def find_all_functions_in_file(file_path: Path) -> dict[str, list[FunctionToOptimize]]:
functions: dict[str, list[FunctionToOptimize]] = {}
def find_all_functions_in_file(file_path: Path) -> dict[Path, list[FunctionToOptimize]]:
functions: dict[Path, list[FunctionToOptimize]] = {}
with file_path.open(encoding="utf8") as f:
try:
ast_module = ast.parse(f.read())
except Exception as e:
logger.exception(e)
return functions
function_name_visitor = FunctionWithReturnStatement(str(file_path))
function_name_visitor = FunctionWithReturnStatement(file_path)
function_name_visitor.visit(ast_module)
functions[str(file_path)] = function_name_visitor.functions
functions[file_path] = function_name_visitor.functions
return functions
def get_all_replay_test_functions(
replay_test: str, test_cfg: TestConfig, project_root_path: Path
) -> dict[str, list[FunctionToOptimize]]:
replay_test: Path, test_cfg: TestConfig, project_root_path: Path
) -> dict[Path, list[FunctionToOptimize]]:
function_tests = discover_unit_tests(test_cfg, discover_only_these_tests=[replay_test])
# Get the absolute file paths for each function, excluding class name if present
filtered_valid_functions = defaultdict(list)
@ -292,7 +292,7 @@ def get_all_replay_test_functions(
file_path = Path(project_root_path, *file_path_parts).with_suffix(".py")
file_to_functions_map[file_path].append((function, function_name, class_name))
for file_path, functions in file_to_functions_map.items():
all_valid_functions: dict[str, list[FunctionToOptimize]] = find_all_functions_in_file(file_path=file_path)
all_valid_functions: dict[Path, list[FunctionToOptimize]] = find_all_functions_in_file(file_path=file_path)
filtered_list = []
for function in functions:
function_name, function_name_only, class_name = function
@ -407,7 +407,7 @@ def inspect_top_level_functions_or_methods(
def filter_functions(
modified_functions: dict[str, list[FunctionToOptimize]],
modified_functions: dict[Path, list[FunctionToOptimize]],
tests_root: Path,
ignore_paths: list[Path],
project_root: Path,
@ -431,7 +431,8 @@ def filter_functions(
tests_root_str = str(tests_root)
module_root_str = str(module_root)
# We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
for file_path, functions in modified_functions.items():
for file_path_path, functions in modified_functions.items():
file_path = str(file_path_path)
if file_path.startswith(tests_root_str + os.sep):
test_functions_removed_count += len(functions)
continue
@ -499,10 +500,11 @@ def filter_files_optimized(file_path: Path, tests_root: Path, ignore_paths: list
return False
if submodule_paths is None:
submodule_paths = ignored_submodule_paths(module_root)
return not (
file_path in submodule_paths
or any(file_path.is_relative_to(submodule_path) for submodule_path in submodule_paths)
)
if file_path in submodule_paths or any(
file_path.is_relative_to(submodule_path) for submodule_path in submodule_paths
):
return False
return True
def function_has_return_statement(function_node: FunctionDef | AsyncFunctionDef) -> bool:
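
Throughout this file, function maps are now keyed by Path instead of str, and filter_files_optimized returns early instead of folding everything into one boolean expression. A simplified behavioral sketch of the filter (submodule handling omitted), consistent with the new test at the bottom of this diff:

from pathlib import Path

def filter_files_optimized_sketch(file_path: Path, tests_root: Path,
                                  ignore_paths: list[Path], module_root: Path) -> bool:
    if file_path.is_relative_to(tests_root):
        return False  # test files are never optimization targets
    if any(file_path.is_relative_to(p) for p in ignore_paths):
        return False
    # Files outside the module root (e.g. a resolved ../file.py) are skipped.
    return file_path.is_relative_to(module_root)

print(filter_files_optimized_sketch(
    Path("../file.py").resolve(), Path("tests").resolve(), [], Path().resolve()
))  # False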

View file

@ -44,6 +44,7 @@ from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import (
BestOptimization,
CodeOptimizationContext,
FunctionParent,
GeneratedTests,
GeneratedTestsList,
OptimizationSet,
@ -51,9 +52,6 @@ from codeflash.models.models import (
OriginalCodeBaseline,
TestFile,
TestFiles,
OptimizedCandidate,
FunctionCalledInTest,
FunctionParent,
)
from codeflash.optimization.function_context import get_constrained_function_context_and_helper_functions
from codeflash.result.create_pr import check_create_pr, existing_tests_source_for
@ -72,7 +70,7 @@ if TYPE_CHECKING:
from returns.result import Result
from codeflash.models.models import FunctionSource
from codeflash.models.models import FunctionCalledInTest, FunctionSource, OptimizedCandidate
class Optimizer:
@ -245,7 +243,6 @@ class Optimizer:
baseline_result = self.establish_original_code_baseline(
function_to_optimize.qualified_name,
generated_tests_paths,
function_to_tests.get(module_path + "." + function_to_optimize.qualified_name, []),
)
console.rule()
@ -410,6 +407,7 @@ class Optimizer:
original_test_results=original_code_baseline.overall_test_results,
tests_in_file=only_run_this_test_function,
)
console.rule()
if not is_successful(run_results):
optimized_runtimes[candidate.optimization_id] = None
is_correct[candidate.optimization_id] = False
@ -635,7 +633,8 @@ class Optimizer:
def instrument_existing_tests(
self, function_to_optimize: FunctionToOptimize, function_to_tests: dict[str, list[FunctionCalledInTest]]
) -> set[Path]:
relevant_test_files_count = 0
existing_test_files_count = 0
replay_test_files_count = 0
unique_instrumented_test_files = set()
func_qualname = function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root)
@ -644,10 +643,17 @@ class Optimizer:
else:
test_file_invocation_positions = defaultdict(list)
for tests_in_file in function_to_tests.get(func_qualname):
test_file_invocation_positions[tests_in_file.tests_in_file.test_file].append(tests_in_file.position)
for test_file, positions in test_file_invocation_positions.items():
test_file_invocation_positions[
(tests_in_file.tests_in_file.test_file, tests_in_file.tests_in_file.test_type)
].append(tests_in_file.position)
for (test_file, test_type), positions in test_file_invocation_positions.items():
path_obj_test_file = Path(test_file)
relevant_test_files_count += 1
if test_type == TestType.EXISTING_UNIT_TEST:
existing_test_files_count += 1
elif test_type == TestType.REPLAY_TEST:
replay_test_files_count += 1
else:
raise ValueError(f"Unexpected test type: {test_type}")
success, injected_test = inject_profiling_into_existing_test(
test_path=path_obj_test_file,
call_positions=positions,
@ -674,12 +680,13 @@ class Optimizer:
instrumented_file_path=new_test_path,
original_source=None,
original_file_path=Path(test_file),
test_type=TestType.EXISTING_UNIT_TEST,
test_type=test_type,
)
)
logger.info(
f"Discovered {relevant_test_files_count} existing unit test file"
f"{'s' if relevant_test_files_count != 1 else ''} for {func_qualname}"
f"Discovered {existing_test_files_count} existing unit test file"
f"{'s' if existing_test_files_count != 1 else ''} and {replay_test_files_count} replay test file"
f"{'s' if replay_test_files_count != 1 else ''} for {func_qualname}"
)
return unique_instrumented_test_files
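
Invocation positions are now grouped by (test_file, test_type) rather than by file alone, so existing unit tests and replay tests are instrumented and counted separately. A runnable sketch of the grouping, with stand-in tuples in place of FunctionCalledInTest:

from collections import defaultdict

# Stand-in tuples: (test_file, test_type, position).
invocations = [
    ("tests/test_a.py", "EXISTING_UNIT_TEST", (12, 4)),
    ("tests/test_a.py", "EXISTING_UNIT_TEST", (30, 8)),
    ("tests/replay_0.py", "REPLAY_TEST", (5, 0)),
]
positions = defaultdict(list)
for test_file, test_type, position in invocations:
    positions[(test_file, test_type)].append(position)

existing = sum(1 for (_, t) in positions if t == "EXISTING_UNIT_TEST")
replay = sum(1 for (_, t) in positions if t == "REPLAY_TEST")
print(f"Discovered {existing} existing unit test file(s) and {replay} replay test file(s)")
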
@ -756,7 +763,7 @@ class Optimizer:
return Success((generated_tests, OptimizationSet(control=candidates, experiment=candidates_experiment)))
def establish_original_code_baseline(
self, function_name: str, generated_tests_paths: list[Path], tests_in_file: list[FunctionCalledInTest]
self, function_name: str, tests_in_file: list[FunctionCalledInTest]
) -> Result[tuple[OriginalCodeBaseline, list[str]], str]:
# For the original function - run the tests and get the runtime
@ -772,32 +779,31 @@ class Optimizer:
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)
first_test_types = []
first_test_functions = []
only_run_these_test_functions_for_test_files: dict[str, str] = {}
for test_file in self.test_files.get_by_type(TestType.EXISTING_UNIT_TEST).test_files:
# Replay tests can have hundreds of test functions and running them can be very slow,
# so we only run the test functions that are relevant to the function we are optimizing
for test_file in self.test_files.get_by_type(TestType.REPLAY_TEST).test_files:
relevant_tests_in_file = [
test_in_file
for test_in_file in tests_in_file
if test_in_file.tests_in_file.test_file == test_file.original_file_path
]
is_replay_test = (
first_test_type := relevant_tests_in_file[0].tests_in_file.test_type
) == TestType.REPLAY_TEST
first_test_types.append(first_test_type)
first_test_functions.append(
relevant_tests_in_file[0].tests_in_file.test_function if is_replay_test else None
)
if is_replay_test and len(relevant_tests_in_file) > 1:
logger.warning(f"Multiple tests found for the replay test {test_file}. Should not happen")
first_test_functions.extend([None] * len(generated_tests_paths))
only_run_these_test_functions_for_test_files[test_file.instrumented_file_path] = relevant_tests_in_file[
0
].tests_in_file.test_function
if len(relevant_tests_in_file) > 1:
logger.warning(
f"Multiple tests found ub the replay test {test_file} for {function_name}. Should not happen"
)
if test_framework == "pytest":
unittest_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
test_functions=first_test_functions,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
else:
@ -811,7 +817,7 @@ class Optimizer:
test_env=test_env,
test_files=self.test_files,
optimization_iteration=0,
test_functions=first_test_functions,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
unittest_results.merge(unittest_loop_results)
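
Instead of positional first_test_functions lists, the baseline run now builds an explicit map from each instrumented replay-test file to the single test function relevant to the function under optimization. A stand-in sketch of that bookkeeping (real code uses TestFile and FunctionCalledInTest models):

import logging
from pathlib import Path

logger = logging.getLogger(__name__)

replay_test_files = [Path("tests/replay_0.py")]
relevant_functions = {Path("tests/replay_0.py"): ["test_funcA_replay"]}

only_run_these_test_functions_for_test_files = {}
for test_file in replay_test_files:
    functions = relevant_functions[test_file]
    only_run_these_test_functions_for_test_files[test_file] = functions[0]
    if len(functions) > 1:
        logger.warning("Multiple tests found in the replay test %s. Should not happen", test_file)
print(only_run_these_test_functions_for_test_files)
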
@ -887,120 +893,111 @@ class Optimizer:
) -> Result[OptimizedCandidateResult, str]:
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
instrumented_unittests_created_for_function = self.test_files.get_by_type(TestType.EXISTING_UNIT_TEST)
generated_tests_paths = self.test_files.get_by_type(TestType.GENERATED_REGRESSION)
with progress_bar("Testing optimization candidate"):
success = True
success = True
test_env = os.environ.copy()
test_env["CODEFLASH_TEST_ITERATION"] = str(optimization_candidate_index)
test_env["CODEFLASH_TRACER_DISABLE"] = "1"
if "PYTHONPATH" not in test_env:
test_env["PYTHONPATH"] = str(self.args.project_root)
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)
test_env = os.environ.copy()
test_env["CODEFLASH_TEST_ITERATION"] = str(optimization_candidate_index)
test_env["CODEFLASH_TRACER_DISABLE"] = "1"
if "PYTHONPATH" not in test_env:
test_env["PYTHONPATH"] = str(self.args.project_root)
else:
test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
first_test_types = []
first_test_functions = []
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
only_run_these_test_functions_for_test_files: dict[str, str] = {}
# Replay tests can have hundreds of test functions and running them can be very slow,
# so we only run the test functions that are relevant to the function we are optimizing
for test_file in self.test_files.get_by_type(TestType.REPLAY_TEST).test_files:
relevant_tests_in_file = [
test_in_file
for test_in_file in tests_in_file
if test_in_file.tests_in_file.test_file == test_file.original_file_path
]
only_run_these_test_functions_for_test_files[test_file.instrumented_file_path] = relevant_tests_in_file[
0
].tests_in_file.test_function
for test_file in instrumented_unittests_created_for_function:
relevant_tests_in_file = [
test_in_file
for test_in_file in tests_in_file
if test_in_file.tests_in_file.test_file == test_file.original_file_path
]
is_replay_test = (
first_test_type := relevant_tests_in_file[0].tests_in_file.test_type
) == TestType.REPLAY_TEST
first_test_types.append(first_test_type)
first_test_functions.append(
relevant_tests_in_file[0].tests_in_file.test_function if is_replay_test else None
)
if is_replay_test and len(relevant_tests_in_file) > 1:
logger.warning(
f"Multiple tests found for the replay test {test_file.original_file_path}. Should not happen"
)
first_test_functions.extend([None] * len(generated_tests_paths))
if test_framework == "pytest":
candidate_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
test_functions=first_test_functions,
testing_time=TOTAL_LOOPING_TIME,
)
loop_count = (
max(all_loop_indices)
if (all_loop_indices := {result.loop_index for result in candidate_results.test_results})
else 0
)
else:
candidate_results = TestResults()
start_time: float = time.time()
loop_count = 0
for i in range(100):
if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME:
break
test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
candidate_loop_results = self.run_and_parse_tests(
if test_framework == "pytest":
candidate_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
test_functions=first_test_functions,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
loop_count = i + 1
candidate_results.merge(candidate_loop_results)
loop_count = (
max(all_loop_indices)
if (all_loop_indices := {result.loop_index for result in candidate_results.test_results})
else 0
)
else:
candidate_results = TestResults()
start_time: float = time.time()
loop_count = 0
for i in range(100):
if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME:
break
test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
candidate_loop_results = self.run_and_parse_tests(
test_env=test_env,
test_files=self.test_files,
optimization_iteration=optimization_candidate_index,
test_functions=only_run_these_test_functions_for_test_files,
testing_time=TOTAL_LOOPING_TIME,
)
loop_count = i + 1
candidate_results.merge(candidate_loop_results)
initial_loop_candidate_results = TestResults(
test_results=[result for result in candidate_results.test_results if result.loop_index == 1]
)
console.print(
TestResults.report_to_tree(
initial_loop_candidate_results.get_test_pass_fail_report_by_type(),
title="Overall initial loop test results for candidate",
initial_loop_candidate_results = TestResults(
test_results=[result for result in candidate_results.test_results if result.loop_index == 1]
)
)
console.rule()
initial_loop_original_test_results = TestResults(
test_results=[result for result in original_test_results.test_results if result.loop_index == 1]
)
if compare_test_results(initial_loop_original_test_results, initial_loop_candidate_results):
logger.info("Test results matched!")
console.rule()
equal_results = True
else:
logger.info("Test results did not match the test results of the original code.")
console.rule()
success = False
equal_results = False
if (total_candidate_timing := candidate_results.total_passed_runtime()) == 0:
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
console.rule()
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
if not equal_results:
success = False
if not success:
return Failure("Failed to run the optimized candidate.")
return Success(
OptimizedCandidateResult(
max_loop_count=loop_count,
best_test_runtime=total_candidate_timing,
test_results=candidate_results,
optimization_candidate_index=optimization_candidate_index,
total_candidate_timing=total_candidate_timing,
console.print(
TestResults.report_to_tree(
initial_loop_candidate_results.get_test_pass_fail_report_by_type(),
title="Overall initial loop test results for candidate",
)
)
console.rule()
initial_loop_original_test_results = TestResults(
test_results=[result for result in original_test_results.test_results if result.loop_index == 1]
)
if compare_test_results(initial_loop_original_test_results, initial_loop_candidate_results):
logger.info("Test results matched!")
console.rule()
equal_results = True
else:
logger.info("Test results did not match the test results of the original code.")
console.rule()
success = False
equal_results = False
if (total_candidate_timing := candidate_results.total_passed_runtime()) == 0:
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
console.rule()
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.bin")).unlink(missing_ok=True)
get_run_tmp_file(Path(f"test_return_values_{optimization_candidate_index}.sqlite")).unlink(missing_ok=True)
if not equal_results:
success = False
if not success:
return Failure("Failed to run the optimized candidate.")
return Success(
OptimizedCandidateResult(
max_loop_count=loop_count,
best_test_runtime=total_candidate_timing,
test_results=candidate_results,
optimization_candidate_index=optimization_candidate_index,
total_candidate_timing=total_candidate_timing,
)
)
)
def run_and_parse_tests(
self,
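
The unittest branch keeps the same loop-with-budget pattern as before: at most 100 iterations, with a minimum of 5 before the TOTAL_LOOPING_TIME budget can cut it off. A self-contained sketch of that pattern; the budget value and run function are stand-ins.

import time

TOTAL_LOOPING_TIME = 10.0  # seconds; assumed value for this sketch

def run_once(loop_index: int) -> None:
    time.sleep(0.01)  # stand-in for run_and_parse_tests

start_time = time.time()
loop_count = 0
for i in range(100):
    if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME:
        break
    run_once(i + 1)  # CODEFLASH_LOOP_INDEX would be set to i + 1 here
    loop_count = i + 1
print("loops run:", loop_count)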

View file

@ -27,7 +27,7 @@ from copy import copy
from io import StringIO
from pathlib import Path
from types import FrameType
from typing import Any, ClassVar, List, Optional
from typing import Any, ClassVar, List
import dill
import isort
@ -42,6 +42,7 @@ from codeflash.tracing.tracing_utils import FunctionModules
from codeflash.verification.verification_utils import get_test_file_path
# Debug this file by simply adding print statements. This file is not meant to be debugged by the debugger.
class Tracer:
"""Use this class as a 'with' context manager to trace a function call,
input arguments, and profiling info.
@ -50,11 +51,11 @@ class Tracer:
def __init__(
self,
output: str = "codeflash.trace",
functions: Optional[List[str]] = None,
functions: list[str] | None = None,
disable: bool = False,
config_file_path: Path | None = None,
max_function_count: int = 256,
timeout: Optional[int] = None, # seconds
timeout: int | None = None, # seconds
) -> None:
""":param output: The path to the output trace file
:param functions: List of functions to trace. If None, trace all functions
@ -91,7 +92,8 @@ class Tracer:
}
self.max_function_count = max_function_count
self.config, found_config_path = parse_config_file(config_file_path)
self.project_root = project_root_from_module_root(self.config["module_root"], found_config_path)
self.project_root = project_root_from_module_root(Path(self.config["module_root"]), found_config_path)
print("project_root", self.project_root)
self.ignored_functions = {"<listcomp>", "<genexpr>", "<dictcomp>", "<setcomp>", "<lambda>", "<module>"}
self.file_being_called_from: str = str(Path(sys._getframe().f_back.f_code.co_filename).name).replace(".", "_")
@ -160,7 +162,7 @@ class Tracer:
remapped_callers = [{"key": k, "value": v} for k, v in callers.items()]
cur.execute(
"INSERT INTO pstats VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
(Path(func[0]).resolve(), func[1], func[2], func[3], cc, nc, tt, ct, json.dumps(remapped_callers)),
(str(Path(func[0]).resolve()), func[1], func[2], func[3], cc, nc, tt, ct, json.dumps(remapped_callers)),
)
self.con.commit()
@ -177,7 +179,7 @@ class Tracer:
function
for function in self.function_modules
if self.function_count[
function.file_name
str(function.file_name)
+ ":"
+ (function.class_name + ":" if function.class_name else "")
+ function.function_name
@ -193,14 +195,17 @@ class Tracer:
)
function_path = "_".join(self.functions) if self.functions else self.file_being_called_from
test_file_path = get_test_file_path(
test_dir=self.config["tests_root"], function_name=function_path, test_type="replay"
test_dir=Path(self.config["tests_root"]), function_name=function_path, test_type="replay"
)
replay_test = isort.code(replay_test)
with open(test_file_path, "w", encoding="utf8") as file:
file.write(replay_test)
console.print(
f"Codeflash: Traced {self.trace_count} function calls successfully and replay test created at - {test_file_path}"
f"Codeflash: Traced {self.trace_count} function calls successfully and replay test created at - {test_file_path}",
crop=False,
soft_wrap=False,
overflow="ignore",
)
def tracer_logic(self, frame: FrameType, event: str):
@ -212,12 +217,12 @@ class Tracer:
console.print(f"Codeflash: Timeout reached! Stopping tracing at {self.timeout} seconds.")
return
code = frame.f_code
file_name = code.co_filename
file_name = Path(code.co_filename).resolve()
# TODO : It currently doesn't log the last return call from the first function
if code.co_name in self.ignored_functions:
return
if not Path(file_name).exists():
if not file_name.exists():
return
if self.functions:
if code.co_name not in self.functions:
@ -236,7 +241,6 @@ class Tracer:
except:
# someone can override the getattr method and raise an exception. I'm looking at you wrapt
return
file_name = Path(file_name).resolve()
function_qualified_name = f"{file_name}:{(class_name + ':' if class_name else '')}{code.co_name}"
if function_qualified_name in self.ignored_qualified_functions:
return
@ -250,9 +254,9 @@ class Tracer:
self.function_count[function_qualified_name] = 0
file_valid = filter_files_optimized(
file_path=file_name,
tests_root=self.config["tests_root"],
ignore_paths=self.config["ignore_paths"],
module_root=self.config["module_root"],
tests_root=Path(self.config["tests_root"]),
ignore_paths=[Path(p) for p in self.config["ignore_paths"]],
module_root=Path(self.config["module_root"]),
)
if not file_valid:
# we don't want to trace this function because it cannot be optimized
@ -279,7 +283,7 @@ class Tracer:
sys.setrecursionlimit(10000)
# We do not pickle self for __init__ to avoid recursion errors, and instead instantiate its class
# directly with the rest of the arguments in the replay tests. We copy the arguments to avoid memory
# leaks, bad references or side-effects when unpickling.
# leaks, bad references or side effects when unpickling.
arguments = dict(arguments.items())
if class_name and code.co_name == "__init__":
del arguments["self"]
@ -297,7 +301,16 @@ class Tracer:
return
cur.execute(
"INSERT INTO function_calls VALUES(?, ?, ?, ?, ?, ?, ?, ?)",
(event, code.co_name, class_name, file_name, frame.f_lineno, frame.f_back.__hash__(), t_ns, local_vars),
(
event,
code.co_name,
class_name,
str(file_name),
frame.f_lineno,
frame.f_back.__hash__(),
t_ns,
local_vars,
),
)
self.trace_count += 1
self.next_insert -= 1
@ -596,7 +609,7 @@ def main():
globs = {"run_module": runpy.run_module, "modname": unknown_args[0]}
else:
progname = unknown_args[0]
sys.path.insert(0, str(Path(progname).parent))
sys.path.insert(0, str(Path(progname).resolve().parent))
with io.open_code(progname) as fp:
code = compile(fp.read(), progname, "exec")
spec = importlib.machinery.ModuleSpec(name="__main__", loader=None, origin=progname)
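
Taken together, the Tracer changes normalize file names to resolved Path objects at the point of capture and stringify them only at the sqlite boundary. A hedged usage sketch based on the class docstring, assuming the codeflash.tracer module path implied by the "python -m codeflash.tracer" invocation in the e2e script below:

from codeflash.tracer import Tracer  # module path assumed, not confirmed by this diff

def workload() -> str:
    return " ".join(str(i) for i in range(100))

if __name__ == "__main__":
    # Traces workload(), writes codeflash.trace, and emits a replay test
    # under the configured tests-root on exit.
    with Tracer(output="codeflash.trace", functions=["workload"], timeout=30):
        workload()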

View file

@ -1,3 +1,4 @@
from pathlib import Path
from typing import Optional
from pydantic import dataclasses
@ -6,7 +7,7 @@ from pydantic import dataclasses
@dataclasses.dataclass
class FunctionModules:
function_name: str
file_name: str
file_name: Path
module_name: str
class_name: Optional[str] = None
line_no: Optional[int] = None
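
file_name is now a Path on this pydantic dataclass, matching what the Tracer records. A construction sketch using a plain-dataclass stand-in:

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@dataclass
class FunctionModulesSketch:  # stand-in mirroring the pydantic dataclass above
    function_name: str
    file_name: Path
    module_name: str
    class_name: Optional[str] = None
    line_no: Optional[int] = None

fm = FunctionModulesSketch("funcA", Path("workload.py").resolve(), "workload")
print(fm.file_name)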

View file

@ -72,7 +72,7 @@ def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, tes
test_type = test_files.get_test_type_by_instrumented_file_path(test_file_path)
try:
test_pickle = pickle.loads(test_pickle_bin) if loop_index == 1 else None
except (AttributeError, ModuleNotFoundError, IndexError) as e:
except Exception as e:
logger.exception(f"Failed to load pickle file. Exception: {e}")
return test_results
assert test_type is not None, f"Test type not found for {test_file_path}"
@ -115,7 +115,7 @@ def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, tes
loop_index = val[4]
try:
ret_val = (pickle.loads(val[7]) if loop_index == 1 else None,)
except (AttributeError, ModuleNotFoundError, IndexError):
except Exception:
continue
test_results.add(
function_test_invocation=FunctionTestInvocation(
@ -184,14 +184,7 @@ def parse_test_xml(
return test_results
test_class_path = testcase.classname
try:
test_function = testcase.name.split("[", 1)[0] if "[" in testcase.name else testcase.name
except ValueError as e:
xml_content = test_xml_file_path.read_text(encoding="utf-8")
logger.exception(
f"Failed to parse test function name from {testcase.name} in {xml_content} Exception:{e}"
)
raise
test_function = testcase.name.split("[", 1)[0] if "[" in testcase.name else testcase.name
if test_file_name is None:
if test_class_path:
# TODO : This might not be true if the test is organized under a class
@ -222,7 +215,7 @@ def parse_test_xml(
continue
timed_out = False
if test_config.test_framework == "pytest":
loop_index = int(testcase.name.split("[ ", 1)[1][:-2]) if "[" in testcase.name else 1
loop_index = int(testcase.name.split("[ ")[-1][:-2]) if "[" in testcase.name else 1
if len(testcase.result) > 1:
logger.warning(f"!!!!!Multiple results for {testcase.name} in {test_xml_file_path}!!!")
if len(testcase.result) == 1:
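
The narrow (AttributeError, ModuleNotFoundError, IndexError) tuples are widened to Exception because unpickling values produced by arbitrary candidate code can raise nearly anything, including exceptions triggered by the payload itself. A minimal illustration of a pickle that fails outside the old tuple:

import pickle

class Unstable:
    def __reduce__(self):
        # Unpickling this object raises RuntimeError, which the old
        # (AttributeError, ModuleNotFoundError, IndexError) tuple missed.
        return (exec, ("raise RuntimeError('boom')",))

blob = pickle.dumps(Unstable())
try:
    pickle.loads(blob)
except Exception as e:
    print(f"Failed to load pickle file. Exception: {e!r}")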

View file

@ -25,13 +25,6 @@ def run_tests(
pytest_max_loops: int = 100_000,
) -> tuple[Path, subprocess.CompletedProcess]:
assert test_framework in ["pytest", "unittest"]
# TODO: Make this work for replay tests
for i, test_file in enumerate(test_paths):
if (
only_run_these_test_functions and test_file.test_type == TestType.REPLAY_TEST
): # "__replay_test" in test_path:
# TODO: This might not work for replay tests
test_paths[i] = str(test_file.instrumented_file_path) + "::" + only_run_these_test_functions
if test_framework == "pytest":
result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
@ -51,7 +44,14 @@ def run_tests(
"--codeflash_loops_scope=session",
]
test_files = [str(file.instrumented_file_path) for file in test_paths.test_files]
test_files = []
for file in test_paths.test_files:
if file.test_type == TestType.REPLAY_TEST:
test_files.append(
str(file.instrumented_file_path) + "::" + only_run_these_test_functions[file.instrumented_file_path]
)
else:
test_files.append(str(file.instrumented_file_path))
results = subprocess.run(
pytest_cmd_list + test_files + pytest_args,
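
Replay test files are now narrowed to a single test function using pytest's file::function node-id syntax, while other test files still run whole. A stand-in sketch of the selection the new loop builds:

REPLAY_TEST = "REPLAY_TEST"  # stand-in for TestType.REPLAY_TEST

test_paths = [
    ("tests/replay_0.py", REPLAY_TEST),
    ("tests/test_a.py", "EXISTING_UNIT_TEST"),
]
only_run_these_test_functions = {"tests/replay_0.py": "test_funcA_replay"}

test_files = []
for path, test_type in test_paths:
    if test_type == REPLAY_TEST:
        test_files.append(path + "::" + only_run_these_test_functions[path])
    else:
        test_files.append(path)
print(test_files)  # ['tests/replay_0.py::test_funcA_replay', 'tests/test_a.py']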

View file

@ -27,7 +27,7 @@ def main():
improvement_pct = int(re.search(r"📈 ([\d,]+)% improvement", stdout).group(1).replace(",", ""))
improvement_x = float(improvement_pct) / 100
assert improvement_pct > 5, f"Performance improvement percentage was {improvement_pct}, which was not above 10%"
assert improvement_pct > 10, f"Performance improvement percentage was {improvement_pct}, which was not above 10%"
assert improvement_x > 0.1, f"Performance improvement rate was {improvement_x}x, which was not above 0.1x"
# Check for the line indicating the number of discovered existing unit tests

View file

@ -0,0 +1,66 @@
import os
import pathlib
import re
import subprocess
def main():
cwd = (
pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "simple_tracer_e2e"
).resolve()
print("cwd", cwd)
command = ["python", "-m", "codeflash.tracer", "-o", "codeflash.trace", "workload.py"]
process = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, cwd=str(cwd), env=os.environ.copy()
)
output = []
for line in process.stdout:
print(line, end="") # Print each line in real-time
output.append(line) # Store each line in the output variable
return_code = process.wait()
stdout = "".join(output)
assert return_code == 0, f"The codeflash command returned exit code {return_code} instead of 0"
functions_traced = re.search(r"Traced (\d+) function calls successfully and replay test created at - (.*)$", stdout)
assert functions_traced, "Failed to find any traced functions or replay test"
assert int(functions_traced.group(1)) == 3, "Failed to find the correct number of traced functions"
replay_test_path = pathlib.Path(functions_traced.group(2))
assert replay_test_path, "Failed to find the replay test file path"
assert replay_test_path.exists(), f"Replay test file does not exist at - {replay_test_path}"
command = ["python", "../../../codeflash/main.py", "--replay-test", str(replay_test_path), "--no-pr"]
process = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, cwd=str(cwd), env=os.environ.copy()
)
output = []
for line in process.stdout:
print(line, end="") # Print each line in real-time
output.append(line) # Store each line in the output variable
return_code = process.wait()
stdout = "".join(output)
assert return_code == 0, f"The codeflash command returned exit code {return_code} instead of 0"
improvement_pct = int(re.search(r"📈 ([\d,]+)% improvement", stdout).group(1).replace(",", ""))
improvement_x = float(improvement_pct) / 100
assert improvement_pct > 10, f"Performance improvement percentage was {improvement_pct}, which was not above 10%"
assert improvement_x > 0.1, f"Performance improvement rate was {improvement_x}x, which was not above 0.1x"
# Check for the line indicating the number of discovered existing unit tests
unit_test_search = re.search(r"Discovered (\d+) existing unit tests", stdout)
num_unit_tests = int(unit_test_search.group(1))
assert num_unit_tests == 1, f"Could not find 1 existing unit test, found {num_unit_tests} instead"
# check if the replay test was correctly run for the original code
m = re.search(r"Replay Tests - Passed: (\d+), Failed: (\d+)", stdout)
assert m, "Failed to run replay tests"
passed, failed = int(m.group(1)), int(m.group(2))
assert passed > 0, f"Expected >0 passed replay tests, found {passed}"
assert failed == 0, f"Expected 0 failed replay tests, found {failed}"
if __name__ == "__main__":
main()

View file

@ -2,6 +2,7 @@ import tempfile
from pathlib import Path
from codeflash.discovery.functions_to_optimize import (
filter_files_optimized,
find_all_functions_in_file,
get_functions_to_optimize,
inspect_top_level_functions_or_methods,
@ -19,7 +20,7 @@ def test_function_eligible_for_optimization() -> None:
f.write(function)
f.flush()
functions_found = find_all_functions_in_file(Path(f.name))
assert functions_found[f.name][0].function_name == "test_function_eligible_for_optimization"
assert functions_found[Path(f.name)][0].function_name == "test_function_eligible_for_optimization"
# Has no return statement
function = """def test_function_not_eligible_for_optimization():
@ -31,7 +32,7 @@ def test_function_eligible_for_optimization() -> None:
f.write(function)
f.flush()
functions_found = find_all_functions_in_file(Path(f.name))
assert len(functions_found[f.name]) == 0
assert len(functions_found[Path(f.name)]) == 0
def test_find_top_level_function_or_method():
@ -149,3 +150,19 @@ def functionA():
for file in functions:
assert functions[file][0].qualified_name == "functionA"
assert functions[file][0].function_name == "functionA"
def test_filter_files_optimized():
tests_root = Path("tests").resolve()
module_root = Path().resolve()
ignore_paths = []
file_path_test = Path("tests/test_function_discovery.py").resolve()
file_path_same_level = Path("file.py").resolve()
file_path_different_level = Path("src/file.py").resolve()
file_path_above_level = Path("../file.py").resolve()
assert not filter_files_optimized(file_path_test, tests_root, ignore_paths, module_root)
assert filter_files_optimized(file_path_same_level, tests_root, ignore_paths, module_root)
assert filter_files_optimized(file_path_different_level, tests_root, ignore_paths, module_root)
assert not filter_files_optimized(file_path_above_level, tests_root, ignore_paths, module_root)
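
These assertions lean on Path.is_relative_to (Python 3.9+): a resolved ../file.py falls outside the module root and is filtered out, while files at or below the module root pass. A two-line check of that semantics:

from pathlib import Path

module_root = Path().resolve()
print(Path("../file.py").resolve().is_relative_to(module_root))   # False: filtered out
print(Path("src/file.py").resolve().is_relative_to(module_root))  # True: kept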