Various fixes.

This commit is contained in:
RD 2024-11-01 02:23:59 -07:00
parent 5c3a2e18d0
commit c56828ccef
8 changed files with 95 additions and 73 deletions

View file

@ -19,6 +19,7 @@
</list>
</option>
</inspection_tool>
<inspection_tool class="PyTestUnpassedFixtureInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="PyTypeCheckerInspection" enabled="false" level="WARNING" enabled_by_default="false" />
</profile>
</component>

View file

@ -134,7 +134,7 @@ def collect_setup_info() -> SetupInfo:
test_subdir_options = valid_subdirs
if "tests" not in valid_subdirs:
test_subdir_options.append(create_for_me_option)
custom_dir_option = "enter a custom directory..."
custom_dir_option = "enter a custom directory"
test_subdir_options.append(custom_dir_option)
tests_root_answer = inquirer_wrapper(
inquirer.list_input,
@ -242,7 +242,7 @@ def detect_test_framework(curdir: Path, tests_root: Path) -> str | None:
def check_for_toml_or_setup_file() -> str | None:
click.echo()
click.echo("Checking for pyproject.toml or setup.py ...\r", nl=False)
click.echo("Checking for pyproject.toml or setup.py\r", nl=False)
curdir = Path.cwd()
pyproject_toml_path = curdir / "pyproject.toml"
setup_py_path = curdir / "setup.py"
@ -325,7 +325,7 @@ def install_github_actions() -> None:
confirm_creation_yes = inquirer_wrapper(
inquirer.confirm,
message=f"I'm going to create a new GitHub actions workflow file at {optimize_yaml_path} ... is this OK?",
message=f"I'm going to create a new GitHub actions workflow file at {optimize_yaml_path} is this OK?",
default=True,
)
ph("cli-github-optimization-confirm-workflow-creation", {"confirm_creation": confirm_creation_yes})
@ -347,7 +347,7 @@ def install_github_actions() -> None:
click.echo(f"✅ Created {optimize_yaml_path}{LF}")
click.prompt(
f"Next, you'll need to add your CODEFLASH_API_KEY as a secret to your GitHub repo.{LF}"
f"Press Enter to open your repo's secrets page at {get_github_secrets_page_url(repo)} ...{LF}"
f"Press Enter to open your repo's secrets page at {get_github_secrets_page_url(repo)}{LF}"
f"Then, click 'New repository secret' to add your api key with the variable name CODEFLASH_API_KEY.{LF}",
default="",
type=click.STRING,
@ -425,7 +425,7 @@ def configure_pyproject_toml(setup_info: SetupInfo) -> None:
tool_section["codeflash"] = codeflash_section
pyproject_data["tool"] = tool_section
click.echo("Writing Codeflash configuration ...\r", nl=False)
click.echo("Writing Codeflash configuration\r", nl=False)
with toml_path.open("w", encoding="utf8") as pyproject_file:
pyproject_file.write(tomlkit.dumps(pyproject_data))
click.echo(f"✅ Added Codeflash configuration to {toml_path}")
@ -447,7 +447,7 @@ def install_github_app() -> None:
click.prompt(
f"Finally, you'll need install the Codeflash GitHub app by choosing the repository you want to install Codeflash on.{LF}"
f"I will attempt to open the github app page - https://github.com/apps/codeflash-ai/installations/select_target {LF}"
f"Press Enter to open the page to let you install the app ...{LF}",
f"Press Enter to open the page to let you install the app{LF}",
default="",
type=click.STRING,
prompt_suffix="",
@ -455,7 +455,7 @@ def install_github_app() -> None:
)
click.launch("https://github.com/apps/codeflash-ai/installations/select_target")
click.prompt(
f"Press Enter once you've finished installing the github app from https://github.com/apps/codeflash-ai/installations/select_target {LF}",
f"Press Enter once you've finished installing the github app from https://github.com/apps/codeflash-ai/installations/select_target{LF}",
default="",
type=click.STRING,
prompt_suffix="",
@ -474,7 +474,7 @@ def install_github_app() -> None:
click.prompt(
f"❌ It looks like the Codeflash GitHub App is not installed on the repository {owner}/{repo}.{LF}"
f"Please install it from https://github.com/apps/codeflash-ai/installations/select_target {LF}"
f"Press Enter to continue once you've finished installing the github app...{LF}",
f"Press Enter to continue once you've finished installing the github app{LF}",
default="",
type=click.STRING,
prompt_suffix="",
@ -616,7 +616,7 @@ def test_sort():
def run_end_to_end_test(args: Namespace, bubble_sort_path: str, bubble_sort_test_path: str) -> None:
command = ["codeflash", "--file", "bubble_sort.py", "--function", "sorter"]
sys.stdout.write("Running sample optimization... ")
sys.stdout.write("Running sample optimization")
sys.stdout.flush()
try:
process = subprocess.run(command, text=True, cwd=args.module_root, check=False)

View file

@ -55,7 +55,7 @@ def get_git_diff(repo_directory: Path = Path.cwd(), uncommitted_changes: bool =
def get_current_branch(repo: Repo | None = None) -> str:
"""Returns the name of the current branch in the given repository.
"""Return the name of the current branch in the given repository.
:param repo: An optional Repo object. If not provided, the function will
search for a repository in the current and parent directories.
@ -97,7 +97,8 @@ def confirm_proceeding_with_no_git_repo() -> str | bool:
if sys.__stdin__.isatty():
return inquirer_wrapper(
inquirer.confirm,
message="WARNING: I did not find a git repository for your code. If you proceed with running codeflash, optimized code will"
message="WARNING: I did not find a git repository for your code. If you proceed with running codeflash, "
"optimized code will"
" be written over your current code and you could irreversibly lose your current code. Proceed?",
default=False,
)
@ -117,7 +118,8 @@ def check_and_push_branch(repo: git.Repo, wait_for_push: bool = False) -> bool:
return False
if sys.__stdin__.isatty() and inquirer_wrapper(
inquirer.confirm,
message=f"⚡️ In order for me to create PRs, your current branch needs to be pushed. Do you want to push the branch "
message=f"⚡️ In order for me to create PRs, your current branch needs to be pushed. Do you want to push "
f"the branch"
f"'{current_branch}' to the remote repository?",
default=False,
):

View file

@ -2,7 +2,6 @@
import ast
from pathlib import Path
from typing import Iterator
from pydantic import BaseModel, field_validator
@ -37,8 +36,8 @@ class ImportedInternalModuleAnalysis(BaseModel, frozen=True):
return v
def parse_imports(code: str) -> Iterator[ast.AST]:
return (node for node in ast.walk(ast.parse(code)) if isinstance(node, (ast.Import, ast.ImportFrom)))
def parse_imports(code: str) -> list[ast.Import | ast.ImportFrom]:
    """Return every import statement found anywhere in *code*.

    :param code: Python source text; must be syntactically valid
        (``ast.parse`` raises ``SyntaxError`` otherwise).
    :return: all ``ast.Import`` and ``ast.ImportFrom`` nodes, including
        ones nested inside functions or classes (``ast.walk`` visits the
        whole tree, not just the module top level).
    """
    return [node for node in ast.walk(ast.parse(code)) if isinstance(node, (ast.Import, ast.ImportFrom))]
def resolve_relative_name(module: str | None, level: int, current_module: str) -> str | None:
@ -53,19 +52,17 @@ def resolve_relative_name(module: str | None, level: int, current_module: str) -
return ".".join(base_parts)
def get_module_full_name(node: ast.Import | ast.ImportFrom, current_module: str) -> list[str]:
    """Resolve the fully qualified module name(s) referenced by an import node.

    :param node: the ``ast.Import`` or ``ast.ImportFrom`` node to resolve.
    :param current_module: dotted name of the module containing the import;
        used to resolve relative imports.
    :return: a list of dotted module names; empty when the relative import
        cannot be resolved.
    """
    if isinstance(node, ast.Import):
        # `import a.b, c` references each aliased name directly.
        return [alias.name for alias in node.names]
    base_module = resolve_relative_name(node.module, node.level, current_module)
    if base_module is None:
        # Relative import could not be resolved against current_module.
        return []
    if node.module is None and node.level > 0:
        # Relative import with no module specified, e.g. `from . import mymodule`:
        # each imported name is itself a module under the base package.
        return [f"{base_module}.{alias.name}" for alias in node.names]
    # Absolute import, or relative import with a module specified.
    return [base_module]
def is_internal_module(module_name: str, project_root: Path) -> bool:
@ -86,19 +83,19 @@ def get_module_file_path(module_name: str, project_root: Path) -> Path | None:
def analyze_imported_internal_modules(
    code_str: str, module_file_path: Path, project_root: Path
) -> list[ImportedInternalModuleAnalysis]:
    """Statically find and analyze all internal modules imported by a module.

    :param code_str: source text of the module being analyzed.
    :param module_file_path: path of that module on disk; must live under
        *project_root* (``relative_to`` raises ``ValueError`` otherwise).
    :param project_root: root directory used to decide which imports are
        project-internal and to locate their files.
    :return: one analysis entry per internal module whose file could be found.
    """
    module_rel_path = module_file_path.relative_to(project_root).with_suffix("")
    current_module = ".".join(module_rel_path.parts)
    imports = parse_imports(code_str)
    module_names: set[str] = set()
    for node in imports:
        module_names.update(get_module_full_name(node, current_module))
    internal_modules = {module_name for module_name in module_names if is_internal_module(module_name, project_root)}
    return [
        # mod_name is already a str; the last dotted component is the bare name.
        ImportedInternalModuleAnalysis(name=mod_name.split(".")[-1], full_name=mod_name, file_path=file_path)
        for mod_name in internal_modules
        if (file_path := get_module_file_path(mod_name, project_root)) is not None
    ]

View file

@ -1,20 +1,28 @@
from __future__ import annotations
from enum import IntEnum
from pathlib import Path
from typing import Iterator, Optional
from typing import TYPE_CHECKING, Iterator, Optional
from jedi.api.classes import Name
from pydantic import BaseModel
from pydantic.dataclasses import dataclass
from codeflash.verification.test_results import TestResults, TestType
if TYPE_CHECKING:
from jedi.api.classes import Name
# If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
# qualified name of the method is foo.eggs.Ham.spam, its qualified name is Ham.spam, and its name is spam. The full name
# of the module is foo.eggs.
class DiffbehaviorReturnCode(IntEnum):
    """Exit codes of the `crosshair diffbehavior` concolic equivalence check.

    IntEnum so members compare equal to the raw ``subprocess`` return codes.
    """

    NO_DIFFERENCES = 0  # check passed: no behavioral differences found
    COUNTER_EXAMPLES = 1  # check failed: counter-examples demonstrate differing behavior
    ERROR = 2  # tool error; result is inconclusive
@dataclass(frozen=True, config={"arbitrary_types_allowed": True})
class FunctionSource:
file_path: Path
@ -92,7 +100,7 @@ class TestFiles(BaseModel):
def __iter__(self) -> Iterator[TestFile]:
    """Iterate over the contained TestFile entries."""
    return iter(self.test_files)
def __len__(self):
def __len__(self) -> int:
    """Return the number of contained test files."""
    return len(self.test_files)

View file

@ -18,7 +18,6 @@ from rich.console import Group
from rich.panel import Panel
from rich.syntax import Syntax
from rich.tree import Tree
from sqlalchemy import false
from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient
from codeflash.cli_cmds.console import code_print, console, logger, progress_bar
@ -43,7 +42,7 @@ from codeflash.code_utils.config_consts import (
from codeflash.code_utils.formatter import format_code, sort_imports
from codeflash.code_utils.instrument_existing_tests import inject_profiling_into_existing_test
from codeflash.code_utils.remove_generated_tests import remove_functions_from_generated_tests
from codeflash.code_utils.static_analysis import analyze_imported_modules
from codeflash.code_utils.static_analysis import analyze_imported_internal_modules
from codeflash.code_utils.time_utils import humanize_runtime
from codeflash.discovery.discover_unit_tests import discover_unit_tests
from codeflash.discovery.functions_to_optimize import FunctionToOptimize, get_functions_to_optimize
@ -51,6 +50,7 @@ from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import (
BestOptimization,
CodeOptimizationContext,
DiffbehaviorReturnCode,
FunctionParent,
GeneratedTests,
GeneratedTestsList,
@ -126,11 +126,11 @@ class Optimizer:
try:
ph("cli-optimize-functions-to-optimize", {"num_functions": num_optimizable_functions})
if num_optimizable_functions == 0:
logger.info("No functions found to optimize. Exiting...")
logger.info("No functions found to optimize. Exiting")
return
console.rule()
logger.info(f"Discovering existing unit tests in {self.test_cfg.tests_root} ...")
logger.info(f"Discovering existing unit tests in {self.test_cfg.tests_root}")
function_to_tests: dict[str, list[FunctionCalledInTest]] = discover_unit_tests(self.test_cfg)
num_discovered_tests: int = sum([len(value) for value in function_to_tests.values()])
logger.info(f"Discovered {num_discovered_tests} existing unit tests in {self.test_cfg.tests_root}")
@ -139,17 +139,15 @@ class Optimizer:
for path in file_to_funcs_to_optimize:
original_module_path = Path(path)
logger.info(f"Examining file {original_module_path!s} ...")
logger.info(f"Examining file {original_module_path!s}")
# TODO CROSSHAIR Check for IO errors with try block, factor out code extraction and validation.
original_code: str = original_module_path.read_text(encoding="utf8") # TODO CROSSHAIR Parse, Validate
original_code_imported_module_analysis = analyze_imported_modules(
original_code_imported_module_analysis = analyze_imported_internal_modules(
original_code, original_module_path, self.args.project_root
)
imported_internal_module_information: dict[Path, dict[str, str]] = {}
for analysis in original_code_imported_module_analysis:
if analysis.origin != "internal":
continue
# TODO CROSSHAIR Check for IO errors, factor out.
imported_internal_module_information[analysis.file_path] = {
"name": analysis.name,
@ -387,19 +385,21 @@ class Optimizer:
)
]
# TODO CROSSHAIR: refactor filtering + write as single function. Put under try/except block.
# TODO Crosshair: Precalculate or factor out repeated function_to_optimize.file_path.relative_to(self.args.module_root)
# TODO CROSSHAIR: refactor filtering + write as single function. Put under try/except block. TODO
# Crosshair: Precalculate or factor out repeated function_to_optimize.file_path.relative_to(
# self.args.module_root)
for candidate, worktree in zip(candidates_with_diffs, worktrees[1:]):
if are_optimized_module_code_strings_zero_diff[candidate.optimization_id]:
(worktree / function_to_optimize.file_path.relative_to(self.args.module_root)).write_text(
optimized_module_code_strings[i], encoding="utf8"
optimized_module_code_strings[candidate.optimization_id], encoding="utf8"
)
for callee_module_path in optimized_callee_modules_code_strings[candidate.optimization_id]:
if are_optimized_callee_module_code_strings_zero_diff[candidate.optimization_id][
callee_module_path
]:
(worktree / callee_module_path.relative_to(self.args.module_root)).write_text(
optimized_callee_modules_code_strings[i][callee_module_path], encoding="utf8"
optimized_callee_modules_code_strings[candidate.optimization_id][callee_module_path],
encoding="utf8",
)
# TODO Crosshair: Factor out relative path munging code, repeated.
@ -407,8 +407,7 @@ class Optimizer:
worktrees[0].name / function_to_optimize.file_path.relative_to(self.args.module_root).with_suffix("")
).replace("/", ".")
# TODO CROSSHAIR: Turn into enum.
diffbehavior_results: dict[str, int] = {}
diffbehavior_results: dict[str, DiffbehaviorReturnCode] = {}
for candidate_index, candidate in enumerate(candidates_with_diffs, start=1):
logger.info(f"Optimization candidate {candidate_index}/{len(candidates_with_diffs)}:")
code_print(candidate.source_code)
@ -429,16 +428,18 @@ class Optimizer:
cwd=worktree_root,
check=False,
)
diffbehavior_results[candidate.optimization_id] = result.returncode
if result.returncode == 2:
if result.returncode == DiffbehaviorReturnCode.ERROR:
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.ERROR
logger.info("Inconclusive results from concolic behavior correctness check.")
logger.warning(
f"Error running crosshair diffbehavior{': '+ result.stderr if result.stderr else '.'}"
f"Error running crosshair diffbehavior{': ' + result.stderr if result.stderr else '.'}"
)
elif result.returncode == 1:
elif result.returncode == DiffbehaviorReturnCode.COUNTER_EXAMPLES:
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.COUNTER_EXAMPLES
logger.info(f"Optimization candidate failed concolic behavior correctness check:\n{result.stdout}")
elif result.returncode == 0:
elif result.returncode == DiffbehaviorReturnCode.NO_DIFFERENCES:
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.NO_DIFFERENCES
logger.info(
f"Optimization candidate passed concolic behavior correctness check"
f"{': \n' + result.stdout.split('\n', 1)[0] if '\n' in result.stdout else '.'}"
@ -545,7 +546,7 @@ class Optimizer:
original_helper_code: dict[Path, str],
function_trace_id: str,
only_run_this_test_function: list[FunctionCalledInTest] | None = None,
diffbehavior_results: dict[str, int],
diffbehavior_results: dict[str, DiffbehaviorReturnCode],
) -> BestOptimization | None:
best_optimization: BestOptimization | None = None
best_runtime_until_now = original_code_baseline.runtime # The fastest code runtime until now
@ -555,7 +556,8 @@ class Optimizer:
is_correct = {}
logger.info(
f"Determining best optimization candidate (out of {len(candidates)}) for {function_to_optimize.qualified_name} ..."
f"Determining best optimization candidate (out of {len(candidates)}) for "
f"{function_to_optimize.qualified_name}"
)
console.rule()
try:
@ -585,7 +587,7 @@ class Optimizer:
optimization_candidate_index=candidate_index,
original_test_results=original_code_baseline.overall_test_results,
tests_in_file=only_run_this_test_function,
diffbehavior_result=diffbehavior_results[candidate_index],
diffbehavior_result=diffbehavior_results[candidate.optimization_id],
)
if not is_successful(run_results):
optimized_runtimes[candidate.optimization_id] = None
@ -608,7 +610,9 @@ class Optimizer:
tree.add("This candidate is faster than the previous best candidate. 🚀")
tree.add(f"Original runtime: {humanize_runtime(original_code_baseline.runtime)}")
tree.add(
f"Best test runtime: {humanize_runtime(candidate_result.best_test_runtime)} (measured over {candidate_result.max_loop_count} loop{'s' if candidate_result.max_loop_count > 1 else ''})"
f"Best test runtime: {humanize_runtime(candidate_result.best_test_runtime)} "
f"(measured over {candidate_result.max_loop_count} "
f"loop{'s' if candidate_result.max_loop_count > 1 else ''})"
)
tree.add(f"Speedup ratio: {perf_gain:.3f}")
@ -621,7 +625,9 @@ class Optimizer:
best_runtime_until_now = best_test_runtime
else:
tree.add(
f"Runtime: {humanize_runtime(best_test_runtime)} (measured over {candidate_result.max_loop_count} loop{'s' if candidate_result.max_loop_count > 1 else ''})"
f"Runtime: {humanize_runtime(best_test_runtime)} "
f"(measured over {candidate_result.max_loop_count} "
f"loop{'s' if candidate_result.max_loop_count > 1 else ''})"
)
tree.add(f"Speedup ratio: {perf_gain:.3f}")
console.print(tree)
@ -631,7 +637,7 @@ class Optimizer:
except KeyboardInterrupt as e:
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
logger.exception(f"Optimization interrupted: {e}")
raise e
raise
# TODO Crosshair: Report on regression vs concolic, false negatives vs matches
self.aiservice_client.log_results(
@ -843,7 +849,8 @@ class Optimizer:
with new_test_path.open("w", encoding="utf8") as _f:
_f.write(injected_test)
else:
raise ValueError("injected_test is None")
msg = "injected_test is None"
raise ValueError(msg)
unique_instrumented_test_files.add(new_test_path)
if not self.test_files.get_by_original_file_path(path_obj_test_file):
@ -890,7 +897,7 @@ class Optimizer:
ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
)
future_candidates_exp = None
futures: list = future_tests + [future_optimization_candidates]
futures: list[concurrent.futures.Future] = [*future_tests, future_optimization_candidates]
if run_experiment:
future_candidates_exp = executor.submit(
self.local_aiservice_client.optimize_python_code,
@ -1040,7 +1047,8 @@ class Optimizer:
loop_count = max([int(result.loop_index) for result in unittest_results.test_results])
logger.info(
f"Original code runtime measured over {loop_count} loop{'s' if loop_count > 1 else ''}: {humanize_runtime(total_timing)} per full loop"
f"Original code runtime measured over {loop_count} loop{'s' if loop_count > 1 else ''}: "
f"{humanize_runtime(total_timing)} per full loop"
)
console.rule()
logger.debug(f"Total original code runtime (ns): {total_timing}")
@ -1062,7 +1070,7 @@ class Optimizer:
optimization_candidate_index: int,
original_test_results: TestResults | None,
tests_in_file: list[FunctionCalledInTest] | None,
diffbehavior_result: int,
diffbehavior_result: DiffbehaviorReturnCode,
) -> Result[OptimizedCandidateResult, str]:
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
@ -1158,24 +1166,26 @@ class Optimizer:
equal_results = False
console.rule()
if diffbehavior_result == 0:
if diffbehavior_result == DiffbehaviorReturnCode.NO_DIFFERENCES:
logger.info("Concolic behavior correctness check successful!")
console.rule()
if equal_results:
logger.info("True negative: Concolic behavior correctness check successful and test results matched.")
else:
logger.warning(
"False negative for concolic testing: Concolic behavior correctness check successful but test results did not match."
"False negative for concolic testing: Concolic behavior correctness check successful but test "
"results did not match."
)
console.rule()
elif diffbehavior_result == 1:
elif diffbehavior_result == DiffbehaviorReturnCode.COUNTER_EXAMPLES:
logger.warning("Concolic behavior correctness check failed.")
console.rule()
if equal_results:
logger.warning(
"False negative for regression testing: Concolic behavior correctness check failed but test results matched."
"False negative for regression testing: Concolic behavior correctness check failed but test "
"results matched."
)
success = false()
success = False
equal_results = False
else:
logger.info("True positive: Concolic behavior correctness check failed and test results did not match.")
@ -1237,7 +1247,8 @@ class Optimizer:
return TestResults()
if run_result.returncode != 0:
logger.debug(
f'Nonzero return code {run_result.returncode} when running tests in {", ".join([str(f.instrumented_file_path) for f in test_files.test_files])}.\n'
f'Nonzero return code {run_result.returncode} when running tests in '
f'{", ".join([str(f.instrumented_file_path) for f in test_files.test_files])}.\n'
f"stdout: {run_result.stdout}\n"
f"stderr: {run_result.stderr}\n"
)
@ -1258,7 +1269,7 @@ class Optimizer:
module_path: Path,
function_trace_id: str,
) -> list[concurrent.futures.Future]:
futures = [
return [
executor.submit(
generate_tests,
self.aiservice_client,
@ -1274,7 +1285,6 @@ class Optimizer:
)
for test_index in range(N_TESTS_TO_GENERATE)
]
return futures
def run_with_args(args: Namespace) -> None:

View file

@ -52,6 +52,7 @@ optional = true
ipython = "^8.12.0"
mypy = ">=1.13"
ruff = ">=0.7.0"
lxml-stubs = "^0.5.1"
pandas-stubs = ">=2.2.2.240807, <2.2.3.241009"
types-Pygments = "^2.18.0.20240506"
types-colorama = "^0.4.15.20240311"
@ -61,6 +62,7 @@ types-requests = "^2.32.0.20241016"
types-six = "^1.16.21.20241009"
types-cffi = "^1.16.0.20240331"
types-openpyxl = "^3.1.5.20241020"
types-regex = "^2024.9.11.20240912"
[tool.poetry.build]
script = "codeflash/update_license_version.py"
@ -104,7 +106,8 @@ exclude= ["code_to_optimize/", "pie_test_set/"]
[tool.ruff.lint]
select = ["ALL"]
ignore = ["S101", "FIX002", "ANN101", "COM812"]
ignore = ["C901", "D100", "D101", "D102", "D103", "D105", "D107", "S101", "S603", "S607", "ANN101",
"COM812", "FIX002", "PLR0912", "PLR0913", "PLR0915", "TD002", "TD003", "TD004"]
[tool.ruff.lint.flake8-type-checking]
strict = true

View file

@ -70,7 +70,8 @@ warn_required_dynamic_aliases = true
[tool.ruff.lint]
select = ["ALL"]
ignore = ["S101", "FIX002", "ANN101", "COM812"]
ignore = ["C901", "D100", "D101", "D102", "D103", "D105", "D107", "S101", "S603", "S607", "ANN101",
"COM812", "FIX002", "PLR0912", "PLR0913", "PLR0915", "TD002", "TD003", "TD004"]
[tool.ruff.lint.flake8-type-checking]
strict = true