Merge remote-tracking branch 'origin/main' into test-discovery

This commit is contained in:
ihitamandal 2024-03-06 10:26:43 -08:00
commit 145b647c7f
16 changed files with 215 additions and 79 deletions

View file

@ -20,10 +20,10 @@ jobs:
with:
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Python 3.11.6
- name: Set up Python 3.12.2
uses: actions/setup-python@v4
with:
python-version: "3.11.6"
python-version: "3.12.2"
- name: Install dependencies
run: |
python -m pip install --upgrade pip

View file

@ -31,7 +31,7 @@
<excludeFolder url="file://$MODULE_DIR$/js/cf-webapp/node_modules" />
<excludeFolder url="file://$MODULE_DIR$/js/common/node_modules" />
</content>
<orderEntry type="jdk" jdkName="codeflash311" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="$USER_HOME$/miniforge3/envs/codeflash311" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module" module-name="langchain" />
</component>

View file

@ -11,18 +11,33 @@ _posthog = Posthog(
)
_ANALYTICS_ENABLED = True
def enable_analytics(enabled: bool) -> None:
    """
    Enable or disable analytics.

    The state change itself is reported as an event. ``ph`` discards events
    while analytics is disabled, so the flag is switched on *before* the
    "enabled" event is sent and switched off only *after* the "disabled"
    event is sent.

    :param enabled: Whether to enable analytics.
    """
    global _ANALYTICS_ENABLED
    if enabled:
        # Flip the flag first; otherwise ph() would drop this event whenever
        # analytics was previously disabled.
        _ANALYTICS_ENABLED = True
        ph("cli-analytics-enabled")
    else:
        # Report the opt-out while events can still be sent, then stop sending.
        ph("cli-analytics-disabled")
        _ANALYTICS_ENABLED = False
def ph(event: str, properties: Dict[str, Any] = None) -> None:
"""
Log an event to PostHog.
:param event: The name of the event.
:param properties: A dictionary of properties to attach to the event.
"""
if not _ANALYTICS_ENABLED:
return
if properties is None:
properties = {}
properties["cli_version"] = __version__
properties["cli_version_tuple"] = __version_tuple__
properties = properties or {}
properties.update({"cli_version": __version__, "cli_version_tuple": __version_tuple__})
user_id = get_user_id()

View file

@ -1,12 +1,11 @@
import json
import logging
import os
from functools import lru_cache
from typing import Optional, Dict, Any
import requests
from functools import lru_cache
from pydantic.json import pydantic_encoder
from requests import Response
from typing import Optional, Dict, Any
from codeflash.code_utils.env_utils import get_codeflash_api_key
from codeflash.github.PrComment import PrComment, FileDiffContent
@ -128,8 +127,7 @@ def check_github_app_installed_on_repo(owner: str, repo: str) -> Response:
:param repo: The name of the repository.
:return: The response object.
"""
response = make_cfapi_request(
return make_cfapi_request(
endpoint=f"/is-github-app-installed?repo={repo}&owner={owner}",
method="GET",
)
return response

View file

@ -1,16 +1,16 @@
import git
import logging
import os
import sys
from argparse import Namespace
import git
from codeflash.api.cfapi import check_github_app_installed_on_repo
from codeflash.cli_cmds.cmd_init import init_codeflash
from codeflash.cli_cmds.logging_config import LOGGING_FORMAT
from codeflash.code_utils import env_utils
from codeflash.code_utils.config_parser import parse_config_file
from codeflash.code_utils.git_utils import (
git_root_dir,
get_repo_owner_and_name,
get_github_secrets_page_url,
)
@ -46,6 +46,7 @@ def process_cmd_args(args: Namespace) -> Namespace:
"minimum_performance_gain",
"pytest_cmd",
"formatter_cmd",
"enable_analytics",
]
for key in supported_keys:
if key in pyproject_config:
@ -86,15 +87,14 @@ def handle_optimize_all_arg_parsing(args: Namespace) -> Namespace:
if hasattr(args, "all"):
# Ensure that the user can actually open PRs on the repo.
try:
repo = git.Repo(search_parent_directories=True)
git_root_dir(repo)
git_repo = git.Repo(search_parent_directories=True)
except git.exc.InvalidGitRepositoryError:
logging.error(
"I couldn't find a git repository in the current directory. "
"I need a git repository to run --all and open PRs for optimizations. Exiting..."
)
exit(1)
owner, repo = get_repo_owner_and_name(repo)
owner, repo = get_repo_owner_and_name(git_repo)
try:
response = check_github_app_installed_on_repo(owner, repo)
if response.ok and response.text == "true":

View file

@ -1,14 +1,16 @@
import ast
import click
import inquirer
import os
import re
import subprocess
import sys
import time
from typing import Optional
import click
import inquirer
import inquirer.themes
import tomlkit
from git import Repo
from typing import Optional
from codeflash.analytics.posthog import ph
from codeflash.code_utils.env_utils import (
@ -78,7 +80,7 @@ def ask_run_end_to_end_test(setup_info):
)
]
)
run_tests = run_tests_answer["run_tests"]
run_tests = run_tests_answer.get("run_tests", False)
if run_tests:
create_bubble_sort_file(setup_info)
run_end_to_end_test(setup_info)
@ -142,6 +144,8 @@ def collect_setup_info(setup_info: dict[str, str]):
default_tests_subdir = "tests"
create_for_me_option = "okay, create a tests/ directory for me!"
test_subdir_options = valid_subdirs if len(valid_subdirs) > 0 else [create_for_me_option]
custom_dir_option = "enter a custom directory..."
test_subdir_options.append(custom_dir_option)
tests_root_answer = inquirer.prompt(
[
inquirer.List(
@ -162,6 +166,19 @@ def collect_setup_info(setup_info: dict[str, str]):
tests_root = os.path.join(curdir, default_tests_subdir)
os.mkdir(tests_root)
click.echo(f"✅ Created directory {tests_root}/\n")
elif tests_root == custom_dir_option:
custom_tests_root_answer = inquirer.prompt(
[
inquirer.Path(
"custom_tests_root", # Removed the colon and space from the message
message=f"Enter the path to your tests directory inside {os.path.abspath(module_root) + os.sep} ",
path_type=inquirer.Path.DIRECTORY,
exists=True,
normalize_to_absolute_path=True,
),
]
)
tests_root = custom_tests_root_answer["custom_tests_root"]
setup_info["tests_root"] = os.path.relpath(tests_root, curdir)
ph("cli-tests-root-provided")
@ -189,6 +206,22 @@ def collect_setup_info(setup_info: dict[str, str]):
ignore_paths = []
setup_info["ignore_paths"] = ignore_paths
# Ask the user if they agree to enable PostHog analytics logging
# enable_analytics_question = [
# inquirer.List(
# "enable_analytics",
# message="⚡️ Is it OK to collect usage analytics to help improve CodeFlash? (recommended)",
# choices=[
# ("Sure, I'd love to help make CodeFlash better!", True),
# ("No, thanks.", False),
# ],
# )
# ]
# enable_analytics_answer = inquirer.prompt(enable_analytics_question)
# setup_info["enable_analytics"] = enable_analytics_answer["enable_analytics"]
ph("cli-analytics-choice", {"enable_analytics": setup_info["enable_analytics"]})
def detect_test_framework(curdir, tests_root) -> Optional[str]:
test_framework = None
@ -419,6 +452,7 @@ def configure_pyproject_toml(setup_info: dict[str, str]):
codeflash_section["tests-root"] = setup_info["tests_root"]
codeflash_section["test-framework"] = setup_info["test_framework"]
codeflash_section["ignore-paths"] = setup_info["ignore_paths"]
codeflash_section["enable-analytics"] = setup_info["enable_analytics"]
# Add the 'codeflash' section, ensuring 'tool' section exists
tool_section = pyproject_data.get("tool", tomlkit.table())

View file

@ -58,6 +58,8 @@ def parse_config_file(config_file_path=None):
f"Please run 'codeflash init' to create the config file."
)
assert isinstance(config, dict)
# default values:
path_keys = ["module-root", "tests-root"]
path_list_keys = ["ignore-paths"]
# TODO: minimum-performance-gain should become a more dynamic auto-detection in the future
@ -68,6 +70,9 @@ def parse_config_file(config_file_path=None):
"pytest-cmd": "pytest",
"formatter-cmd": "black",
}
bool_keys = {
"enable-analytics": True,
}
for key in float_keys:
if key in config:
@ -79,6 +84,11 @@ def parse_config_file(config_file_path=None):
config[key] = str(config[key])
else:
config[key] = str_keys[key]
for key in bool_keys:
if key in config:
config[key] = bool(config[key])
else:
config[key] = bool_keys[key]
for key in path_keys:
if key in config:
config[key] = os.path.join(os.path.dirname(config_file), config[key])
@ -88,6 +98,7 @@ def parse_config_file(config_file_path=None):
config[key] = [os.path.join(os.path.dirname(config_file), path) for path in config[key]]
else: # Default to empty list
config[key] = []
assert config["test-framework"] in [
"pytest",
"unittest",

View file

@ -1,9 +1,10 @@
import git
import logging
import os
from git import Repo
from io import StringIO
from typing import Optional
import git
from git import Repo
from unidiff import PatchSet
@ -70,20 +71,18 @@ def get_remote_url(repo: Optional[Repo] = None) -> str:
def get_repo_owner_and_name(repo: Optional[Repo] = None) -> tuple[str, str]:
    """
    Extract the repository owner and name from the repository's remote URL.

    Handles HTTP(S) remotes (``https://github.com/owner/repo.git``) and SSH
    remotes (``git@github.com:owner/repo.git``), with or without the trailing
    ``.git`` suffix.

    :param repo: The repository passed through to ``get_remote_url``.
    :return: A ``(owner, name)`` tuple.
    """
    # removesuffix() drops exactly one trailing ".git". rstrip(".git") would
    # instead strip any trailing run of the characters '.', 'g', 'i', 't' and
    # mangle names such as "digit" (-> "d") or "toolkit" (-> "toolk").
    remote_url = get_remote_url(repo).removesuffix(".git")
    split_url = remote_url.split("/")
    repo_owner_with_github, repo_name = split_url[-2], split_url[-1]
    # For an SSH URL the owner segment looks like "git@github.com:owner";
    # keep only the part after the ':'.
    repo_owner = (
        repo_owner_with_github.split(":")[1]
        if ":" in repo_owner_with_github
        else repo_owner_with_github
    )
    return repo_owner, repo_name

View file

@ -12,6 +12,8 @@ from typing import Tuple, Union
import libcst as cst
from codeflash.analytics import posthog
from codeflash.analytics.posthog import ph
from codeflash.analytics.sentry import init_sentry
from codeflash.api.aiservice import optimize_python_code
from codeflash.cli_cmds.cli import process_cmd_args
@ -114,6 +116,8 @@ def parse_args() -> Namespace:
class Optimizer:
def __init__(self, args: Namespace):
self.args = args
posthog.enable_analytics(args.enable_analytics)
self.test_cfg = TestConfig(
tests_root=args.tests_root,
project_root_path=args.project_root,
@ -122,6 +126,7 @@ class Optimizer:
)
def run(self) -> None:
ph("cli-optimize-run-start", {"args": self.args})
logging.info(CODEFLASH_LOGO)
logging.info("Running optimizer.")
if not env_utils.ensure_codeflash_api_key():
@ -141,19 +146,21 @@ class Optimizer:
test_files_created = set()
instrumented_unittests_created = set()
found_atleast_one_optimization = False
optimizations_found = 0
function_iterator_count = 0
try:
ph("cli-optimize-functions-to-optimize", {"num_functions": num_modified_functions})
if num_modified_functions == 0:
logging.info("No functions found to optimize. Exiting...")
return
logging.info(f"Discovering existing unit tests in {self.test_cfg.tests_root} ...")
function_to_tests: dict[str, list[TestsInFile]] = discover_unit_tests(self.test_cfg)
num_discovered_tests = sum([len(value) for value in function_to_tests.values()])
logging.info(
f"Discovered {sum([len(value) for value in function_to_tests.values()])} "
f"existing unit tests in {self.test_cfg.tests_root}"
f"Discovered {num_discovered_tests} existing unit tests in {self.test_cfg.tests_root}"
)
ph("cli-optimize-discovered-tests", {"num_tests": num_discovered_tests})
for path in file_to_funcs_to_optimize:
logging.info(f"Examining file {path} ...")
# TODO: Sequence the functions one goes through intelligently. If we are optimizing f(g(x)),
@ -177,7 +184,7 @@ class Optimizer:
f"Optimizing function {function_iterator_count} of {num_modified_functions} - {function_name}"
)
winning_test_results = None
# remove left overs from previous run
# remove leftovers from previous run
pathlib.Path(get_run_tmp_file("test_return_values_0.bin")).unlink(
missing_ok=True
)
@ -353,7 +360,7 @@ class Optimizer:
logging.info("----------------")
logging.info(f"Best optimization: {best_optimization[0:2]}")
if best_optimization:
found_atleast_one_optimization = True
optimizations_found += 1
logging.info(f"Best candidate:\n{best_optimization[0]}")
optimized_code = best_optimization[0]
@ -381,7 +388,7 @@ class Optimizer:
function_name=function_name,
path=path,
)
logging.info(f"EXPLANATION\n{explanation_final.to_console_string()}")
logging.info(f"Explanation: \n{explanation_final.to_console_string()}")
new_code = format_code(self.args.formatter_cmd, path)
new_dependent_code: dict[str, str] = {
@ -396,6 +403,16 @@ class Optimizer:
logging.info(f"⚡️ Optimization successful! 📄 {function_name} in {path}")
logging.info(f"📈 {explanation_final.perf_improvement_line}")
ph(
"cli-optimize-success",
{
"speedup_x": explanation_final.speedup_x,
"speedup_pct": explanation_final.speedup_pct,
"best_runtime": explanation_final.best_runtime_ns,
"original_runtime": explanation_final.original_runtime_ns,
"winning_test_results": explanation_final.winning_test_results,
},
)
test_files = function_to_tests.get(module_path + "." + function_name)
existing_tests = ""
if test_files:
@ -428,7 +445,8 @@ class Optimizer:
pathlib.Path(generated_tests_path).unlink(missing_ok=True)
for test_paths in instrumented_unittests_created_for_function:
pathlib.Path(test_paths).unlink(missing_ok=True)
if not found_atleast_one_optimization:
ph("cli-optimize-run-finished", {"optimizations_found": optimizations_found})
if optimizations_found == 0:
logging.info("❌ No optimizations found.")
elif self.args.all:
logging.info("✨ All functions have been optimized! ✨")

View file

@ -1,3 +1,3 @@
# These version placeholders will be replaced by poetry-dynamic-versioning during `poetry build`.
__version__ = "0.3.5"
__version_tuple__ = (0, 3, 5)
__version__ = "0.3.6"
__version_tuple__ = (0, 3, 6)

View file

@ -1,3 +1,4 @@
[tool]
[tool.poetry]
name = "codeflash"
version = "0.0.0" # Determined by poetry-dynamic-versioning during `poetry build`
@ -36,21 +37,32 @@ optional = true
[tool.poetry.group.dev.dependencies]
ipython = "^8.12.0"
mypy = "^1.6.1"
mypy = "^1.8.0"
[tool.poetry.build]
script = "codeflash/update_license_version.py"
[tool.poetry.scripts]
codeflash = "codeflash.main:main"
[tool.mypy]
warn_return_any = true
disallow_any_decorated = true
disallow_any_explicit = true
disallow_any_expr = true
disallow_any_unimported = true
disallow_subclassing_any = true
pretty = true
show_absolute_path = true
show_column_numbers = true
show_error_context = true
strict = true
verbosity = 3
warn_unreachable = true
plugins = [
"pydantic.mypy"
]
follow_imports = "silent"
warn_redundant_casts = true
warn_unused_ignores = true
disallow_any_generics = true
check_untyped_defs = true
no_implicit_reexport = true
# for strict mypy: (this is the tricky one :-))
disallow_untyped_defs = true
[tool.pydantic-mypy]
init_forbid_extra = true
@ -61,13 +73,6 @@ warn_required_dynamic_aliases = true
line-length = 100
target-version = ['py312']
[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.2.0,<2.0.0"]
build-backend = "poetry_dynamic_versioning.backend"
[tool.poetry.build]
script = "codeflash/update_license_version.py"
[tool.poetry-dynamic-versioning]
enable = true
style = "pep440"
@ -84,11 +89,15 @@ initial-content = """
__version_tuple__ = (0, 0, 0)
"""
[tool.poetry.scripts]
codeflash = "codeflash.main:main"
[tool.codeflash]
module-root = "codeflash"
tests-root = "tests"
test-framework = "pytest"
ignore-paths = []
enable-analytics = true
[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.2.0,<2.0.0"]
build-backend = "poetry_dynamic_versioning.backend"

View file

@ -0,0 +1,31 @@
import unittest
from unittest.mock import patch
from codeflash.code_utils.git_utils import get_repo_owner_and_name
class TestGitUtils(unittest.TestCase):
    """Tests for the helpers in ``codeflash.code_utils.git_utils``."""

    @patch("codeflash.code_utils.git_utils.get_remote_url")
    def test_test_get_repo_owner_and_name(self, mock_get_remote_url):
        """Each supported remote URL form must resolve to ("owner", "repo")."""
        remote_urls = (
            # A standard GitHub HTTPS URL
            "https://github.com/owner/repo.git",
            # A GitHub SSH URL
            "git@github.com:owner/repo.git",
            # A URL without the .git suffix
            "https://github.com/owner/repo",
        )
        for remote_url in remote_urls:
            mock_get_remote_url.return_value = remote_url
            owner, repo_name = get_repo_owner_and_name()
            self.assertEqual(owner, "owner")
            self.assertEqual(repo_name, "repo")
if __name__ == "__main__":
unittest.main()

View file

@ -1,3 +1,8 @@
loglevel = "info"
accesslog = "-"
errorlog = "-"
# https://www.uvicorn.org/deployment/#gunicorn
# Use a custom worker class so that the ASGI Lifespan protocol is disabled.
# gunicorn needs the worker class passed as a string: https://github.com/benoitc/gunicorn/issues/1539
worker_class = "uvicorn_worker.DjangoUvicornWorker"

View file

@ -11,4 +11,4 @@ else
fi
gunicorn -c gunicorn.conf.py aiservice.asgi:application -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:"$PORT" --timeout 600 --workers 2
gunicorn -c gunicorn.conf.py aiservice.asgi:application --bind 0.0.0.0:"$PORT" --timeout 600 --workers 2

View file

@ -0,0 +1,13 @@
from uvicorn.workers import UvicornWorker
class DjangoUvicornWorker(UvicornWorker):
    """
    A UvicornWorker that runs with lifespan='off'.

    Django does not (and probably will not, see
    https://code.djangoproject.com/ticket/31508) support the Lifespan
    protocol, so it is switched off on the worker's config.
    """

    def __init__(self, *args, **kwargs):
        # Let the stock worker build its config first, then override the
        # lifespan setting on the resulting config object.
        super(DjangoUvicornWorker, self).__init__(*args, **kwargs)
        worker_config = self.config
        worker_config.lifespan = "off"

View file

@ -7,7 +7,7 @@ homepage = "https://codeflash.ai"
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.11"
python = "^3.12.2"
django = "^5.0"
django-ninja = "^1.1.0"
openai = "^1.10.0"
@ -29,21 +29,24 @@ optional = true
[tool.poetry.group.dev.dependencies]
ipython = "^8.12.0"
mypy = "^1.6.1"
mypy = "^1.8.0"
[tool.mypy]
warn_return_any = true
disallow_any_decorated = true
disallow_any_explicit = true
disallow_any_expr = true
disallow_any_unimported = true
disallow_subclassing_any = true
pretty = true
show_absolute_path = true
show_column_numbers = true
show_error_context = true
strict = true
verbosity = 3
warn_unreachable = true
plugins = [
"pydantic.mypy"
]
follow_imports = "silent"
warn_redundant_casts = true
warn_unused_ignores = true
disallow_any_generics = true
check_untyped_defs = true
no_implicit_reexport = true
# for strict mypy: (this is the tricky one :-))
disallow_untyped_defs = true
[tool.pydantic-mypy]
init_forbid_extra = true
@ -52,7 +55,7 @@ warn_required_dynamic_aliases = true
[tool.black]
line-length = 100
target-version = ['py311']
target-version = ['py312']
[build-system]
requires = ["poetry-core"]