Merge remote-tracking branch 'origin/main' into test-discovery

2024-03-06 10:26:43 -08:00 · 2024-03-06 10:26:43 -08:00 · 145b647c7f
commit 145b647c7f
parent 79684186a6 fb5984303b
16 changed files with 215 additions and 79 deletions
--- a/.github/workflows/django-unit-tests.yaml
+++ b/.github/workflows/django-unit-tests.yaml
@ -20,10 +20,10 @@ jobs:
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}
-      - name: Set up Python 3.11.6
+      - name: Set up Python 3.12.2
        uses: actions/setup-python@v4
        with:
-          python-version: "3.11.6"
+          python-version: "3.12.2"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
--- a/.idea/codeflash.iml
+++ b/.idea/codeflash.iml
@ -31,7 +31,7 @@
      <excludeFolder url="file://$MODULE_DIR$/js/cf-webapp/node_modules" />
      <excludeFolder url="file://$MODULE_DIR$/js/common/node_modules" />
    </content>
-    <orderEntry type="jdk" jdkName="codeflash311" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="$USER_HOME$/miniforge3/envs/codeflash311" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
    <orderEntry type="module" module-name="langchain" />
  </component>
--- a/cli/codeflash/analytics/posthog.py
+++ b/cli/codeflash/analytics/posthog.py
@ -11,18 +11,33 @@ _posthog = Posthog(
 )


+_ANALYTICS_ENABLED = True
+
+
+def enable_analytics(enabled: bool) -> None:
+    """
+    Enable or disable analytics.
+    :param enabled: Whether to enable analytics.
+    """
+    if enabled:
+        ph("cli-analytics-enabled")
+    else:
+        ph("cli-analytics-disabled")
+    global _ANALYTICS_ENABLED
+    _ANALYTICS_ENABLED = enabled
+
+
 def ph(event: str, properties: Dict[str, Any] = None) -> None:
    """
    Log an event to PostHog.
    :param event: The name of the event.
    :param properties: A dictionary of properties to attach to the event.
    """
+    if not _ANALYTICS_ENABLED:
+        return

-    if properties is None:
-        properties = {}
-
-    properties["cli_version"] = __version__
-    properties["cli_version_tuple"] = __version_tuple__
+    properties = properties or {}
+    properties.update({"cli_version": __version__, "cli_version_tuple": __version_tuple__})

    user_id = get_user_id()

--- a/cli/codeflash/api/cfapi.py
+++ b/cli/codeflash/api/cfapi.py
@ -1,12 +1,11 @@
 import json
 import logging
 import os
-from functools import lru_cache
-from typing import Optional, Dict, Any
-
 import requests
+from functools import lru_cache
 from pydantic.json import pydantic_encoder
 from requests import Response
+from typing import Optional, Dict, Any

 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.github.PrComment import PrComment, FileDiffContent
@ -128,8 +127,7 @@ def check_github_app_installed_on_repo(owner: str, repo: str) -> Response:
    :param repo: The name of the repository.
    :return: The response object.
    """
-    response = make_cfapi_request(
+    return make_cfapi_request(
        endpoint=f"/is-github-app-installed?repo={repo}&owner={owner}",
        method="GET",
    )
-    return response
--- a/cli/codeflash/cli_cmds/cli.py
+++ b/cli/codeflash/cli_cmds/cli.py
@ -1,16 +1,16 @@
-import git
 import logging
 import os
 import sys
 from argparse import Namespace

+import git
+
 from codeflash.api.cfapi import check_github_app_installed_on_repo
 from codeflash.cli_cmds.cmd_init import init_codeflash
 from codeflash.cli_cmds.logging_config import LOGGING_FORMAT
 from codeflash.code_utils import env_utils
 from codeflash.code_utils.config_parser import parse_config_file
 from codeflash.code_utils.git_utils import (
-    git_root_dir,
    get_repo_owner_and_name,
    get_github_secrets_page_url,
 )
@ -46,6 +46,7 @@ def process_cmd_args(args: Namespace) -> Namespace:
        "minimum_performance_gain",
        "pytest_cmd",
        "formatter_cmd",
+        "enable_analytics",
    ]
    for key in supported_keys:
        if key in pyproject_config:
@ -86,15 +87,14 @@ def handle_optimize_all_arg_parsing(args: Namespace) -> Namespace:
    if hasattr(args, "all"):
        # Ensure that the user can actually open PRs on the repo.
        try:
-            repo = git.Repo(search_parent_directories=True)
-            git_root_dir(repo)
+            git_repo = git.Repo(search_parent_directories=True)
        except git.exc.InvalidGitRepositoryError:
            logging.error(
                "I couldn't find a git repository in the current directory. "
                "I need a git repository to run --all and open PRs for optimizations. Exiting..."
            )
            exit(1)
-        owner, repo = get_repo_owner_and_name(repo)
+        owner, repo = get_repo_owner_and_name(git_repo)
        try:
            response = check_github_app_installed_on_repo(owner, repo)
            if response.ok and response.text == "true":
--- a/cli/codeflash/cli_cmds/cmd_init.py
+++ b/cli/codeflash/cli_cmds/cmd_init.py
@ -1,14 +1,16 @@
 import ast
-import click
-import inquirer
 import os
 import re
 import subprocess
 import sys
 import time
+from typing import Optional
+
+import click
+import inquirer
+import inquirer.themes
 import tomlkit
 from git import Repo
-from typing import Optional

 from codeflash.analytics.posthog import ph
 from codeflash.code_utils.env_utils import (
@ -78,7 +80,7 @@ def ask_run_end_to_end_test(setup_info):
            )
        ]
    )
-    run_tests = run_tests_answer["run_tests"]
+    run_tests = run_tests_answer.get("run_tests", False)
    if run_tests:
        create_bubble_sort_file(setup_info)
        run_end_to_end_test(setup_info)
@ -142,6 +144,8 @@ def collect_setup_info(setup_info: dict[str, str]):
    default_tests_subdir = "tests"
    create_for_me_option = "okay, create a tests/ directory for me!"
    test_subdir_options = valid_subdirs if len(valid_subdirs) > 0 else [create_for_me_option]
+    custom_dir_option = "enter a custom directory..."
+    test_subdir_options.append(custom_dir_option)
    tests_root_answer = inquirer.prompt(
        [
            inquirer.List(
@ -162,6 +166,19 @@ def collect_setup_info(setup_info: dict[str, str]):
        tests_root = os.path.join(curdir, default_tests_subdir)
        os.mkdir(tests_root)
        click.echo(f"✅ Created directory {tests_root}/\n")
+    elif tests_root == custom_dir_option:
+        custom_tests_root_answer = inquirer.prompt(
+            [
+                inquirer.Path(
+                    "custom_tests_root",  # answer key; read back from the prompt result below
+                    message=f"Enter the path to your tests directory inside {os.path.abspath(module_root) + os.sep} ",
+                    path_type=inquirer.Path.DIRECTORY,
+                    exists=True,
+                    normalize_to_absolute_path=True,
+                ),
+            ]
+        )
+        tests_root = custom_tests_root_answer["custom_tests_root"]
    setup_info["tests_root"] = os.path.relpath(tests_root, curdir)
    ph("cli-tests-root-provided")

@ -189,6 +206,22 @@ def collect_setup_info(setup_info: dict[str, str]):
    ignore_paths = []
    setup_info["ignore_paths"] = ignore_paths

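+    # Ask the user if they agree to enable PostHog analytics logging (prompt currently commented out; NOTE(review): confirm setup_info["enable_analytics"] is set elsewhere before it is read below)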
+    # enable_analytics_question = [
+    #     inquirer.List(
+    #         "enable_analytics",
+    #         message="⚡️ Is it OK to collect usage analytics to help improve CodeFlash? (recommended)",
+    #         choices=[
+    #             ("Sure, I'd love to help make CodeFlash better!", True),
+    #             ("No, thanks.", False),
+    #         ],
+    #     )
+    # ]
+    # enable_analytics_answer = inquirer.prompt(enable_analytics_question)
+    # setup_info["enable_analytics"] = enable_analytics_answer["enable_analytics"]
+
+    ph("cli-analytics-choice", {"enable_analytics": setup_info["enable_analytics"]})
+

 def detect_test_framework(curdir, tests_root) -> Optional[str]:
    test_framework = None
@ -419,6 +452,7 @@ def configure_pyproject_toml(setup_info: dict[str, str]):
    codeflash_section["tests-root"] = setup_info["tests_root"]
    codeflash_section["test-framework"] = setup_info["test_framework"]
    codeflash_section["ignore-paths"] = setup_info["ignore_paths"]
+    codeflash_section["enable-analytics"] = setup_info["enable_analytics"]

    # Add the 'codeflash' section, ensuring 'tool' section exists
    tool_section = pyproject_data.get("tool", tomlkit.table())
--- a/cli/codeflash/code_utils/config_parser.py
+++ b/cli/codeflash/code_utils/config_parser.py
@ -58,6 +58,8 @@ def parse_config_file(config_file_path=None):
            f"Please run 'codeflash init' to create the config file."
        )
    assert isinstance(config, dict)
+
+    # default values:
    path_keys = ["module-root", "tests-root"]
    path_list_keys = ["ignore-paths"]
    # TODO: minimum-performance-gain should become a more dynamic auto-detection in the future
@ -68,6 +70,9 @@ def parse_config_file(config_file_path=None):
        "pytest-cmd": "pytest",
        "formatter-cmd": "black",
    }
+    bool_keys = {
+        "enable-analytics": True,
+    }

    for key in float_keys:
        if key in config:
@ -79,6 +84,11 @@ def parse_config_file(config_file_path=None):
            config[key] = str(config[key])
        else:
            config[key] = str_keys[key]
+    for key in bool_keys:
+        if key in config:
+            config[key] = bool(config[key])
+        else:
+            config[key] = bool_keys[key]
    for key in path_keys:
        if key in config:
            config[key] = os.path.join(os.path.dirname(config_file), config[key])
@ -88,6 +98,7 @@ def parse_config_file(config_file_path=None):
            config[key] = [os.path.join(os.path.dirname(config_file), path) for path in config[key]]
        else:  # Default to empty list
            config[key] = []
+
    assert config["test-framework"] in [
        "pytest",
        "unittest",
--- a/cli/codeflash/code_utils/git_utils.py
+++ b/cli/codeflash/code_utils/git_utils.py
@ -1,9 +1,10 @@
-import git
 import logging
 import os
-from git import Repo
 from io import StringIO
 from typing import Optional
+
+import git
+from git import Repo
 from unidiff import PatchSet


@ -70,20 +71,18 @@ def get_remote_url(repo: Optional[Repo] = None) -> str:


 def get_repo_owner_and_name(repo: Optional[Repo] = None) -> tuple[str, str]:
-    remote_url = get_remote_url(repo)
-    if remote_url.endswith(".git"):
-        remote_url = remote_url[:-4]
-    if "://" in remote_url:
-        # It's an HTTP/HTTPS URL
-        repo_owner, repo_name = remote_url.split("/")[-2:]
-    else:
-        # It's an SSH URL and should contain ':' after the domain
-        repo_owner_with_github, repo_name = remote_url.split("/")[-2:]
-        repo_owner = (
-            repo_owner_with_github.split(":")[1]
-            if ":" in repo_owner_with_github
-            else repo_owner_with_github
-        )
+    remote_url = (
+        get_remote_url(repo).rstrip(".git")
+        if get_remote_url(repo).endswith(".git")
+        else get_remote_url(repo)
+    )
+    split_url = remote_url.split("/")
+    repo_owner_with_github, repo_name = split_url[-2], split_url[-1]
+    repo_owner = (
+        repo_owner_with_github.split(":")[1]
+        if ":" in repo_owner_with_github
+        else repo_owner_with_github
+    )
    return repo_owner, repo_name


--- a/cli/codeflash/main.py
+++ b/cli/codeflash/main.py
@ -12,6 +12,8 @@ from typing import Tuple, Union

 import libcst as cst

+from codeflash.analytics import posthog
+from codeflash.analytics.posthog import ph
 from codeflash.analytics.sentry import init_sentry
 from codeflash.api.aiservice import optimize_python_code
 from codeflash.cli_cmds.cli import process_cmd_args
@ -114,6 +116,8 @@ def parse_args() -> Namespace:
 class Optimizer:
    def __init__(self, args: Namespace):
        self.args = args
+        posthog.enable_analytics(args.enable_analytics)
+
        self.test_cfg = TestConfig(
            tests_root=args.tests_root,
            project_root_path=args.project_root,
@ -122,6 +126,7 @@ class Optimizer:
        )

    def run(self) -> None:
+        ph("cli-optimize-run-start", {"args": self.args})
        logging.info(CODEFLASH_LOGO)
        logging.info("Running optimizer.")
        if not env_utils.ensure_codeflash_api_key():
@ -141,19 +146,21 @@ class Optimizer:

        test_files_created = set()
        instrumented_unittests_created = set()
-        found_atleast_one_optimization = False
+        optimizations_found = 0

        function_iterator_count = 0
        try:
+            ph("cli-optimize-functions-to-optimize", {"num_functions": num_modified_functions})
            if num_modified_functions == 0:
                logging.info("No functions found to optimize. Exiting...")
                return
            logging.info(f"Discovering existing unit tests in {self.test_cfg.tests_root} ...")
            function_to_tests: dict[str, list[TestsInFile]] = discover_unit_tests(self.test_cfg)
+            num_discovered_tests = sum([len(value) for value in function_to_tests.values()])
            logging.info(
-                f"Discovered {sum([len(value) for value in function_to_tests.values()])} "
-                f"existing unit tests in {self.test_cfg.tests_root}"
+                f"Discovered {num_discovered_tests} existing unit tests in {self.test_cfg.tests_root}"
            )
+            ph("cli-optimize-discovered-tests", {"num_tests": num_discovered_tests})
            for path in file_to_funcs_to_optimize:
                logging.info(f"Examining file {path} ...")
                # TODO: Sequence the functions one goes through intelligently. If we are optimizing f(g(x)),
@ -177,7 +184,7 @@ class Optimizer:
                        f"Optimizing function {function_iterator_count} of {num_modified_functions} - {function_name}"
                    )
                    winning_test_results = None
-                    # remove left overs from previous run
+                    # remove leftovers from previous run
                    pathlib.Path(get_run_tmp_file("test_return_values_0.bin")).unlink(
                        missing_ok=True
                    )
@ -353,7 +360,7 @@ class Optimizer:
                        logging.info("----------------")
                    logging.info(f"Best optimization: {best_optimization[0:2]}")
                    if best_optimization:
-                        found_atleast_one_optimization = True
+                        optimizations_found += 1
                        logging.info(f"Best candidate:\n{best_optimization[0]}")

                        optimized_code = best_optimization[0]
@ -381,7 +388,7 @@ class Optimizer:
                            function_name=function_name,
                            path=path,
                        )
-                        logging.info(f"EXPLANATION\n{explanation_final.to_console_string()}")
+                        logging.info(f"Explanation: \n{explanation_final.to_console_string()}")

                        new_code = format_code(self.args.formatter_cmd, path)
                        new_dependent_code: dict[str, str] = {
@ -396,6 +403,16 @@ class Optimizer:
                        logging.info(f"⚡️ Optimization successful! 📄 {function_name} in {path}")
                        logging.info(f"📈 {explanation_final.perf_improvement_line}")

+                        ph(
+                            "cli-optimize-success",
+                            {
+                                "speedup_x": explanation_final.speedup_x,
+                                "speedup_pct": explanation_final.speedup_pct,
+                                "best_runtime": explanation_final.best_runtime_ns,
+                                "original_runtime": explanation_final.original_runtime_ns,
+                                "winning_test_results": explanation_final.winning_test_results,
+                            },
+                        )
                        test_files = function_to_tests.get(module_path + "." + function_name)
                        existing_tests = ""
                        if test_files:
@ -428,7 +445,8 @@ class Optimizer:
                    pathlib.Path(generated_tests_path).unlink(missing_ok=True)
                    for test_paths in instrumented_unittests_created_for_function:
                        pathlib.Path(test_paths).unlink(missing_ok=True)
-            if not found_atleast_one_optimization:
+            ph("cli-optimize-run-finished", {"optimizations_found": optimizations_found})
+            if optimizations_found == 0:
                logging.info("❌ No optimizations found.")
            elif self.args.all:
                logging.info("✨ All functions have been optimized! ✨")
--- a/cli/codeflash/version.py
+++ b/cli/codeflash/version.py
@ -1,3 +1,3 @@
 # These version placeholders will be replaced by poetry-dynamic-versioning during `poetry build`.
-__version__ = "0.3.5"
-__version_tuple__ = (0, 3, 5)
+__version__ = "0.3.6"
+__version_tuple__ = (0, 3, 6)
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@ -1,3 +1,4 @@
+[tool]
 [tool.poetry]
 name = "codeflash"
 version = "0.0.0" # Determined by poetry-dynamic-versioning during `poetry build`
@ -36,21 +37,32 @@ optional = true

 [tool.poetry.group.dev.dependencies]
 ipython = "^8.12.0"
-mypy = "^1.6.1"
+mypy = "^1.8.0"
+
+
+[tool.poetry.build]
+script = "codeflash/update_license_version.py"
+
+[tool.poetry.scripts]
+codeflash = "codeflash.main:main"
+

 [tool.mypy]
-warn_return_any = true
+disallow_any_decorated = true
+disallow_any_explicit = true
+disallow_any_expr = true
+disallow_any_unimported = true
+disallow_subclassing_any = true
+pretty = true
+show_absolute_path = true
+show_column_numbers = true
+show_error_context = true
+strict = true
+verbosity = 3
+warn_unreachable = true
 plugins = [
    "pydantic.mypy"
 ]
-follow_imports = "silent"
-warn_redundant_casts = true
-warn_unused_ignores = true
-disallow_any_generics = true
-check_untyped_defs = true
-no_implicit_reexport = true
-# for strict mypy: (this is the tricky one :-))
-disallow_untyped_defs = true

 [tool.pydantic-mypy]
 init_forbid_extra = true
@ -61,13 +73,6 @@ warn_required_dynamic_aliases = true
 line-length = 100
 target-version = ['py312']

-[build-system]
-requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.2.0,<2.0.0"]
-build-backend = "poetry_dynamic_versioning.backend"
-
-[tool.poetry.build]
-script = "codeflash/update_license_version.py"
-
 [tool.poetry-dynamic-versioning]
 enable = true
 style = "pep440"
@ -84,11 +89,15 @@ initial-content = """
  __version_tuple__ = (0, 0, 0)
 """

-[tool.poetry.scripts]
-codeflash = "codeflash.main:main"

 [tool.codeflash]
 module-root = "codeflash"
 tests-root = "tests"
 test-framework = "pytest"
 ignore-paths = []
+enable-analytics = true
+
+[build-system]
+requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.2.0,<2.0.0"]
+build-backend = "poetry_dynamic_versioning.backend"
+
--- a/cli/tests/test_git_utils.py
+++ b/cli/tests/test_git_utils.py
@ -0,0 +1,31 @@
+import unittest
+from unittest.mock import patch
+
+from codeflash.code_utils.git_utils import get_repo_owner_and_name
+
+
+class TestGitUtils(unittest.TestCase):
+
+    @patch("codeflash.code_utils.git_utils.get_remote_url")
+    def test_test_get_repo_owner_and_name(self, mock_get_remote_url):
+        # Test with a standard GitHub HTTPS URL
+        mock_get_remote_url.return_value = "https://github.com/owner/repo.git"
+        owner, repo_name = get_repo_owner_and_name()
+        self.assertEqual(owner, "owner")
+        self.assertEqual(repo_name, "repo")
+
+        # Test with a GitHub SSH URL
+        mock_get_remote_url.return_value = "git@github.com:owner/repo.git"
+        owner, repo_name = get_repo_owner_and_name()
+        self.assertEqual(owner, "owner")
+        self.assertEqual(repo_name, "repo")
+
+        # Test with a URL without the .git suffix
+        mock_get_remote_url.return_value = "https://github.com/owner/repo"
+        owner, repo_name = get_repo_owner_and_name()
+        self.assertEqual(owner, "owner")
+        self.assertEqual(repo_name, "repo")
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/django/aiservice/aiservice/gunicorn.conf.py
+++ b/django/aiservice/aiservice/gunicorn.conf.py
@ -1,3 +1,8 @@
 loglevel = "info"
 accesslog = "-"
 errorlog = "-"
+
+# https://www.uvicorn.org/deployment/#gunicorn
+# Use a custom worker class to disable the Lifespan protocol.
+# The worker class must be passed as a string: https://github.com/benoitc/gunicorn/issues/1539
+worker_class = "uvicorn_worker.DjangoUvicornWorker"
--- a/django/aiservice/aiservice/start_gunicorn.sh
+++ b/django/aiservice/aiservice/start_gunicorn.sh
@ -11,4 +11,4 @@ else
 fi


-gunicorn -c gunicorn.conf.py aiservice.asgi:application -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:"$PORT" --timeout 600 --workers 2
+gunicorn -c gunicorn.conf.py aiservice.asgi:application --bind 0.0.0.0:"$PORT" --timeout 600 --workers 2
--- a/django/aiservice/aiservice/uvicorn_worker.py
+++ b/django/aiservice/aiservice/uvicorn_worker.py
@ -0,0 +1,13 @@
+from uvicorn.workers import UvicornWorker
+
+
+class DjangoUvicornWorker(UvicornWorker):
+    """
+    Use UvicornWorker with lifespan='off', because Django does not
+    (and probably will not https://code.djangoproject.com/ticket/31508)
+    support Lifespan.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.config.lifespan = "off"
--- a/django/aiservice/pyproject.toml
+++ b/django/aiservice/pyproject.toml
@ -7,7 +7,7 @@ homepage = "https://codeflash.ai"
 readme = "README.md"

 [tool.poetry.dependencies]
-python = "^3.11"
+python = "^3.12.2"
 django = "^5.0"
 django-ninja = "^1.1.0"
 openai = "^1.10.0"
@ -29,21 +29,24 @@ optional = true

 [tool.poetry.group.dev.dependencies]
 ipython = "^8.12.0"
-mypy = "^1.6.1"
+mypy = "^1.8.0"

 [tool.mypy]
-warn_return_any = true
+disallow_any_decorated = true
+disallow_any_explicit = true
+disallow_any_expr = true
+disallow_any_unimported = true
+disallow_subclassing_any = true
+pretty = true
+show_absolute_path = true
+show_column_numbers = true
+show_error_context = true
+strict = true
+verbosity = 3
+warn_unreachable = true
 plugins = [
    "pydantic.mypy"
 ]
-follow_imports = "silent"
-warn_redundant_casts = true
-warn_unused_ignores = true
-disallow_any_generics = true
-check_untyped_defs = true
-no_implicit_reexport = true
-# for strict mypy: (this is the tricky one :-))
-disallow_untyped_defs = true

 [tool.pydantic-mypy]
 init_forbid_extra = true
@ -52,7 +55,7 @@ warn_required_dynamic_aliases = true

 [tool.black]
 line-length = 100
-target-version = ['py311']
+target-version = ['py312']

 [build-system]
 requires = ["poetry-core"]