unify

2025-12-22 23:51:05 -05:00 · 2025-12-22 23:51:05 -05:00 · 273edff3ab
commit 273edff3ab
parent 4a7f8a10f4
15 changed files with 301 additions and 395 deletions
--- a/django/aiservice/aiservice/env_specific.py
+++ b/django/aiservice/aiservice/env_specific.py
@@ -1,21 +1,17 @@
+"""Environment-specific configuration and utilities."""
+
 from __future__ import annotations

 import logging
 import os
 import sys
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING

-from anthropic import AsyncAnthropicFoundry
 from dotenv import load_dotenv
-from openai import AsyncOpenAI

 if TYPE_CHECKING:
    from collections.abc import Callable

-    from anthropic.types import Message as AnthropicMessage
-    from openai.types.chat import ChatCompletion
-

 IS_PRODUCTION = os.environ.get("ENVIRONMENT", default="") == "PRODUCTION"

@@ -25,11 +21,13 @@ logging.getLogger("parso").setLevel(logging.WARNING)


 def load_env() -> None:
+    """Load environment variables from .env file in non-production."""
    if not IS_PRODUCTION:
        load_dotenv()


 def set_logging_level() -> None:
+    """Set logging level based on environment."""
    if IS_PRODUCTION:
        logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, stream=sys.stdout)
    else:
@@ -37,121 +35,12 @@ def set_logging_level() -> None:


 def debug_log_sensitive_data(message: str) -> None:
+    """Log sensitive data only in non-production environments."""
    if not IS_PRODUCTION:
        logging.debug(message)


 def debug_log_sensitive_data_from_callable(message: Callable[[], str | None]) -> None:
+    """Log the result of calling `message`, which is invoked only in non-production environments."""
    if not IS_PRODUCTION:
        logging.debug(message())
-
-
-def create_llm_client(
-    model_type: Literal["openai", "anthropic", "google"],
-) -> AsyncOpenAI | AsyncAnthropicFoundry | None:
-    # Azure OpenAI endpoint configuration
-    azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
-    azure_openai_endpoint = os.environ.get(
-        "AZURE_OPENAI_ENDPOINT", "https://codeflash-openai-resource.openai.azure.com/openai/v1/"
-    )
-
-    # Azure Anthropic endpoint configuration
-    azure_anthropic_api_key = os.environ.get("AZURE_ANTHROPIC_API_KEY")
-    azure_anthropic_endpoint = os.environ.get(
-        "AZURE_ANTHROPIC_ENDPOINT", "https://codeflash-anthropic-resource.openai.azure.com/anthropic"
-    )
-
-    # Direct OpenAI and Google keys
-    openai_key = os.environ.get("OPENAI_API_KEY")
-    google_key = os.environ.get("GEMINI_API_KEY")
-
-    if model_type == "openai" and azure_openai_api_key:
-        return AsyncOpenAI(api_key=azure_openai_api_key, base_url=azure_openai_endpoint)
-    if model_type == "openai" and openai_key:
-        return AsyncOpenAI(api_key=openai_key)  # baseurl not needed for regular openai
-    if model_type == "anthropic" and azure_anthropic_api_key:
-        return AsyncAnthropicFoundry(api_key=azure_anthropic_api_key, base_url=azure_anthropic_endpoint)
-    # # for future use : gemini supported only via GEMINI_API_KEY at the moment, todo for vertex ai
-    if model_type == "google" and google_key:
-        return AsyncOpenAI(api_key=google_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
-    return None
-
-
-llm_clients = {
-    "openai": create_llm_client("openai"),
-    "anthropic": create_llm_client("anthropic"),
-    # "google": create_llm_client("google"), # no need to instantiate right now as we're not using it
-}
-
-
-@dataclass
-class LLMUsage:
-    """Unified usage stats for both OpenAI and Anthropic responses."""
-
-    input_tokens: int
-    output_tokens: int
-
-
-@dataclass
-class LLMResponse:
-    """Unified response wrapper for both OpenAI and Anthropic API responses."""
-
-    content: str
-    usage: LLMUsage
-    raw_response: ChatCompletion | AnthropicMessage
-    all_contents: list[str] | None = None  # For multiple completions when n > 1
-
-
-async def call_llm(
-    model_name: str,
-    model_type: Literal["openai", "anthropic", "google"],
-    messages: list[dict[str, Any]],
-    max_tokens: int = 8192,
-    temperature: float | None = None,
-    n: int = 1,
-) -> LLMResponse:
-    """Call LLM with OpenAI or Anthropic client."""
-    client = llm_clients[model_type]
-    if client is None:
-        msg = f"LLM client for model type '{model_type}' is not available"
-        raise ValueError(msg)
-
-    if model_type == "anthropic":
-        assert isinstance(client, AsyncAnthropicFoundry)
-        system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
-        anthropic_messages = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]
-
-        kwargs: dict[str, Any] = {"model": model_name, "messages": anthropic_messages, "max_tokens": max_tokens}
-        if system_prompt:
-            kwargs["system"] = system_prompt
-        if temperature is not None:
-            kwargs["temperature"] = temperature
-
-        response = await client.messages.create(**kwargs)
-        content = "".join(block.text for block in response.content if hasattr(block, "text"))
-
-        return LLMResponse(
-            content=content,
-            usage=LLMUsage(input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens),
-            raw_response=response,
-        )
-
-    # OpenAI / Google (OpenAI-compatible)
-    assert isinstance(client, AsyncOpenAI)
-    openai_kwargs: dict[str, Any] = {"model": model_name, "messages": messages, "n": n}
-    if temperature is not None:
-        openai_kwargs["temperature"] = temperature
-    response = await client.chat.completions.create(**openai_kwargs)
-
-    # Collect all contents when n > 1
-    all_contents = [choice.message.content or "" for choice in response.choices] if len(response.choices) > 1 else None
-
-    return LLMResponse(
-        content=response.choices[0].message.content or "",
-        usage=LLMUsage(
-            input_tokens=response.usage.prompt_tokens if response.usage else 0,
-            output_tokens=response.usage.completion_tokens if response.usage else 0,
-        ),
-        raw_response=response,
-        all_contents=all_contents,
-    )
--- a/django/aiservice/aiservice/llm.py
+++ b/django/aiservice/aiservice/llm.py
@@ -0,0 +1,265 @@
+"""Unified LLM module for all model definitions, clients, and API calls."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Literal
+
+from anthropic import AsyncAnthropicFoundry
+from openai import AsyncOpenAI
+from pydantic.dataclasses import dataclass as pydantic_dataclass
+
+if TYPE_CHECKING:
+    from anthropic.types import Message as AnthropicMessage
+    from openai.types.chat import ChatCompletion
+
+
+# =============================================================================
+# Model Definitions
+# =============================================================================
+
+# Pricing is in USD per 1M tokens. See:
+# https://docs.anthropic.com/en/docs/about-claude/pricing
+# https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
+
+
+@pydantic_dataclass
+class LLM:
+    """Base LLM configuration with pricing info."""
+
+    name: str  # On Azure OpenAI Service, this is the deployment name
+    max_tokens: int
+    model_type: Literal["openai", "anthropic", "google"]
+    input_cost: float | None = None  # USD per 1M tokens
+    output_cost: float | None = None  # USD per 1M tokens
+
+
+@pydantic_dataclass
+class OpenAI_GPT_4_1(LLM):
+    """OpenAI GPT-4.1 model."""
+
+    name: str = "gpt-4.1"
+    model_type: Literal["openai", "anthropic", "google"] = "openai"
+    max_tokens: int = 100000
+    input_cost: float = 2.00
+    output_cost: float = 8.00
+
+
+@pydantic_dataclass
+class Anthropic_Claude_4(LLM):
+    """Anthropic Claude 4 Sonnet model."""
+
+    name: str = "claude-sonnet-4-20250514"
+    model_type: Literal["openai", "anthropic", "google"] = "anthropic"
+    max_tokens: int = 100000
+    input_cost: float = 3.00
+    output_cost: float = 15.00
+
+
+@pydantic_dataclass
+class Anthropic_Claude_Sonnet_4_5_AF(LLM):
+    """Anthropic Claude 4.5 Sonnet via Azure Foundry."""
+
+    name: str = "claude-sonnet-4-5"
+    model_type: Literal["openai", "anthropic", "google"] = "anthropic"
+    max_tokens: int = 200000
+    input_cost: float = 3.00
+    output_cost: float = 15.00
+
+
+# =============================================================================
+# LLM Client Setup
+# =============================================================================
+
+
+def create_llm_client(
+    model_type: Literal["openai", "anthropic", "google"],
+) -> AsyncOpenAI | AsyncAnthropicFoundry | None:
+    """Create an LLM client for the given model type from available API keys, or return None if no key is set."""
+    # Azure OpenAI endpoint configuration
+    azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
+    azure_openai_endpoint = os.environ.get(
+        "AZURE_OPENAI_ENDPOINT", "https://codeflash-openai-resource.openai.azure.com/openai/v1/"
+    )
+
+    # Azure Anthropic endpoint configuration
+    azure_anthropic_api_key = os.environ.get("AZURE_ANTHROPIC_API_KEY")
+    azure_anthropic_endpoint = os.environ.get(
+        "AZURE_ANTHROPIC_ENDPOINT", "https://codeflash-anthropic-resource.openai.azure.com/anthropic"
+    )
+
+    # Direct OpenAI and Google keys
+    openai_key = os.environ.get("OPENAI_API_KEY")
+    google_key = os.environ.get("GEMINI_API_KEY")
+
+    if model_type == "openai" and azure_openai_api_key:
+        return AsyncOpenAI(api_key=azure_openai_api_key, base_url=azure_openai_endpoint)
+    if model_type == "openai" and openai_key:
+        return AsyncOpenAI(api_key=openai_key)
+    if model_type == "anthropic" and azure_anthropic_api_key:
+        return AsyncAnthropicFoundry(api_key=azure_anthropic_api_key, base_url=azure_anthropic_endpoint)
+    if model_type == "google" and google_key:
+        return AsyncOpenAI(api_key=google_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
+    return None
+
+
+llm_clients = {
+    "openai": create_llm_client("openai"),
+    "anthropic": create_llm_client("anthropic"),
+}
+
+
+# =============================================================================
+# Response Types
+# =============================================================================
+
+
+@dataclass
+class LLMUsage:
+    """Unified usage stats for both OpenAI and Anthropic responses."""
+
+    input_tokens: int
+    output_tokens: int
+
+
+@dataclass
+class LLMResponse:
+    """Unified response wrapper for both OpenAI and Anthropic API responses."""
+
+    content: str
+    usage: LLMUsage
+    raw_response: ChatCompletion | AnthropicMessage
+    all_contents: list[str] | None = None  # For multiple completions when n > 1
+
+
+# =============================================================================
+# LLM API Call
+# =============================================================================
+
+
+async def call_llm(
+    model_name: str,
+    model_type: Literal["openai", "anthropic", "google"],
+    messages: list[dict[str, Any]],
+    max_tokens: int = 8192,
+    temperature: float | None = None,
+    n: int = 1,
+) -> LLMResponse:
+    """Call LLM with OpenAI or Anthropic client."""
+    client = llm_clients[model_type]
+    if client is None:
+        msg = f"LLM client for model type '{model_type}' is not available"
+        raise ValueError(msg)
+
+    if model_type == "anthropic":
+        assert isinstance(client, AsyncAnthropicFoundry)
+        system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
+        anthropic_messages = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]
+
+        kwargs: dict[str, Any] = {"model": model_name, "messages": anthropic_messages, "max_tokens": max_tokens}
+        if system_prompt:
+            kwargs["system"] = system_prompt
+        if temperature is not None:
+            kwargs["temperature"] = temperature
+
+        response = await client.messages.create(**kwargs)
+        content = "".join(block.text for block in response.content if hasattr(block, "text"))
+
+        return LLMResponse(
+            content=content,
+            usage=LLMUsage(input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens),
+            raw_response=response,
+        )
+
+    # OpenAI / Google (OpenAI-compatible)
+    assert isinstance(client, AsyncOpenAI)
+    openai_kwargs: dict[str, Any] = {"model": model_name, "messages": messages, "n": n}
+    if temperature is not None:
+        openai_kwargs["temperature"] = temperature
+    response = await client.chat.completions.create(**openai_kwargs)
+
+    # Collect every choice's content when the response contains more than one choice (n > 1)
+    all_contents = [choice.message.content or "" for choice in response.choices] if len(response.choices) > 1 else None
+
+    return LLMResponse(
+        content=response.choices[0].message.content or "",
+        usage=LLMUsage(
+            input_tokens=response.usage.prompt_tokens if response.usage else 0,
+            output_tokens=response.usage.completion_tokens if response.usage else 0,
+        ),
+        raw_response=response,
+        all_contents=all_contents,
+    )
+
+
+# =============================================================================
+# Cost Calculation
+# =============================================================================
+
+
+def calculate_llm_cost(response: Any, llm: LLM) -> float | None:
+    """Calculate the cost of an LLM API call.
+
+    Args:
+        response: The raw response from the LLM API call.
+        llm: The LLM model configuration with pricing info.
+
+    Returns:
+        The total cost in USD, or None if cost cannot be calculated.
+
+    """
+    try:
+        usage = response.usage
+        if hasattr(usage, "prompt_tokens"):  # OpenAI
+            prompt_tokens = usage.prompt_tokens
+            completion_tokens = usage.completion_tokens
+        else:  # Anthropic
+            prompt_tokens = usage.input_tokens
+            completion_tokens = usage.output_tokens
+
+        prompt_cost = (prompt_tokens / 1_000_000) * llm.input_cost
+        completion_cost = (completion_tokens / 1_000_000) * llm.output_cost
+
+        return prompt_cost + completion_cost
+
+    except Exception:
+        return None
+
+
+# =============================================================================
+# Model Selection
+# =============================================================================
+
+
+def _get_openai_model() -> LLM:
+    """Return OpenAI GPT-4.1 if an OpenAI key is set, else Claude 4 if ANTHROPIC_API_KEY is set, else GPT-4.1."""
+    if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
+        return OpenAI_GPT_4_1()
+    if os.environ.get("ANTHROPIC_API_KEY"):
+        return Anthropic_Claude_4()
+    return OpenAI_GPT_4_1()
+
+
+def _get_anthropic_model() -> LLM:
+    """Return Claude via Azure Foundry if configured, else Claude 4, else OpenAI GPT-4.1; default to Claude 4."""
+    if os.environ.get("AZURE_ANTHROPIC_API_KEY"):
+        return Anthropic_Claude_Sonnet_4_5_AF()
+    if os.environ.get("ANTHROPIC_API_KEY"):
+        return Anthropic_Claude_4()
+    if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
+        return OpenAI_GPT_4_1()
+    return Anthropic_Claude_4()
+
+
+# Dynamically select models based on available API keys
+EXPLAIN_MODEL: LLM = _get_openai_model()
+PLAN_MODEL: LLM = _get_openai_model()
+EXECUTE_MODEL: LLM = _get_openai_model()
+OPTIMIZE_MODEL: LLM = _get_openai_model()
+RANKING_MODEL: LLM = _get_openai_model()
+
+REFINEMENT_MODEL: LLM = _get_anthropic_model()
+EXPLANATIONS_MODEL: LLM = _get_anthropic_model()
+OPTIMIZATION_REVIEW_MODEL: LLM = _get_anthropic_model()
+CODE_REPAIR_MODEL: LLM = _get_anthropic_model()
--- a/django/aiservice/aiservice/models/aimodels.py
+++ b/django/aiservice/aiservice/models/aimodels.py
@@ -1,234 +0,0 @@
-import os
-from typing import Any, Literal
-
-from pydantic.dataclasses import dataclass
-
-
-# The following pricing information is based on public OpenAI and Claude documentation
-# as of August 2025. Prices can change, so always check the official:
-# https://docs.anthropic.com/en/docs/about-claude/pricing
-# https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
-# The pricing is in USD per 1M tokens.
-# Some of the pricing are placeholder from Open AI https://platform.openai.com/docs/pricing?latest-pricing=flex.
-@dataclass
-class LLM:
-    name: str  # On Azure OpenAI Service, this is the deployment name
-    max_tokens: int
-    model_type: Literal["openai", "anthropic", "google"]
-    # Add new pricing attributes in USD per 1M tokens
-    input_cost: float | None = None
-    output_cost: float | None = None
-
-
-# name of the model deployment on Azure OpenAI Service
-@dataclass
-class GPT_4_OMNI(LLM):
-    name: str = "gpt-4o-2" if os.environ.get("OPENAI_API_TYPE") == "azure" else "gpt-4o"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 128000
-    input_cost: float = 2.50
-    output_cost: float = 10.00
-
-
-@dataclass
-class GPT_4_128k(LLM):
-    name: str = "gpt-4-1106-preview"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 128000
-    input_cost: float = 10.00
-    output_cost: float = 30.00
-
-
-@dataclass
-class GPT_4_32k(LLM):
-    name: str = "gpt4-32k"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 32768
-    input_cost: float = 60.00
-    output_cost: float = 120.00
-
-
-@dataclass
-class GPT_4(LLM):
-    name: str = "gpt-4-0613"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 8192
-    input_cost: float = 30.00
-    output_cost: float = 60.00
-
-
-@dataclass
-class GPT_3_5_Turbo_16k(LLM):
-    name: str = "gpt-3.5-turbo-16k"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 16384
-    input_cost: float = 3.00
-    output_cost: float = 4.00
-
-
-@dataclass
-class GPT_3_5_Turbo(LLM):
-    name: str = "gpt-3.5-turbo"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 4096
-    input_cost: float = 0.50
-    output_cost: float = 1.50
-
-
-@dataclass
-class Antropic_Claude_3_7(LLM):
-    name: str = "claude-3-7-sonnet-20250219"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 100000
-    input_cost: float = 3.00
-    output_cost: float = 15.00
-
-
-@dataclass
-class Anthropic_Claude_4(LLM):
-    name: str = "claude-sonnet-4-20250514"
-    model_type: Literal["openai", "anthropic", "google"] = "anthropic"
-    max_tokens: int = 100000
-    input_cost: float = 3.00
-    output_cost: float = 15.00
-
-
-# AF = Azure Foundry
-@dataclass
-class Anthropic_Claude_Sonnet_4_5_AF(LLM):
-    name: str = "claude-sonnet-4-5"
-    model_type: Literal["openai", "anthropic", "google"] = "anthropic"
-    max_tokens: int = 200000
-    input_cost: float = 3.00
-    output_cost: float = 15.00
-
-
-@dataclass
-class OpenAI_GPT_4_1(LLM):
-    # name: str = "azure/gpt-4.1"
-    name: str = "gpt-4.1"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 100000
-    input_cost: float = 2.00
-    output_cost: float = 8.00
-
-
-@dataclass
-class Gemini_2_5(LLM):
-    name: str = "gemini/gemini-2.5-pro-preview-03-25"
-    model_type: Literal["openai", "anthropic", "google"] = "google"
-    max_tokens: int = 100000
-
-
-@dataclass
-class OpenAI_GPT_O_3(LLM):
-    name: str = "azure/o3"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 100000
-    input_cost: float = 2.00
-    output_cost: float = 8.00
-
-
-@dataclass
-class OpenAI_GPT_O_4_MINI(LLM):
-    name: str = "azure/o4-mini"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 100000
-    input_cost: float = 1.10
-    output_cost: float = 4.40
-
-
-@dataclass
-class GPT_5(LLM):  # IT IS TOO SLOW AT THE MOMENT, just here for documentation
-    name: str = "gpt-5-codex"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 100000
-    input_cost: float = 1.25
-    output_cost: float = 10.00
-
-
-@dataclass
-class GPT_4_1_Nano(LLM):
-    name: str = "gpt-4.1-nano"
-    model_type: Literal["openai", "anthropic", "google"] = "openai"
-    max_tokens: int = 100000
-    input_cost: float = 0.10
-    output_cost: float = 0.40
-
-
-def calculate_llm_cost(response: Any, llm: LLM) -> float | None:
-    """Calculates the cost of an OpenAI API chat completion call.
-
-    Args:
-        response (dict): The JSON response from the OpenAI API call.
-
-    Returns:
-        float: The total cost in USD, or None if the cost cannot be calculated.
-
-    """
-    try:
-        usage = response.usage
-        if hasattr(usage, "prompt_tokens"):  # for openai
-            prompt_tokens = usage.prompt_tokens
-            completion_tokens = usage.completion_tokens
-        else:  # for claude
-            prompt_tokens = usage.input_tokens
-            completion_tokens = usage.output_tokens
-
-        prompt_cost = (prompt_tokens / 1_000_000) * llm.input_cost
-        completion_cost = (completion_tokens / 1_000_000) * llm.output_cost
-
-        total_cost: float = prompt_cost + completion_cost
-
-        return total_cost
-
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return None
-
-
-def _get_openai_model() -> LLM:
-    """Return OpenAI GPT-4.1 if available, otherwise falls back to Anthropic Claude 4.
-
-    Returns:
-        LLM: The appropriate model instance based on available API keys.
-
-    """
-    if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
-        return OpenAI_GPT_4_1()
-    # Fall back to Anthropic if OpenAI not available
-    if os.environ.get("ANTHROPIC_API_KEY"):
-        return Anthropic_Claude_4()
-    # Default to OpenAI (will fail gracefully with clear error from env_specific.py)
-    return OpenAI_GPT_4_1()
-
-
-def _get_anthropic_model() -> LLM:
-    """Returns Anthropic Claude model prioritizing Azure Foundry, otherwise falls back to OpenAI GPT-4.1.
-
-    Returns:
-        LLM: The appropriate model instance based on available API keys.
-
-    """  # noqa: D401
-    if os.environ.get("AZURE_ANTHROPIC_API_KEY"):
-        return Anthropic_Claude_Sonnet_4_5_AF()
-    if os.environ.get("ANTHROPIC_API_KEY"):
-        return Anthropic_Claude_4()
-    # Fall back to OpenAI if Anthropic not available
-    if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
-        return OpenAI_GPT_4_1()
-    # Default to Claude (will fail gracefully with clear error from env_specific.py)
-    return Anthropic_Claude_4()
-
-
-# Dynamically select models based on available API keys
-EXPLAIN_MODEL: LLM = _get_openai_model()
-PLAN_MODEL: LLM = _get_openai_model()
-EXECUTE_MODEL: LLM = _get_openai_model()
-OPTIMIZE_MODEL: LLM = _get_openai_model()
-RANKING_MODEL: LLM = _get_openai_model()
-
-REFINEMENT_MODEL: LLM = _get_anthropic_model()
-EXPLANATIONS_MODEL: LLM = _get_anthropic_model()
-OPTIMIZATION_REVIEW_MODEL: LLM = _get_anthropic_model()
-CODE_REPAIR_MODEL: LLM = _get_anthropic_model()
--- a/django/aiservice/aiservice/observability/decorators.py
+++ b/django/aiservice/aiservice/observability/decorators.py
@@ -21,7 +21,7 @@ from collections.abc import Callable
 from functools import wraps
 from typing import Any

-from aiservice.models.aimodels import calculate_llm_cost
+from aiservice.llm import calculate_llm_cost
 from aiservice.observability.database import ErrorRecorder, LLMCallRecorder

 logger = logging.getLogger(__name__)
--- a/django/aiservice/code_repair/code_repair.py
+++ b/django/aiservice/code_repair/code_repair.py
@@ -13,8 +13,8 @@ from pydantic import ValidationError

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import validate_trace_id
-from aiservice.env_specific import call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import CODE_REPAIR_MODEL, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import CODE_REPAIR_MODEL, calculate_llm_cost, call_llm
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
 from optimizer.models import OptimizedCandidateSource
@@ -33,7 +33,7 @@ if TYPE_CHECKING:
        ChatCompletionToolMessageParam,
    )

-    from aiservice.models.aimodels import LLM
+    from aiservice.llm import LLM

 code_repair_api = NinjaAPI(urls_namespace="code_repair")

--- a/django/aiservice/explanations/explanations.py
+++ b/django/aiservice/explanations/explanations.py
@@ -12,8 +12,8 @@ from packaging import version

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import validate_trace_id
-from aiservice.env_specific import call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import EXPLANATIONS_MODEL, LLM, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import EXPLANATIONS_MODEL, LLM, calculate_llm_cost, call_llm
 from aiservice.observability.database import ErrorRecorder, LLMCallRecorder
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
@@ -25,8 +25,6 @@ if TYPE_CHECKING:
        ChatCompletionToolMessageParam,
    )

-    from aiservice.models.aimodels import LLM
-
 explanations_api = NinjaAPI(urls_namespace="explanations")
 explain_regex_pattern = re.compile(r"<explain>(.*)<\/explain>", re.DOTALL | re.IGNORECASE)

--- a/django/aiservice/optimization_review/optimization_review.py
+++ b/django/aiservice/optimization_review/optimization_review.py
@@ -11,14 +11,14 @@ from ninja import NinjaAPI, Schema
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
 from packaging import version

-from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import OPTIMIZATION_REVIEW_MODEL, calculate_llm_cost
+from aiservice.analytics.posthog import ph
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import OPTIMIZATION_REVIEW_MODEL, LLMResponse, calculate_llm_cost, call_llm
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost, update_optimization_features_review

 if TYPE_CHECKING:
-    from aiservice.models.aimodels import LLM
-from aiservice.analytics.posthog import ph
+    from aiservice.llm import LLM

 optimization_review_api = NinjaAPI(urls_namespace="optimization_review")

--- a/django/aiservice/optimizer/optimizer.py
+++ b/django/aiservice/optimizer/optimizer.py
@@ -14,13 +14,8 @@ from pydantic import ValidationError

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id
-from aiservice.env_specific import (
-    LLMResponse,
-    call_llm,
-    debug_log_sensitive_data,
-    debug_log_sensitive_data_from_callable,
-)
-from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable
+from aiservice.llm import OPTIMIZE_MODEL, LLMResponse, calculate_llm_cost, call_llm
 from aiservice.observability.decorators import observe_llm_call
 from authapp.user import get_user_by_id
 from log_features.log_event import log_optimization_event
@@ -37,7 +32,7 @@ from optimizer.models import OptimizedCandidateSource, OptimizeSchema  # noqa: T
 if TYPE_CHECKING:
    from django.http import HttpRequest

-    from aiservice.models.aimodels import LLM
+    from aiservice.llm import LLM


 optimizations_json = [
--- a/django/aiservice/optimizer/optimizer_line_profiler.py
+++ b/django/aiservice/optimizer/optimizer_line_profiler.py
@@ -10,13 +10,8 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import parse_python_version, validate_trace_id
-from aiservice.env_specific import (
-    LLMResponse,
-    call_llm,
-    debug_log_sensitive_data,
-    debug_log_sensitive_data_from_callable,
-)
-from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable
+from aiservice.llm import OPTIMIZE_MODEL, LLMResponse, calculate_llm_cost, call_llm
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
@@ -34,7 +29,7 @@ if TYPE_CHECKING:
        ChatCompletionToolMessageParam,
    )

-    from aiservice.models.aimodels import LLM
+    from aiservice.llm import LLM
    from optimizer.context_utils.optimizer_context import OptimizeResponseItemSchema


--- a/django/aiservice/optimizer/refinement.py
+++ b/django/aiservice/optimizer/refinement.py
@@ -14,8 +14,8 @@ from pydantic import ValidationError

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import validate_trace_id
-from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import REFINEMENT_MODEL, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import REFINEMENT_MODEL, LLMResponse, calculate_llm_cost, call_llm
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
        ChatCompletionToolMessageParam,
    )

-    from aiservice.models.aimodels import LLM
+    from aiservice.llm import LLM


 refinement_api = NinjaAPI(urls_namespace="refinement")
--- a/django/aiservice/ranker/ranker.py
+++ b/django/aiservice/ranker/ranker.py
@@ -9,8 +9,8 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import validate_trace_id
-from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import LLM, RANKING_MODEL, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import LLM, RANKING_MODEL, LLMResponse, calculate_llm_cost, call_llm
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
@@ -22,8 +22,6 @@ if TYPE_CHECKING:
        ChatCompletionToolMessageParam,
    )

-    from aiservice.models.aimodels import LLM
-
 # from google import genai
 # from pydantic import BaseModel
 #
--- a/django/aiservice/testgen/gen_inspired_tests.py
+++ b/django/aiservice/testgen/gen_inspired_tests.py
@@ -11,7 +11,7 @@ from codeflash.code_utils.code_utils import ellipsis_in_ast, get_imports_from_fi
 from codeflash.models.models import TestsInFile
 from codeflash.verification.gen_regression_tests import print_message_delta, print_messages

-from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL
+from aiservice.llm import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL


 def regression_tests_from_function_with_inspiration(
--- a/django/aiservice/testgen/sqlalchemy/sqlalchemy_testgen.py
+++ b/django/aiservice/testgen/sqlalchemy/sqlalchemy_testgen.py
@@ -8,8 +8,8 @@ from pathlib import Path
 from typing import SupportsIndex

 from aiservice.common_utils import parse_python_version, safe_isort
-from aiservice.env_specific import call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost, call_llm
 from aiservice.models.functions_to_optimize import FunctionToOptimize
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
--- a/django/aiservice/testgen/testgen.py
+++ b/django/aiservice/testgen/testgen.py
@@ -16,8 +16,8 @@ from openai import OpenAIError

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id
-from aiservice.env_specific import IS_PRODUCTION, LLMResponse, call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import EXECUTE_MODEL, calculate_llm_cost
+from aiservice.env_specific import IS_PRODUCTION, debug_log_sensitive_data
+from aiservice.llm import EXECUTE_MODEL, LLMResponse, calculate_llm_cost, call_llm
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost
 from log_features.log_features import log_features
@@ -35,7 +35,7 @@ from testgen.postprocessing.postprocess_pipeline import postprocessing_testgen_p
 from testgen.testgen_context import BaseTestGenContext, TestGenContextData

 if TYPE_CHECKING:
-    from aiservice.models.aimodels import LLM
+    from aiservice.llm import LLM
    from authapp.auth import AuthBearer

 testgen_api = NinjaAPI(urls_namespace="testgen")
--- a/django/aiservice/workflow_gen/workflow_gen.py
+++ b/django/aiservice/workflow_gen/workflow_gen.py
@@ -11,8 +11,8 @@ from ninja import NinjaAPI, Schema
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

 from aiservice.analytics.posthog import ph
-from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
-from aiservice.models.aimodels import EXECUTE_MODEL
+from aiservice.env_specific import debug_log_sensitive_data
+from aiservice.llm import EXECUTE_MODEL, LLMResponse, call_llm
 from aiservice.observability.decorators import observe_llm_call

 if TYPE_CHECKING: