unify
This commit is contained in:
parent
4a7f8a10f4
commit
273edff3ab
15 changed files with 301 additions and 395 deletions
|
|
@ -1,21 +1,17 @@
|
|||
"""Environment-specific configuration and utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from anthropic import AsyncAnthropicFoundry
|
||||
from dotenv import load_dotenv
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
|
||||
from anthropic.types import Message as AnthropicMessage
|
||||
from openai.types.chat import ChatCompletion
|
||||
|
||||
|
||||
IS_PRODUCTION = os.environ.get("ENVIRONMENT", default="") == "PRODUCTION"
|
||||
|
||||
|
|
@ -25,11 +21,13 @@ logging.getLogger("parso").setLevel(logging.WARNING)
|
|||
|
||||
|
||||
def load_env() -> None:
    """Read variables from a local .env file, except when running in production."""
    if IS_PRODUCTION:
        return
    load_dotenv()
|
||||
|
||||
|
||||
def set_logging_level() -> None:
|
||||
"""Set logging level based on environment."""
|
||||
if IS_PRODUCTION:
|
||||
logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, stream=sys.stdout)
|
||||
else:
|
||||
|
|
@ -37,121 +35,12 @@ def set_logging_level() -> None:
|
|||
|
||||
|
||||
def debug_log_sensitive_data(message: str) -> None:
    """Emit *message* at DEBUG level on the root logger, but never in production."""
    if IS_PRODUCTION:
        return
    logging.debug(message)
|
||||
|
||||
|
||||
def debug_log_sensitive_data_from_callable(message: Callable[[], str | None]) -> None:
    """Call *message* and log its result at DEBUG level, but never in production.

    In production the callable is not invoked at all, so whatever work it does
    to build the text is skipped there.
    """
    if IS_PRODUCTION:
        return
    logging.debug(message())
|
||||
|
||||
|
||||
def create_llm_client(
|
||||
model_type: Literal["openai", "anthropic", "google"],
|
||||
) -> AsyncOpenAI | AsyncAnthropicFoundry | None:
|
||||
# Azure OpenAI endpoint configuration
|
||||
azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
|
||||
azure_openai_endpoint = os.environ.get(
|
||||
"AZURE_OPENAI_ENDPOINT", "https://codeflash-openai-resource.openai.azure.com/openai/v1/"
|
||||
)
|
||||
|
||||
# Azure Anthropic endpoint configuration
|
||||
azure_anthropic_api_key = os.environ.get("AZURE_ANTHROPIC_API_KEY")
|
||||
azure_anthropic_endpoint = os.environ.get(
|
||||
"AZURE_ANTHROPIC_ENDPOINT", "https://codeflash-anthropic-resource.openai.azure.com/anthropic"
|
||||
)
|
||||
|
||||
# Direct OpenAI and Google keys
|
||||
openai_key = os.environ.get("OPENAI_API_KEY")
|
||||
google_key = os.environ.get("GEMINI_API_KEY")
|
||||
|
||||
if model_type == "openai" and azure_openai_api_key:
|
||||
return AsyncOpenAI(api_key=azure_openai_api_key, base_url=azure_openai_endpoint)
|
||||
if model_type == "openai" and openai_key:
|
||||
return AsyncOpenAI(api_key=openai_key) # baseurl not needed for regular openai
|
||||
if model_type == "anthropic" and azure_anthropic_api_key:
|
||||
return AsyncAnthropicFoundry(api_key=azure_anthropic_api_key, base_url=azure_anthropic_endpoint)
|
||||
# # for future use : gemini supported only via GEMINI_API_KEY at the moment, todo for vertex ai
|
||||
if model_type == "google" and google_key:
|
||||
return AsyncOpenAI(api_key=google_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
||||
return None
|
||||
|
||||
|
||||
llm_clients = {
|
||||
"openai": create_llm_client("openai"),
|
||||
"anthropic": create_llm_client("anthropic"),
|
||||
# "google": create_llm_client("google"), # no need to instantiate right now as we're not using it
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMUsage:
|
||||
"""Unified usage stats for both OpenAI and Anthropic responses."""
|
||||
|
||||
input_tokens: int
|
||||
output_tokens: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMResponse:
|
||||
"""Unified response wrapper for both OpenAI and Anthropic API responses."""
|
||||
|
||||
content: str
|
||||
usage: LLMUsage
|
||||
raw_response: ChatCompletion | AnthropicMessage
|
||||
all_contents: list[str] | None = None # For multiple completions when n > 1
|
||||
|
||||
|
||||
async def call_llm(
|
||||
model_name: str,
|
||||
model_type: Literal["openai", "anthropic", "google"],
|
||||
messages: list[dict[str, Any]],
|
||||
max_tokens: int = 8192,
|
||||
temperature: float | None = None,
|
||||
n: int = 1,
|
||||
) -> LLMResponse:
|
||||
"""Call LLM with OpenAI or Anthropic client."""
|
||||
client = llm_clients[model_type]
|
||||
if client is None:
|
||||
msg = f"LLM client for model type '{model_type}' is not available"
|
||||
raise ValueError(msg)
|
||||
|
||||
if model_type == "anthropic":
|
||||
assert isinstance(client, AsyncAnthropicFoundry)
|
||||
system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
|
||||
anthropic_messages = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]
|
||||
|
||||
kwargs: dict[str, Any] = {"model": model_name, "messages": anthropic_messages, "max_tokens": max_tokens}
|
||||
if system_prompt:
|
||||
kwargs["system"] = system_prompt
|
||||
if temperature is not None:
|
||||
kwargs["temperature"] = temperature
|
||||
|
||||
response = await client.messages.create(**kwargs)
|
||||
content = "".join(block.text for block in response.content if hasattr(block, "text"))
|
||||
|
||||
return LLMResponse(
|
||||
content=content,
|
||||
usage=LLMUsage(input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens),
|
||||
raw_response=response,
|
||||
)
|
||||
|
||||
# OpenAI / Google (OpenAI-compatible)
|
||||
assert isinstance(client, AsyncOpenAI)
|
||||
openai_kwargs: dict[str, Any] = {"model": model_name, "messages": messages, "n": n}
|
||||
if temperature is not None:
|
||||
openai_kwargs["temperature"] = temperature
|
||||
response = await client.chat.completions.create(**openai_kwargs)
|
||||
|
||||
# Collect all contents when n > 1
|
||||
all_contents = [choice.message.content or "" for choice in response.choices] if len(response.choices) > 1 else None
|
||||
|
||||
return LLMResponse(
|
||||
content=response.choices[0].message.content or "",
|
||||
usage=LLMUsage(
|
||||
input_tokens=response.usage.prompt_tokens if response.usage else 0,
|
||||
output_tokens=response.usage.completion_tokens if response.usage else 0,
|
||||
),
|
||||
raw_response=response,
|
||||
all_contents=all_contents,
|
||||
)
|
||||
|
|
|
|||
265
django/aiservice/aiservice/llm.py
Normal file
265
django/aiservice/aiservice/llm.py
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
"""Unified LLM module for all model definitions, clients, and API calls."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
from anthropic import AsyncAnthropicFoundry
|
||||
from openai import AsyncOpenAI
|
||||
from pydantic.dataclasses import dataclass as pydantic_dataclass
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from anthropic.types import Message as AnthropicMessage
|
||||
from openai.types.chat import ChatCompletion
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Model Definitions
|
||||
# =============================================================================
|
||||
|
||||
# Pricing is in USD per 1M tokens. See:
|
||||
# https://docs.anthropic.com/en/docs/about-claude/pricing
|
||||
# https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
|
||||
|
||||
|
||||
@pydantic_dataclass
class LLM:
    """Describe one LLM: its name, token limit, provider family and optional pricing."""

    # On Azure OpenAI Service, this is the deployment name.
    name: str
    max_tokens: int
    model_type: Literal["openai", "anthropic", "google"]
    # Prices are in USD per 1M tokens and default to None when not specified.
    input_cost: float | None = None
    output_cost: float | None = None
|
||||
|
||||
|
||||
@pydantic_dataclass
class OpenAI_GPT_4_1(LLM):
    """Defaults for OpenAI GPT-4.1 (costs in USD per 1M tokens)."""

    # Overrides are listed in the same order as the base-class fields.
    name: str = "gpt-4.1"
    max_tokens: int = 100000
    model_type: Literal["openai", "anthropic", "google"] = "openai"
    input_cost: float = 2.00
    output_cost: float = 8.00
|
||||
|
||||
|
||||
@pydantic_dataclass
class Anthropic_Claude_4(LLM):
    """Defaults for Anthropic Claude 4 Sonnet (costs in USD per 1M tokens)."""

    # Overrides are listed in the same order as the base-class fields.
    name: str = "claude-sonnet-4-20250514"
    max_tokens: int = 100000
    model_type: Literal["openai", "anthropic", "google"] = "anthropic"
    input_cost: float = 3.00
    output_cost: float = 15.00
|
||||
|
||||
|
||||
@pydantic_dataclass
class Anthropic_Claude_Sonnet_4_5_AF(LLM):
    """Defaults for Anthropic Claude 4.5 Sonnet via Azure Foundry (costs in USD per 1M tokens)."""

    # Overrides are listed in the same order as the base-class fields.
    name: str = "claude-sonnet-4-5"
    max_tokens: int = 200000
    model_type: Literal["openai", "anthropic", "google"] = "anthropic"
    input_cost: float = 3.00
    output_cost: float = 15.00
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LLM Client Setup
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def create_llm_client(
    model_type: Literal["openai", "anthropic", "google"],
) -> AsyncOpenAI | AsyncAnthropicFoundry | None:
    """Build the async client for *model_type* from API keys found in the environment.

    Lookup order per provider:
      * ``openai``: ``AZURE_OPENAI_API_KEY`` (with ``AZURE_OPENAI_ENDPOINT`` or its
        built-in default as base URL), then ``OPENAI_API_KEY`` with the SDK's default URL.
      * ``anthropic``: ``AZURE_ANTHROPIC_API_KEY`` (with ``AZURE_ANTHROPIC_ENDPOINT``
        or its built-in default as base URL).
      * ``google``: ``GEMINI_API_KEY`` against Google's OpenAI-compatible endpoint.

    Returns:
        The configured client, or None when the relevant key is unset or empty.
    """
    env = os.environ

    if model_type == "openai":
        azure_key = env.get("AZURE_OPENAI_API_KEY")
        if azure_key:
            azure_url = env.get(
                "AZURE_OPENAI_ENDPOINT", "https://codeflash-openai-resource.openai.azure.com/openai/v1/"
            )
            return AsyncOpenAI(api_key=azure_key, base_url=azure_url)
        direct_key = env.get("OPENAI_API_KEY")
        if direct_key:
            # No base_url: the SDK's default endpoint is used for direct OpenAI access.
            return AsyncOpenAI(api_key=direct_key)
        return None

    if model_type == "anthropic":
        foundry_key = env.get("AZURE_ANTHROPIC_API_KEY")
        if foundry_key:
            foundry_url = env.get(
                "AZURE_ANTHROPIC_ENDPOINT", "https://codeflash-anthropic-resource.openai.azure.com/anthropic"
            )
            return AsyncAnthropicFoundry(api_key=foundry_key, base_url=foundry_url)
        return None

    if model_type == "google":
        gemini_key = env.get("GEMINI_API_KEY")
        if gemini_key:
            return AsyncOpenAI(
                api_key=gemini_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
            )

    return None
|
||||
|
||||
|
||||
# Provider clients are built once, when this module is imported. A value of
# None means create_llm_client found no usable API key for that provider.
llm_clients = dict(
    openai=create_llm_client("openai"),
    anthropic=create_llm_client("anthropic"),
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Response Types
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
class LLMUsage:
    """Token counts for one LLM call, in a provider-independent shape."""

    # Filled from prompt_tokens (OpenAI) or input_tokens (Anthropic).
    input_tokens: int
    # Filled from completion_tokens (OpenAI) or output_tokens (Anthropic).
    output_tokens: int
|
||||
|
||||
|
||||
@dataclass
class LLMResponse:
    """Provider-independent view of one LLM reply, with the raw response attached."""

    # Text of the reply (the first choice on the OpenAI-compatible path).
    content: str
    usage: LLMUsage
    # The untouched SDK response object.
    raw_response: ChatCompletion | AnthropicMessage
    # For multiple completions when n > 1
    all_contents: list[str] | None = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LLM API Call
|
||||
# =============================================================================
|
||||
|
||||
|
||||
async def call_llm(
    model_name: str,
    model_type: Literal["openai", "anthropic", "google"],
    messages: list[dict[str, Any]],
    max_tokens: int = 8192,
    temperature: float | None = None,
    n: int = 1,
) -> LLMResponse:
    """Send a chat request to the client registered for *model_type* and normalize the reply.

    Args:
        model_name: Model (or deployment) name passed to the provider as ``model``.
        model_type: Provider family; selects the client from ``llm_clients``.
        messages: Chat messages as dicts with ``role`` and ``content`` keys.
        max_tokens: Output token cap. Only forwarded on the Anthropic path.
        temperature: Sampling temperature; left out of the request when None.
        n: Number of completions to request. Only forwarded on the OpenAI-compatible path.

    Returns:
        An ``LLMResponse`` with the reply text, token usage and the raw SDK response.
        ``all_contents`` is set only when the OpenAI-compatible reply has more than one choice.

    Raises:
        ValueError: If no client is configured for *model_type*.
    """
    # Use .get() rather than indexing: a provider that was never registered
    # (e.g. "google") must produce the same clear ValueError as a provider
    # whose API key is missing, not a bare KeyError.
    client = llm_clients.get(model_type)
    if client is None:
        msg = f"LLM client for model type '{model_type}' is not available"
        raise ValueError(msg)

    if model_type == "anthropic":
        assert isinstance(client, AsyncAnthropicFoundry)
        # Anthropic takes the system prompt as a separate argument. Only the
        # first system message is used; every system message is removed from
        # the message list that is sent.
        system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
        anthropic_messages = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]

        kwargs: dict[str, Any] = {"model": model_name, "messages": anthropic_messages, "max_tokens": max_tokens}
        if system_prompt:
            kwargs["system"] = system_prompt
        if temperature is not None:
            kwargs["temperature"] = temperature

        response = await client.messages.create(**kwargs)
        # Concatenate the text of every content block that carries a ``text`` attribute.
        content = "".join(block.text for block in response.content if hasattr(block, "text"))

        return LLMResponse(
            content=content,
            usage=LLMUsage(input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens),
            raw_response=response,
        )

    # OpenAI / Google (OpenAI-compatible)
    assert isinstance(client, AsyncOpenAI)
    openai_kwargs: dict[str, Any] = {"model": model_name, "messages": messages, "n": n}
    if temperature is not None:
        openai_kwargs["temperature"] = temperature
    response = await client.chat.completions.create(**openai_kwargs)

    # Collect all contents when n > 1
    all_contents = [choice.message.content or "" for choice in response.choices] if len(response.choices) > 1 else None

    return LLMResponse(
        content=response.choices[0].message.content or "",
        usage=LLMUsage(
            # Usage may be absent on the response; report zero tokens in that case.
            input_tokens=response.usage.prompt_tokens if response.usage else 0,
            output_tokens=response.usage.completion_tokens if response.usage else 0,
        ),
        raw_response=response,
        all_contents=all_contents,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Cost Calculation
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def calculate_llm_cost(response: Any, llm: LLM) -> float | None:
    """Calculate the cost of an LLM API call.

    Args:
        response: The raw response from the LLM API call. Its ``usage`` attribute
            may expose ``prompt_tokens``/``completion_tokens`` (OpenAI) or
            ``input_tokens``/``output_tokens`` (Anthropic).
        llm: The LLM model configuration with pricing info (USD per 1M tokens).

    Returns:
        The total cost in USD, or None if cost cannot be calculated (no usage
        data, no pricing on the model, or non-numeric token counts).

    """
    # The expected "cannot calculate" cases are checked explicitly so that a
    # blanket ``except Exception`` is not needed to cover them.
    usage = getattr(response, "usage", None)
    input_cost = getattr(llm, "input_cost", None)
    output_cost = getattr(llm, "output_cost", None)
    if usage is None or input_cost is None or output_cost is None:
        return None

    if hasattr(usage, "prompt_tokens"):  # OpenAI
        prompt_tokens = usage.prompt_tokens
        completion_tokens = getattr(usage, "completion_tokens", None)
    else:  # Anthropic
        prompt_tokens = getattr(usage, "input_tokens", None)
        completion_tokens = getattr(usage, "output_tokens", None)
    if prompt_tokens is None or completion_tokens is None:
        return None

    try:
        prompt_cost = (prompt_tokens / 1_000_000) * input_cost
        completion_cost = (completion_tokens / 1_000_000) * output_cost
    except TypeError:
        # Token counts or prices that are not numbers: no cost can be derived.
        return None
    return prompt_cost + completion_cost
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Model Selection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _get_openai_model() -> LLM:
    """Return OpenAI GPT-4.1 if an OpenAI key is set, otherwise fall back to an Anthropic model.

    Fallback order when no OpenAI key is present:
      1. ``AZURE_ANTHROPIC_API_KEY`` -> Claude Sonnet 4.5 via Azure Foundry.
      2. ``ANTHROPIC_API_KEY`` -> Claude 4 Sonnet.
      3. Nothing set -> GPT-4.1 (calls will then fail with the "client not available" error).
    """
    if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
        return OpenAI_GPT_4_1()
    # create_llm_client only builds an Anthropic client from AZURE_ANTHROPIC_API_KEY,
    # so that key must be honored here too; otherwise the fallback picks a model
    # for which no client exists.
    if os.environ.get("AZURE_ANTHROPIC_API_KEY"):
        return Anthropic_Claude_Sonnet_4_5_AF()
    if os.environ.get("ANTHROPIC_API_KEY"):
        return Anthropic_Claude_4()
    return OpenAI_GPT_4_1()
|
||||
|
||||
|
||||
def _get_anthropic_model() -> LLM:
    """Pick the Anthropic-preferred model from the API keys present in the environment.

    Azure Foundry Claude is tried first, then Claude 4 Sonnet, then OpenAI GPT-4.1.
    With no relevant key set, Claude 4 Sonnet is returned as the default.
    """
    # Each entry: the environment variables that enable a model, and the model class.
    preference = (
        (("AZURE_ANTHROPIC_API_KEY",), Anthropic_Claude_Sonnet_4_5_AF),
        (("ANTHROPIC_API_KEY",), Anthropic_Claude_4),
        (("AZURE_OPENAI_API_KEY", "OPENAI_API_KEY"), OpenAI_GPT_4_1),
    )
    for env_keys, model_cls in preference:
        if any(os.environ.get(env_key) for env_key in env_keys):
            return model_cls()
    return Anthropic_Claude_4()
|
||||
|
||||
|
||||
# Dynamically select models based on available API keys.
# Selection runs once, when this module is imported, using whichever keys are
# set in the environment at that moment. Each constant gets its own instance.

# Roles that prefer the OpenAI model (see _get_openai_model for the fallbacks).
EXPLAIN_MODEL: LLM = _get_openai_model()
PLAN_MODEL: LLM = _get_openai_model()
EXECUTE_MODEL: LLM = _get_openai_model()
OPTIMIZE_MODEL: LLM = _get_openai_model()
RANKING_MODEL: LLM = _get_openai_model()

# Roles that prefer an Anthropic model (see _get_anthropic_model for the fallbacks).
REFINEMENT_MODEL: LLM = _get_anthropic_model()
EXPLANATIONS_MODEL: LLM = _get_anthropic_model()
OPTIMIZATION_REVIEW_MODEL: LLM = _get_anthropic_model()
CODE_REPAIR_MODEL: LLM = _get_anthropic_model()
|
||||
|
|
@ -1,234 +0,0 @@
|
|||
import os
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic.dataclasses import dataclass
|
||||
|
||||
|
||||
# The following pricing information is based on public OpenAI and Claude documentation
|
||||
# as of August 2025. Prices can change, so always check the official:
|
||||
# https://docs.anthropic.com/en/docs/about-claude/pricing
|
||||
# https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
|
||||
# The pricing is in USD per 1M tokens.
|
||||
# Some of the pricing are placeholder from Open AI https://platform.openai.com/docs/pricing?latest-pricing=flex.
|
||||
@dataclass
|
||||
class LLM:
|
||||
name: str # On Azure OpenAI Service, this is the deployment name
|
||||
max_tokens: int
|
||||
model_type: Literal["openai", "anthropic", "google"]
|
||||
# Add new pricing attributes in USD per 1M tokens
|
||||
input_cost: float | None = None
|
||||
output_cost: float | None = None
|
||||
|
||||
|
||||
# name of the model deployment on Azure OpenAI Service
|
||||
@dataclass
|
||||
class GPT_4_OMNI(LLM):
|
||||
name: str = "gpt-4o-2" if os.environ.get("OPENAI_API_TYPE") == "azure" else "gpt-4o"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 128000
|
||||
input_cost: float = 2.50
|
||||
output_cost: float = 10.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_4_128k(LLM):
|
||||
name: str = "gpt-4-1106-preview"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 128000
|
||||
input_cost: float = 10.00
|
||||
output_cost: float = 30.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_4_32k(LLM):
|
||||
name: str = "gpt4-32k"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 32768
|
||||
input_cost: float = 60.00
|
||||
output_cost: float = 120.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_4(LLM):
|
||||
name: str = "gpt-4-0613"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 8192
|
||||
input_cost: float = 30.00
|
||||
output_cost: float = 60.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_3_5_Turbo_16k(LLM):
|
||||
name: str = "gpt-3.5-turbo-16k"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 16384
|
||||
input_cost: float = 3.00
|
||||
output_cost: float = 4.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_3_5_Turbo(LLM):
|
||||
name: str = "gpt-3.5-turbo"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 4096
|
||||
input_cost: float = 0.50
|
||||
output_cost: float = 1.50
|
||||
|
||||
|
||||
@dataclass
|
||||
class Antropic_Claude_3_7(LLM):
|
||||
name: str = "claude-3-7-sonnet-20250219"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 3.00
|
||||
output_cost: float = 15.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class Anthropic_Claude_4(LLM):
|
||||
name: str = "claude-sonnet-4-20250514"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "anthropic"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 3.00
|
||||
output_cost: float = 15.00
|
||||
|
||||
|
||||
# AF = Azure Foundry
|
||||
@dataclass
|
||||
class Anthropic_Claude_Sonnet_4_5_AF(LLM):
|
||||
name: str = "claude-sonnet-4-5"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "anthropic"
|
||||
max_tokens: int = 200000
|
||||
input_cost: float = 3.00
|
||||
output_cost: float = 15.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAI_GPT_4_1(LLM):
|
||||
# name: str = "azure/gpt-4.1"
|
||||
name: str = "gpt-4.1"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 2.00
|
||||
output_cost: float = 8.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class Gemini_2_5(LLM):
|
||||
name: str = "gemini/gemini-2.5-pro-preview-03-25"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "google"
|
||||
max_tokens: int = 100000
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAI_GPT_O_3(LLM):
|
||||
name: str = "azure/o3"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 2.00
|
||||
output_cost: float = 8.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAI_GPT_O_4_MINI(LLM):
|
||||
name: str = "azure/o4-mini"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 1.10
|
||||
output_cost: float = 4.40
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_5(LLM): # IT IS TOO SLOW AT THE MOMENT, just here for documentation
|
||||
name: str = "gpt-5-codex"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 1.25
|
||||
output_cost: float = 10.00
|
||||
|
||||
|
||||
@dataclass
|
||||
class GPT_4_1_Nano(LLM):
|
||||
name: str = "gpt-4.1-nano"
|
||||
model_type: Literal["openai", "anthropic", "google"] = "openai"
|
||||
max_tokens: int = 100000
|
||||
input_cost: float = 0.10
|
||||
output_cost: float = 0.40
|
||||
|
||||
|
||||
def calculate_llm_cost(response: Any, llm: LLM) -> float | None:
|
||||
"""Calculates the cost of an OpenAI API chat completion call.
|
||||
|
||||
Args:
|
||||
response (dict): The JSON response from the OpenAI API call.
|
||||
|
||||
Returns:
|
||||
float: The total cost in USD, or None if the cost cannot be calculated.
|
||||
|
||||
"""
|
||||
try:
|
||||
usage = response.usage
|
||||
if hasattr(usage, "prompt_tokens"): # for openai
|
||||
prompt_tokens = usage.prompt_tokens
|
||||
completion_tokens = usage.completion_tokens
|
||||
else: # for claude
|
||||
prompt_tokens = usage.input_tokens
|
||||
completion_tokens = usage.output_tokens
|
||||
|
||||
prompt_cost = (prompt_tokens / 1_000_000) * llm.input_cost
|
||||
completion_cost = (completion_tokens / 1_000_000) * llm.output_cost
|
||||
|
||||
total_cost: float = prompt_cost + completion_cost
|
||||
|
||||
return total_cost
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _get_openai_model() -> LLM:
|
||||
"""Return OpenAI GPT-4.1 if available, otherwise falls back to Anthropic Claude 4.
|
||||
|
||||
Returns:
|
||||
LLM: The appropriate model instance based on available API keys.
|
||||
|
||||
"""
|
||||
if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
|
||||
return OpenAI_GPT_4_1()
|
||||
# Fall back to Anthropic if OpenAI not available
|
||||
if os.environ.get("ANTHROPIC_API_KEY"):
|
||||
return Anthropic_Claude_4()
|
||||
# Default to OpenAI (will fail gracefully with clear error from env_specific.py)
|
||||
return OpenAI_GPT_4_1()
|
||||
|
||||
|
||||
def _get_anthropic_model() -> LLM:
|
||||
"""Returns Anthropic Claude model prioritizing Azure Foundry, otherwise falls back to OpenAI GPT-4.1.
|
||||
|
||||
Returns:
|
||||
LLM: The appropriate model instance based on available API keys.
|
||||
|
||||
""" # noqa: D401
|
||||
if os.environ.get("AZURE_ANTHROPIC_API_KEY"):
|
||||
return Anthropic_Claude_Sonnet_4_5_AF()
|
||||
if os.environ.get("ANTHROPIC_API_KEY"):
|
||||
return Anthropic_Claude_4()
|
||||
# Fall back to OpenAI if Anthropic not available
|
||||
if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"):
|
||||
return OpenAI_GPT_4_1()
|
||||
# Default to Claude (will fail gracefully with clear error from env_specific.py)
|
||||
return Anthropic_Claude_4()
|
||||
|
||||
|
||||
# Dynamically select models based on available API keys
|
||||
EXPLAIN_MODEL: LLM = _get_openai_model()
|
||||
PLAN_MODEL: LLM = _get_openai_model()
|
||||
EXECUTE_MODEL: LLM = _get_openai_model()
|
||||
OPTIMIZE_MODEL: LLM = _get_openai_model()
|
||||
RANKING_MODEL: LLM = _get_openai_model()
|
||||
|
||||
REFINEMENT_MODEL: LLM = _get_anthropic_model()
|
||||
EXPLANATIONS_MODEL: LLM = _get_anthropic_model()
|
||||
OPTIMIZATION_REVIEW_MODEL: LLM = _get_anthropic_model()
|
||||
CODE_REPAIR_MODEL: LLM = _get_anthropic_model()
|
||||
|
|
@ -21,7 +21,7 @@ from collections.abc import Callable
|
|||
from functools import wraps
|
||||
from typing import Any
|
||||
|
||||
from aiservice.models.aimodels import calculate_llm_cost
|
||||
from aiservice.llm import calculate_llm_cost
|
||||
from aiservice.observability.database import ErrorRecorder, LLMCallRecorder
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@ from pydantic import ValidationError
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import validate_trace_id
|
||||
from aiservice.env_specific import call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import CODE_REPAIR_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import CODE_REPAIR_MODEL, calculate_llm_cost, call_llm
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
from optimizer.models import OptimizedCandidateSource
|
||||
|
|
@ -33,7 +33,7 @@ if TYPE_CHECKING:
|
|||
ChatCompletionToolMessageParam,
|
||||
)
|
||||
|
||||
from aiservice.models.aimodels import LLM
|
||||
from aiservice.llm import LLM
|
||||
|
||||
code_repair_api = NinjaAPI(urls_namespace="code_repair")
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ from packaging import version
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import validate_trace_id
|
||||
from aiservice.env_specific import call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import EXPLANATIONS_MODEL, LLM, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import EXPLANATIONS_MODEL, LLM, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.database import ErrorRecorder, LLMCallRecorder
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
|
@ -25,8 +25,6 @@ if TYPE_CHECKING:
|
|||
ChatCompletionToolMessageParam,
|
||||
)
|
||||
|
||||
from aiservice.models.aimodels import LLM
|
||||
|
||||
explanations_api = NinjaAPI(urls_namespace="explanations")
|
||||
explain_regex_pattern = re.compile(r"<explain>(.*)<\/explain>", re.DOTALL | re.IGNORECASE)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,14 +11,14 @@ from ninja import NinjaAPI, Schema
|
|||
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
|
||||
from packaging import version
|
||||
|
||||
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import OPTIMIZATION_REVIEW_MODEL, calculate_llm_cost
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import OPTIMIZATION_REVIEW_MODEL, LLMResponse, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
from log_features.log_event import update_optimization_cost, update_optimization_features_review
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from aiservice.models.aimodels import LLM
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.llm import LLM
|
||||
|
||||
optimization_review_api = NinjaAPI(urls_namespace="optimization_review")
|
||||
|
||||
|
|
|
|||
|
|
@ -14,13 +14,8 @@ from pydantic import ValidationError
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id
|
||||
from aiservice.env_specific import (
|
||||
LLMResponse,
|
||||
call_llm,
|
||||
debug_log_sensitive_data,
|
||||
debug_log_sensitive_data_from_callable,
|
||||
)
|
||||
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable
|
||||
from aiservice.llm import OPTIMIZE_MODEL, LLMResponse, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
from authapp.user import get_user_by_id
|
||||
from log_features.log_event import log_optimization_event
|
||||
|
|
@ -37,7 +32,7 @@ from optimizer.models import OptimizedCandidateSource, OptimizeSchema # noqa: T
|
|||
if TYPE_CHECKING:
|
||||
from django.http import HttpRequest
|
||||
|
||||
from aiservice.models.aimodels import LLM
|
||||
from aiservice.llm import LLM
|
||||
|
||||
|
||||
optimizations_json = [
|
||||
|
|
|
|||
|
|
@ -10,13 +10,8 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import parse_python_version, validate_trace_id
|
||||
from aiservice.env_specific import (
|
||||
LLMResponse,
|
||||
call_llm,
|
||||
debug_log_sensitive_data,
|
||||
debug_log_sensitive_data_from_callable,
|
||||
)
|
||||
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable
|
||||
from aiservice.llm import OPTIMIZE_MODEL, LLMResponse, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
|
@ -34,7 +29,7 @@ if TYPE_CHECKING:
|
|||
ChatCompletionToolMessageParam,
|
||||
)
|
||||
|
||||
from aiservice.models.aimodels import LLM
|
||||
from aiservice.llm import LLM
|
||||
from optimizer.context_utils.optimizer_context import OptimizeResponseItemSchema
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ from pydantic import ValidationError
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import validate_trace_id
|
||||
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import REFINEMENT_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import REFINEMENT_MODEL, LLMResponse, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
|
@ -29,7 +29,7 @@ if TYPE_CHECKING:
|
|||
ChatCompletionToolMessageParam,
|
||||
)
|
||||
|
||||
from aiservice.models.aimodels import LLM
|
||||
from aiservice.llm import LLM
|
||||
|
||||
|
||||
refinement_api = NinjaAPI(urls_namespace="refinement")
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import validate_trace_id
|
||||
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import LLM, RANKING_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import LLM, RANKING_MODEL, LLMResponse, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
|
@ -22,8 +22,6 @@ if TYPE_CHECKING:
|
|||
ChatCompletionToolMessageParam,
|
||||
)
|
||||
|
||||
from aiservice.models.aimodels import LLM
|
||||
|
||||
# from google import genai
|
||||
# from pydantic import BaseModel
|
||||
#
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from codeflash.code_utils.code_utils import ellipsis_in_ast, get_imports_from_fi
|
|||
from codeflash.models.models import TestsInFile
|
||||
from codeflash.verification.gen_regression_tests import print_message_delta, print_messages
|
||||
|
||||
from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL
|
||||
from aiservice.llm import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL
|
||||
|
||||
|
||||
def regression_tests_from_function_with_inspiration(
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ from pathlib import Path
|
|||
from typing import SupportsIndex
|
||||
|
||||
from aiservice.common_utils import parse_python_version, safe_isort
|
||||
from aiservice.env_specific import call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost, call_llm
|
||||
from aiservice.models.functions_to_optimize import FunctionToOptimize
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ from openai import OpenAIError
|
|||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id
|
||||
from aiservice.env_specific import IS_PRODUCTION, LLMResponse, call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import EXECUTE_MODEL, calculate_llm_cost
|
||||
from aiservice.env_specific import IS_PRODUCTION, debug_log_sensitive_data
|
||||
from aiservice.llm import EXECUTE_MODEL, LLMResponse, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
|
@ -35,7 +35,7 @@ from testgen.postprocessing.postprocess_pipeline import postprocessing_testgen_p
|
|||
from testgen.testgen_context import BaseTestGenContext, TestGenContextData
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from aiservice.models.aimodels import LLM
|
||||
from aiservice.llm import LLM
|
||||
from authapp.auth import AuthBearer
|
||||
|
||||
testgen_api = NinjaAPI(urls_namespace="testgen")
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ from ninja import NinjaAPI, Schema
|
|||
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
|
||||
|
||||
from aiservice.analytics.posthog import ph
|
||||
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
|
||||
from aiservice.models.aimodels import EXECUTE_MODEL
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import EXECUTE_MODEL, LLMResponse, call_llm
|
||||
from aiservice.observability.decorators import observe_llm_call
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
|
|
|||
Loading…
Reference in a new issue