leftovers
This commit is contained in:
parent
a03b22bab7
commit
ac566d6ad0
2 changed files with 6 additions and 68 deletions
|
|
@ -12,8 +12,6 @@ from anthropic import AsyncAnthropicFoundry
|
|||
from openai import AsyncOpenAI
|
||||
from pydantic.dataclasses import dataclass as pydantic_dataclass
|
||||
|
||||
from aiservice.observability.database import record_llm_call
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from anthropic.types import Message as AnthropicMessage
|
||||
from openai.types.chat import ChatCompletion
|
||||
|
|
@ -141,6 +139,8 @@ async def call_llm(
|
|||
context: dict[str, Any] | None = None,
|
||||
) -> LLMResponse:
|
||||
"""Call LLM with OpenAI or Anthropic client."""
|
||||
from aiservice.observability.database import record_llm_call # noqa: PLC0415
|
||||
|
||||
client = llm_clients[llm.model_type]
|
||||
if client is None:
|
||||
msg = f"LLM client for model type '{llm.model_type}' is not available"
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import sentry_sdk
|
||||
|
|
@ -14,7 +12,6 @@ from aiservice.analytics.posthog import ph
|
|||
from aiservice.common_utils import validate_trace_id
|
||||
from aiservice.env_specific import debug_log_sensitive_data
|
||||
from aiservice.llm import EXPLANATIONS_MODEL, LLM, calculate_llm_cost, call_llm
|
||||
from aiservice.observability.database import ErrorRecorder, LLMCallRecorder
|
||||
from log_features.log_event import update_optimization_cost
|
||||
from log_features.log_features import log_features
|
||||
|
||||
|
|
@ -216,85 +213,26 @@ async def explain_optimizations( # noqa: D417
|
|||
| ChatCompletionFunctionMessageParam
|
||||
] = [system_message, user_message]
|
||||
|
||||
llm_recorder = LLMCallRecorder()
|
||||
error_recorder = ErrorRecorder()
|
||||
llm_call_id = None
|
||||
|
||||
obs_context: dict = {"optimization_id": data.optimization_id, "speedup": data.speedup}
|
||||
if data.call_sequence:
|
||||
obs_context["call_sequence"] = data.call_sequence
|
||||
|
||||
try:
|
||||
llm_call_id = await llm_recorder.record_llm_call_start(
|
||||
trace_id=data.trace_id,
|
||||
output = await call_llm(
|
||||
llm=explanations_model,
|
||||
messages=messages,
|
||||
call_type="explanation",
|
||||
model_name=explanations_model.name,
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=user_prompt,
|
||||
messages=[{"role": m["role"], "content": m["content"]} for m in messages],
|
||||
temperature=None,
|
||||
n_candidates=1,
|
||||
trace_id=data.trace_id,
|
||||
user_id=user_id,
|
||||
context=obs_context,
|
||||
)
|
||||
except Exception as obs_error:
|
||||
logging.warning(f"Observability recording failed (explain start): {obs_error}")
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
output = await call_llm(llm=explanations_model, messages=messages)
|
||||
await update_optimization_cost(
|
||||
trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, explanations_model), user_id=user_id
|
||||
)
|
||||
except Exception as e:
|
||||
sentry_sdk.capture_exception(e)
|
||||
debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}")
|
||||
|
||||
try:
|
||||
error_context = {"model": explanations_model.name, "optimization_id": data.optimization_id}
|
||||
if data.call_sequence:
|
||||
error_context["call_sequence"] = data.call_sequence
|
||||
await error_recorder.record_error(
|
||||
trace_id=data.trace_id,
|
||||
error_type="llm_api",
|
||||
error_category="explanation_error",
|
||||
severity="error",
|
||||
error_message=str(e),
|
||||
error_code=type(e).__name__,
|
||||
context=error_context,
|
||||
)
|
||||
if llm_call_id:
|
||||
await llm_recorder.record_llm_call_completion(
|
||||
llm_call_id=llm_call_id,
|
||||
status="failed",
|
||||
error_type=type(e).__name__,
|
||||
error_message=str(e),
|
||||
latency_ms=int((time.time() - start_time) * 1000),
|
||||
)
|
||||
except Exception as obs_error:
|
||||
logging.warning(f"Observability recording failed (explain error): {obs_error}")
|
||||
|
||||
return ExplanationsErrorResponseSchema(error=str(e))
|
||||
|
||||
cost = calculate_llm_cost(output.raw_response, explanations_model)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
try:
|
||||
if llm_call_id:
|
||||
await llm_recorder.record_llm_call_completion(
|
||||
llm_call_id=llm_call_id,
|
||||
status="success",
|
||||
raw_response=output.content,
|
||||
prompt_tokens=output.usage.input_tokens if output.usage else None,
|
||||
completion_tokens=output.usage.output_tokens if output.usage else None,
|
||||
total_tokens=(output.usage.input_tokens + output.usage.output_tokens) if output.usage else None,
|
||||
llm_cost=cost,
|
||||
latency_ms=latency_ms,
|
||||
candidates_generated=1,
|
||||
)
|
||||
except Exception as obs_error:
|
||||
logging.warning(f"Observability recording failed (explain completion): {obs_error}")
|
||||
debug_log_sensitive_data(f"AIClient optimization response:\n{output.content}")
|
||||
if output.usage is not None:
|
||||
ph(
|
||||
|
|
|
|||
Loading…
Reference in a new issue