This commit is contained in:
Kevin Turcios 2025-12-26 15:21:46 -05:00
parent a03b22bab7
commit ac566d6ad0
2 changed files with 6 additions and 68 deletions

View file

@@ -12,8 +12,6 @@ from anthropic import AsyncAnthropicFoundry
from openai import AsyncOpenAI
from pydantic.dataclasses import dataclass as pydantic_dataclass
from aiservice.observability.database import record_llm_call
if TYPE_CHECKING:
from anthropic.types import Message as AnthropicMessage
from openai.types.chat import ChatCompletion
@@ -141,6 +139,8 @@ async def call_llm(
context: dict[str, Any] | None = None,
) -> LLMResponse:
"""Call LLM with OpenAI or Anthropic client."""
from aiservice.observability.database import record_llm_call # noqa: PLC0415
client = llm_clients[llm.model_type]
if client is None:
msg = f"LLM client for model type '{llm.model_type}' is not available"

View file

@@ -1,8 +1,6 @@
from __future__ import annotations
import logging
import re
import time
from typing import TYPE_CHECKING
import sentry_sdk
@@ -14,7 +12,6 @@ from aiservice.analytics.posthog import ph
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data
from aiservice.llm import EXPLANATIONS_MODEL, LLM, calculate_llm_cost, call_llm
from aiservice.observability.database import ErrorRecorder, LLMCallRecorder
from log_features.log_event import update_optimization_cost
from log_features.log_features import log_features
@@ -216,85 +213,26 @@ async def explain_optimizations( # noqa: D417
| ChatCompletionFunctionMessageParam
] = [system_message, user_message]
llm_recorder = LLMCallRecorder()
error_recorder = ErrorRecorder()
llm_call_id = None
obs_context: dict = {"optimization_id": data.optimization_id, "speedup": data.speedup}
if data.call_sequence:
obs_context["call_sequence"] = data.call_sequence
try:
llm_call_id = await llm_recorder.record_llm_call_start(
trace_id=data.trace_id,
output = await call_llm(
llm=explanations_model,
messages=messages,
call_type="explanation",
model_name=explanations_model.name,
system_prompt=system_prompt,
user_prompt=user_prompt,
messages=[{"role": m["role"], "content": m["content"]} for m in messages],
temperature=None,
n_candidates=1,
trace_id=data.trace_id,
user_id=user_id,
context=obs_context,
)
except Exception as obs_error:
logging.warning(f"Observability recording failed (explain start): {obs_error}")
start_time = time.time()
try:
output = await call_llm(llm=explanations_model, messages=messages)
await update_optimization_cost(
trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, explanations_model), user_id=user_id
)
except Exception as e:
sentry_sdk.capture_exception(e)
debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}")
try:
error_context = {"model": explanations_model.name, "optimization_id": data.optimization_id}
if data.call_sequence:
error_context["call_sequence"] = data.call_sequence
await error_recorder.record_error(
trace_id=data.trace_id,
error_type="llm_api",
error_category="explanation_error",
severity="error",
error_message=str(e),
error_code=type(e).__name__,
context=error_context,
)
if llm_call_id:
await llm_recorder.record_llm_call_completion(
llm_call_id=llm_call_id,
status="failed",
error_type=type(e).__name__,
error_message=str(e),
latency_ms=int((time.time() - start_time) * 1000),
)
except Exception as obs_error:
logging.warning(f"Observability recording failed (explain error): {obs_error}")
return ExplanationsErrorResponseSchema(error=str(e))
cost = calculate_llm_cost(output.raw_response, explanations_model)
latency_ms = int((time.time() - start_time) * 1000)
try:
if llm_call_id:
await llm_recorder.record_llm_call_completion(
llm_call_id=llm_call_id,
status="success",
raw_response=output.content,
prompt_tokens=output.usage.input_tokens if output.usage else None,
completion_tokens=output.usage.output_tokens if output.usage else None,
total_tokens=(output.usage.input_tokens + output.usage.output_tokens) if output.usage else None,
llm_cost=cost,
latency_ms=latency_ms,
candidates_generated=1,
)
except Exception as obs_error:
logging.warning(f"Observability recording failed (explain completion): {obs_error}")
debug_log_sensitive_data(f"AIClient optimization response:\n{output.content}")
if output.usage is not None:
ph(