commit e938448e60 (parent d2d4aa2a99)
8 changed files with 100 additions and 122 deletions

@@ -79,9 +79,7 @@ async def code_repair( # noqa: D417
     ] = [system_message, user_message]
     debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
     try:
-        output = await call_llm(
-            model_name=optimize_model.name, model_type=optimize_model.model_type, messages=messages
-        )
+        output = await call_llm(model_name=optimize_model.name, model_type=optimize_model.model_type, messages=messages)
         llm_cost = calculate_llm_cost(output.raw_response, optimize_model)
     except Exception as e:
         logging.exception("Claude Code Generation error in code_repair")

@@ -246,9 +246,7 @@ async def explain_optimizations( # noqa: D417
     # LLM API call with error handling
     try:
         output = await call_llm(
-            model_name=explanations_model.name,
-            model_type=explanations_model.model_type,
-            messages=messages,
+            model_name=explanations_model.name, model_type=explanations_model.model_type, messages=messages
         )
         await update_optimization_cost(
             trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, explanations_model)

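Note on the cost accounting above: `calculate_llm_cost` now receives `output.raw_response` (the provider's `ChatCompletion`) rather than the wrapper object, presumably because it reads token usage straight off that response. Its real definition lives in `aiservice.models.aimodels` and is not part of this diff; the following is only a minimal sketch of the assumed shape, and the per-token price attributes on the model are invented names:

```python
# Hypothetical sketch only; the real calculate_llm_cost in aiservice.models.aimodels may differ.
# It illustrates why call sites now pass output.raw_response: the cost is derived from the
# ChatCompletion's usage counters.
from openai.types.chat import ChatCompletion


def calculate_llm_cost(response: ChatCompletion, model) -> float | None:
    if response.usage is None:
        return None
    # cost_per_input_token / cost_per_output_token are assumed attribute names on the model record
    return (
        response.usage.prompt_tokens * model.cost_per_input_token
        + response.usage.completion_tokens * model.cost_per_output_token
    )
```
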
@@ -14,7 +14,12 @@ from pydantic import ValidationError

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id
-from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
+from aiservice.env_specific import (
+    LLMResponse,
+    call_llm,
+    debug_log_sensitive_data,
+    debug_log_sensitive_data_from_callable,
+)
 from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
 from aiservice.observability.decorators import observe_llm_call
 from authapp.user import get_user_by_id

@@ -122,7 +127,7 @@ async def call_optimization_llm(
     n: int = 1,
     user_id: str | None = None,
     python_version: str | None = None,
-):
+) -> LLMResponse:
     """Call LLM for code optimization with automatic observability.

     This function is decorated with @observe_llm_call which automatically:

@@ -133,15 +138,8 @@ async def call_optimization_llm(

     All observability runs in the background without blocking the LLM call.
     """
-    llm_client = llm_clients[model.model_type]
-    if llm_client is None:
-        raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
-
-    return await llm_client.with_options(max_retries=3).chat.completions.create(
-        model=model.name,
-        messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
-        n=n,
-    )
+    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
+    return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages, n=n)


 async def optimize_python_code(

@@ -206,17 +204,17 @@ async def optimize_python_code(
         debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.source_code}")
         return [], None

-    llm_cost = calculate_llm_cost(output, optimize_model)
+    llm_cost = calculate_llm_cost(output.raw_response, optimize_model)

-    debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.model_dump_json(indent=2)}")
+    debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.raw_response.model_dump_json(indent=2)}")

-    if output.usage is not None:
+    if output.raw_response.usage is not None:
         ph(
             user_id,
             "aiservice-optimize-openai-usage",
-            properties={"model": optimize_model.name, "n": n, "usage": output.usage.json()},
+            properties={"model": optimize_model.name, "n": n, "usage": output.raw_response.usage.json()},
         )
-    results = [content for op in output.choices if (content := op.message.content)]
+    results = output.all_contents if output.all_contents else [output.content]
     optimization_response_items: list[OptimizeResponseItemSchema] = []
     for result in results:
         ctx.extract_code_and_explanation_from_llm_res(result)

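The `call_llm` helper and the `LLMResponse` wrapper that this commit switches to are imported from `aiservice.env_specific`, whose definition is not included in this diff. A minimal sketch consistent with how the call sites use them (`.raw_response` for cost and usage logging, `.content` and `.all_contents` for the generated text) might look like the following; the names, defaults, and retry count here are assumptions, not the actual implementation:

```python
# Sketch of the assumed call_llm / LLMResponse interface; the real code in aiservice.env_specific may differ.
from dataclasses import dataclass

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion

# keyed by model_type and populated at startup elsewhere (assumption)
llm_clients: dict[str, AsyncOpenAI | None] = {}


@dataclass
class LLMResponse:
    raw_response: ChatCompletion  # full provider response, passed to calculate_llm_cost and usage logging

    @property
    def content(self) -> str | None:
        # first choice's message content, as read by the single-completion call sites
        return self.raw_response.choices[0].message.content

    @property
    def all_contents(self) -> list[str]:
        # every non-empty choice, used by the n > 1 call sites
        return [c.message.content for c in self.raw_response.choices if c.message.content]


async def call_llm(
    model_name: str,
    model_type: str,
    messages: list[dict[str, str]],
    n: int = 1,
    temperature: float | None = None,
) -> LLMResponse:
    client = llm_clients.get(model_type)
    if client is None:
        raise ValueError(f"LLM client for model type '{model_type}' is not available")
    kwargs: dict = {"model": model_name, "messages": messages, "n": n}
    if temperature is not None:
        kwargs["temperature"] = temperature
    # max_retries=3 mirrors the highest retry count seen in the replaced call sites (assumption)
    raw = await client.with_options(max_retries=3).chat.completions.create(**kwargs)
    return LLMResponse(raw_response=raw)
```
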
@@ -10,7 +10,12 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import parse_python_version, validate_trace_id
-from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
+from aiservice.env_specific import (
+    LLMResponse,
+    call_llm,
+    debug_log_sensitive_data,
+    debug_log_sensitive_data_from_callable,
+)
 from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost

@@ -51,7 +56,7 @@ async def call_line_profiler_llm(
     user_id: str | None = None,
     python_version: str | None = None,
     context: dict | None = None,
-):
+) -> LLMResponse:
     """Call LLM for line profiler optimization with automatic observability.

     This function is decorated with @observe_llm_call which automatically:

@@ -62,13 +67,7 @@ async def call_line_profiler_llm(

     All observability runs in the background without blocking the LLM call.
     """
-    llm_client = llm_clients[model.model_type]
-    if llm_client is None:
-        raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
-
-    return await llm_client.with_options(max_retries=3).chat.completions.create(
-        model=model.name, messages=messages, n=n
-    )
+    return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages, n=n)


 async def optimize_python_code_line_profiler( # noqa: D417

@@ -131,22 +130,22 @@ async def optimize_python_code_line_profiler( # noqa: D417
             python_version=python_version_str,
             context={"lsp_mode": lsp_mode},
         )
-        await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output, optimize_model))
+        await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output.raw_response, optimize_model))
     except Exception as e:
         logging.exception("OpenAI Code Generation error in optimizer-line-profiler")
         sentry_sdk.capture_exception(e)
         debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.source_code}")
         return []

-    debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.model_dump_json(indent=2)}")
+    debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.raw_response.model_dump_json(indent=2)}")

-    if output.usage is not None:
+    if output.raw_response.usage is not None:
         ph(
             user_id,
             "aiservice-optimize-line-profiler-openai-usage",
-            properties={"model": optimize_model.name, "n": n, "usage": output.usage.json()},
+            properties={"model": optimize_model.name, "n": n, "usage": output.raw_response.usage.json()},
         )
-    results = [content for op in output.choices if (content := op.message.content)]
+    results = output.all_contents if output.all_contents else [output.content]
     optimization_response_items: list[OptimizeResponseItemSchema] = []
     for result in results:
         ctx.extract_code_and_explanation_from_llm_res(result)

@@ -9,7 +9,7 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import validate_trace_id
-from aiservice.env_specific import debug_log_sensitive_data, llm_clients
+from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
 from aiservice.models.aimodels import LLM, RANKING_MODEL, calculate_llm_cost
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost

@@ -78,7 +78,7 @@ Here are the function references
 @observe_llm_call("ranking")
 async def call_ranker_llm(
     trace_id: str, model: LLM, messages: list[dict[str, str]], user_id: str | None = None, context: dict | None = None
-):
+) -> LLMResponse:
     """Call LLM for ranking with automatic observability.

     This function is decorated with @observe_llm_call which automatically:

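For context, the `@observe_llm_call(...)` decorator referenced in these signatures comes from `aiservice.observability.decorators` and is untouched by this commit; the docstrings only state that it records observability data in the background without blocking the call. A rough sketch of that shape, purely as an assumption (the helper `_record_llm_call` is a placeholder name, not a real function in the codebase):

```python
# Assumed shape of observe_llm_call; the real decorator in aiservice.observability.decorators
# may work quite differently.
import asyncio
import functools


async def _record_llm_call(kind: str, kwargs: dict, response) -> None:
    """Placeholder for whatever tracing/metrics the real decorator emits."""


def observe_llm_call(kind: str):
    def decorator(fn):
        @functools.wraps(fn)
        async def wrapper(*args, **kwargs):
            response = await fn(*args, **kwargs)
            # fire-and-forget: observability must not block the awaited LLM call
            asyncio.create_task(_record_llm_call(kind, kwargs, response))
            return response

        return wrapper

    return decorator
```
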
@@ -89,13 +89,7 @@ async def call_ranker_llm(

     All observability runs in the background without blocking the LLM call.
     """
-    llm_client = llm_clients[model.model_type]
-    if llm_client is None:
-        raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
-
-    return await llm_client.with_options(max_retries=2).chat.completions.create(
-        model=model.name, messages=messages, n=1
-    )
+    return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages)


 async def rank_optimizations( # noqa: D417

@@ -149,21 +143,21 @@ async def rank_optimizations( # noqa: D417
                 "python_version": data.python_version,
             },
         )
-        await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, rank_model))
+        await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, rank_model))
     except Exception as e: # noqa: BLE001
         debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}")
         sentry_sdk.capture_exception(e)
         return RankErrorResponseSchema(error=str(e))
     debug_log_sensitive_data(f"AIClient optimization response:\n{output}")
-    if output.usage is not None:
+    if output.raw_response.usage is not None:
         ph(
             user_id,
             "aiservice-optimize-openai-usage",
-            properties={"model": rank_model.name, "n": 1, "usage": output.usage.model_dump_json()},
+            properties={"model": rank_model.name, "n": 1, "usage": output.raw_response.usage.model_dump_json()},
         )
     # parse xml tag for explanation, ranking
     try:
-        explanation_match = re.search(explain_regex_pattern, output.choices[0].message.content)
+        explanation_match = re.search(explain_regex_pattern, output.content)
         explanation = explanation_match.group(1)
     except: # noqa: E722
         # TODO add logging instead of print("No explanation found")

@@ -174,7 +168,7 @@ async def rank_optimizations( # noqa: D417
         pass
     # still doing stuff instead of returning coz ranking is important
     try:
-        ranking_match = re.search(rank_regex_pattern, output.choices[0].message.content)
+        ranking_match = re.search(rank_regex_pattern, output.content)
         # TODO better parsing, could be only comma separated, need to handle all edge cases
         ranking = list(map(int, ranking_match.group(1).strip().split(",")))
     except: # noqa: E722

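The `explain_regex_pattern` and `rank_regex_pattern` used above are defined elsewhere in the ranker module and are not shown in this diff. An illustration of the parsing step against the new `output.content` accessor, with made-up tag-style patterns and a made-up response string:

```python
# Illustrative only: the real patterns and response format may differ.
import re

explain_regex_pattern = r"<explanation>(.*?)</explanation>"  # assumed pattern
rank_regex_pattern = r"<ranking>(.*?)</ranking>"  # assumed pattern

content = "<explanation>Candidate 2 vectorizes the loop</explanation><ranking>2, 1, 3</ranking>"

explanation = re.search(explain_regex_pattern, content, re.DOTALL).group(1)
ranking = list(map(int, re.search(rank_regex_pattern, content).group(1).strip().split(",")))
print(explanation)  # Candidate 2 vectorizes the loop
print(ranking)      # [2, 1, 3]
```
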
@@ -8,7 +8,7 @@ from pathlib import Path
 from typing import SupportsIndex

 from aiservice.common_utils import parse_python_version, safe_isort
-from aiservice.env_specific import debug_log_sensitive_data, llm_clients
+from aiservice.env_specific import call_llm, debug_log_sensitive_data
 from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost
 from aiservice.models.functions_to_optimize import FunctionToOptimize
 from log_features.log_event import update_optimization_cost

@@ -141,25 +141,25 @@ async def generate_regression_tests_from_function(
     if print_text:
         print_messages(explain_messages)
     try:
-        llm_client = llm_clients[explain_model.model_type]
-        if llm_client is None:
-            raise TestGenerationFailedException(f"LLM client for model type '{explain_model.model_type}' is not available")
-        explanation_response = await llm_client.with_options(max_retries=2).chat.completions.create(
-            model=explain_model.name, messages=explain_messages, temperature=temperature
+        explanation_response = await call_llm(
+            model_name=explain_model.name,
+            model_type=explain_model.model_type,
+            messages=explain_messages,
+            temperature=temperature,
         )
-        total_llm_cost += calculate_llm_cost(explanation_response, explain_model) or 0.0
+        total_llm_cost += calculate_llm_cost(explanation_response.raw_response, explain_model) or 0.0
     except Exception as e:
         logging.exception("OpenAI client error in explain step")
         sentry_sdk.capture_exception(e)
         raise TestGenerationFailedException(e) from e
-    debug_log_sensitive_data(f"OpenAIClient explanation response:\n{explanation_response.model_dump_json(indent=2)}")
-    if explanation_response.usage is not None:
+    debug_log_sensitive_data(f"OpenAIClient explanation response:\n{explanation_response.raw_response.model_dump_json(indent=2)}")
+    if explanation_response.raw_response.usage is not None:
         ph(
             user_id,
             "aiservice-testgen-explain-openai-usage",
-            properties={"model": explain_model.name, "usage": explanation_response.usage.json()},
+            properties={"model": explain_model.name, "usage": explanation_response.raw_response.usage.json()},
         )
-    explanation = explanation_response.choices[0].message.content
+    explanation = explanation_response.content
     explain_assistant_message = {"role": "assistant", "content": explanation}

     # Step 1b: Fetch relevant data from the database to use as inputs based on function explanation

@@ -170,19 +170,19 @@ async def generate_regression_tests_from_function(
     if print_text:
         print_messages(explain_messages)
     try:
-        llm_client = llm_clients[execute_model.model_type]
-        if llm_client is None:
-            raise TestGenerationFailedException(f"LLM client for model type '{execute_model.model_type}' is not available")
-        fetch_data_response = await llm_client.with_options(max_retries=2).chat.completions.create(
-            model=execute_model.name, messages=fetch_data_messages, temperature=temperature
+        fetch_data_response = await call_llm(
+            model_name=execute_model.name,
+            model_type=execute_model.model_type,
+            messages=fetch_data_messages,
+            temperature=temperature,
         )
-        total_llm_cost += calculate_llm_cost(fetch_data_response, execute_model) or 0.0
+        total_llm_cost += calculate_llm_cost(fetch_data_response.raw_response, execute_model) or 0.0
     except Exception as e:
         logging.exception("OpenAI client error in explain step")
         sentry_sdk.capture_exception(e)
         raise TestGenerationFailedException(e) from e

-    fetch_data_function = fetch_data_response.choices[0].message.content
+    fetch_data_function = fetch_data_response.content
     fetch_data_function = fetch_data_function.split("```python")[1].split("```")[0].strip()

     # Step 1c: Run the function to get the data

@@ -219,26 +219,26 @@ To help unit test the function above, list diverse scenarios that the function s
     if print_text:
         print_messages([plan_user_message])
     try:
-        llm_client = llm_clients[plan_model.model_type]
-        if llm_client is None:
-            raise TestGenerationFailedException(f"LLM client for model type '{plan_model.model_type}' is not available")
-        plan_response = await llm_client.with_options(max_retries=2).chat.completions.create(
-            model=plan_model.name, messages=plan_messages, temperature=temperature
+        plan_response = await call_llm(
+            model_name=plan_model.name,
+            model_type=plan_model.model_type,
+            messages=plan_messages,
+            temperature=temperature,
         )
-        total_llm_cost += calculate_llm_cost(plan_response, plan_model) or 0.0
+        total_llm_cost += calculate_llm_cost(plan_response.raw_response, plan_model) or 0.0
     except Exception as e:
         logging.exception("OpenAI client error in plan step")
         sentry_sdk.capture_exception(e)
         raise TestGenerationFailedException(e) from e
-    debug_log_sensitive_data(f"OpenAIClient plan response:\n{plan_response.model_dump_json(indent=2)}")
-    if plan_response.usage is not None:
+    debug_log_sensitive_data(f"OpenAIClient plan response:\n{plan_response.raw_response.model_dump_json(indent=2)}")
+    if plan_response.raw_response.usage is not None:
         ph(
             user_id,
             "aiservice-testgen-plan-openai-usage",
-            properties={"model": plan_model.name, "usage": plan_response.usage.json()},
+            properties={"model": plan_model.name, "usage": plan_response.raw_response.usage.json()},
         )

-    plan = plan_response.choices[0].message.content
+    plan = plan_response.content
     plan_assistant_message = {"role": "assistant", "content": plan}

     # Step 2b: If the plan is short, ask GPT to elaborate further

@@ -261,23 +261,23 @@ To help unit test the function above, list diverse scenarios that the function s
     if print_text:
         print_messages([elaboration_user_message])
     try:
-        llm_client = llm_clients[plan_model.model_type]
-        if llm_client is None:
-            raise TestGenerationFailedException(f"LLM client for model type '{plan_model.model_type}' is not available")
-        elaboration_response = await llm_client.with_options(max_retries=2).chat.completions.create(
-            model=plan_model.name, messages=elaboration_messages, temperature=temperature
+        elaboration_response = await call_llm(
+            model_name=plan_model.name,
+            model_type=plan_model.model_type,
+            messages=elaboration_messages,
+            temperature=temperature,
         )
-        total_llm_cost += calculate_llm_cost(elaboration_response, plan_model) or 0.0
+        total_llm_cost += calculate_llm_cost(elaboration_response.raw_response, plan_model) or 0.0
     except Exception as e:
         logging.exception("OpenAI client error in elaboration step")
         sentry_sdk.capture_exception(e)
         raise TestGenerationFailedException(e) from e

     debug_log_sensitive_data(
-        f"OpenAIClient elaboration response:\n{elaboration_response.model_dump_json(indent=2)}"
+        f"OpenAIClient elaboration response:\n{elaboration_response.raw_response.model_dump_json(indent=2)}"
     )

-    elaboration = elaboration_response.choices[0].message.content
+    elaboration = elaboration_response.content
     elaboration_assistant_message = {"role": "assistant", "content": elaboration}

     # Step 3: Generate the unit test

@@ -314,25 +314,25 @@ To help unit test the function above, list diverse scenarios that the function s
     tries = 2
     while tries > 0:
         try:
-            llm_client = llm_clients[execute_model.model_type]
-            if llm_client is None:
-                raise TestGenerationFailedException(f"LLM client for model type '{execute_model.model_type}' is not available")
-            execute_response = await llm_client.with_options(max_retries=2).chat.completions.create(
-                model=execute_model.name, messages=execute_messages, temperature=temperature
+            execute_response = await call_llm(
+                model_name=execute_model.name,
+                model_type=execute_model.model_type,
+                messages=execute_messages,
+                temperature=temperature,
             )
-            total_llm_cost += calculate_llm_cost(execute_response, execute_model) or 0.0
+            total_llm_cost += calculate_llm_cost(execute_response.raw_response, execute_model) or 0.0
         except Exception as e:
            logging.exception("OpenAI client error in execute step")
            sentry_sdk.capture_exception(e)
            raise TestGenerationFailedException(e) from e
-        debug_log_sensitive_data(f"OpenAIClient execute response:\n{execute_response.model_dump_json(indent=2)}")
-        if execute_response.usage is not None:
+        debug_log_sensitive_data(f"OpenAIClient execute response:\n{execute_response.raw_response.model_dump_json(indent=2)}")
+        if execute_response.raw_response.usage is not None:
             ph(
                 user_id,
                 "aiservice-testgen-execute-openai-usage",
-                properties={"model": execute_model.name, "usage": execute_response.usage.json()},
+                properties={"model": execute_model.name, "usage": execute_response.raw_response.usage.json()},
             )
-        execution_output = execute_response.choices[0].message.content
+        execution_output = execute_response.content

         # check the output for errors
         code = execution_output.split("```python")[1].split("```")[0].strip()

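The unchanged extraction line above assumes the model answers with a fenced Python block. A small standalone illustration of that contract (the fence delimiters are built from a variable here only so this example can itself sit inside a fenced block):

```python
# Illustration of the fenced-Python extraction used by the test-generation steps.
fence = "`" * 3  # literal triple backtick

execution_output = (
    "Here are the tests:\n"
    f"{fence}python\n"
    "def test_add():\n"
    "    assert add(1, 2) == 3\n"
    f"{fence}\n"
)

# same split/strip logic as the context line above
code = execution_output.split(f"{fence}python")[1].split(fence)[0].strip()
print(code)  # prints the bare test function, ready to be validated and executed
```
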
@@ -16,7 +16,7 @@ from openai import OpenAIError

 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id
-from aiservice.env_specific import IS_PRODUCTION, debug_log_sensitive_data, llm_clients
+from aiservice.env_specific import IS_PRODUCTION, LLMResponse, call_llm, debug_log_sensitive_data
 from aiservice.models.aimodels import EXECUTE_MODEL, calculate_llm_cost
 from aiservice.observability.decorators import observe_llm_call
 from log_features.log_event import update_optimization_cost

@@ -198,7 +198,7 @@ async def call_testgen_llm(
     temperature: float,
     user_id: str | None = None,
     python_version: str | None = None,
-):
+) -> LLMResponse:
     """Call LLM for test generation with automatic observability.

     This function is decorated with @observe_llm_call which automatically:

@@ -209,12 +209,8 @@ async def call_testgen_llm(

     All observability runs in the background without blocking the LLM call.
     """
-    llm_client = llm_clients[model.model_type]
-    if llm_client is None:
-        raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
-
-    return await llm_client.with_options(max_retries=2).chat.completions.create(
-        model=model.name, messages=messages, temperature=temperature
+    return await call_llm(
+        model_name=model.name, model_type=model.model_type, messages=messages, temperature=temperature
     )


@@ -242,24 +238,23 @@ async def generate_and_validate_test_code(
         python_version=".".join(str(v) for v in python_version),
     )

-    cost = calculate_llm_cost(response, execute_model) or 0.0
+    cost = calculate_llm_cost(response.raw_response, execute_model) or 0.0
     cost_tracker.append(cost)

-    debug_log_sensitive_data(f"OpenAIClient {error_context}execute response:\n{response.model_dump_json(indent=2)}")
+    debug_log_sensitive_data(
+        f"OpenAIClient {error_context}execute response:\n{response.raw_response.model_dump_json(indent=2)}"
+    )

-    if response.usage:
+    if response.raw_response.usage:
         ph(
             user_id,
             f"aiservice-testgen-{posthog_event_suffix}execute-openai-usage",
-            properties={"model": execute_model.name, "usage": response.usage.model_dump_json()},
+            properties={"model": execute_model.name, "usage": response.raw_response.usage.model_dump_json()},
         )

     # Parse and validate
     validated_code = parse_and_validate_llm_output(
-        response_content=response.choices[0].message.content,
-        ctx=ctx,
-        python_version=python_version,
-        error_context=error_context,
+        response_content=response.content, ctx=ctx, python_version=python_version, error_context=error_context
     )

     return validated_code

@@ -11,7 +11,7 @@ from ninja import NinjaAPI, Schema
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam

 from aiservice.analytics.posthog import ph
-from aiservice.env_specific import debug_log_sensitive_data, llm_clients
+from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
 from aiservice.models.aimodels import EXECUTE_MODEL
 from aiservice.observability.decorators import observe_llm_call


@@ -77,7 +77,7 @@ async def call_workflow_gen_llm(
     n: int = 1,
     user_id: str | None = None,
     context: dict | None = None,
-):
+) -> LLMResponse:
     """Call LLM for workflow generation with automatic observability.

     This function is decorated with @observe_llm_call which automatically:

@@ -88,12 +88,8 @@ async def call_workflow_gen_llm(

     All observability runs in the background without blocking the LLM call.
     """
-    llm_client = llm_clients[model.model_type]
-    if llm_client is None:
-        raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
-
-    return await llm_client.with_options(max_retries=2).chat.completions.create(
-        model=model.name, messages=messages, n=n, temperature=temperature
+    return await call_llm(
+        model_name=model.name, model_type=model.model_type, messages=messages, n=n, temperature=temperature
     )


@@ -135,11 +131,11 @@ async def generate_workflow_steps_llm(
         context={"num_files": len(repo_files)},
     )

-    if not response.choices or not response.choices[0].message.content:
+    if not response.content:
         logger.warning("LLM returned empty response for workflow generation")
         return None

-    response_text = response.choices[0].message.content.strip()
+    response_text = response.content.strip()

     # Extract YAML steps
     steps_yaml = _extract_yaml_steps(response_text)