This commit is contained in:
Kevin Turcios 2025-12-22 23:28:29 -05:00
parent d2d4aa2a99
commit e938448e60
8 changed files with 100 additions and 122 deletions

View file

@@ -79,9 +79,7 @@ async def code_repair( # noqa: D417
] = [system_message, user_message]
debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
try:
output = await call_llm(
model_name=optimize_model.name, model_type=optimize_model.model_type, messages=messages
)
output = await call_llm(model_name=optimize_model.name, model_type=optimize_model.model_type, messages=messages)
llm_cost = calculate_llm_cost(output.raw_response, optimize_model)
except Exception as e:
logging.exception("Claude Code Generation error in code_repair")

View file

@@ -246,9 +246,7 @@ async def explain_optimizations( # noqa: D417
# LLM API call with error handling
try:
output = await call_llm(
model_name=explanations_model.name,
model_type=explanations_model.model_type,
messages=messages,
model_name=explanations_model.name, model_type=explanations_model.model_type, messages=messages
)
await update_optimization_cost(
trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, explanations_model)

View file

@@ -14,7 +14,12 @@ from pydantic import ValidationError
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
from aiservice.env_specific import (
LLMResponse,
call_llm,
debug_log_sensitive_data,
debug_log_sensitive_data_from_callable,
)
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from authapp.user import get_user_by_id
@@ -122,7 +127,7 @@ async def call_optimization_llm(
n: int = 1,
user_id: str | None = None,
python_version: str | None = None,
):
) -> LLMResponse:
"""Call LLM for code optimization with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -133,15 +138,8 @@ async def call_optimization_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=3).chat.completions.create(
model=model.name,
messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
n=n,
)
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages, n=n)
async def optimize_python_code(
@@ -206,17 +204,17 @@ async def optimize_python_code(
debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.source_code}")
return [], None
llm_cost = calculate_llm_cost(output, optimize_model)
llm_cost = calculate_llm_cost(output.raw_response, optimize_model)
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.model_dump_json(indent=2)}")
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.raw_response.model_dump_json(indent=2)}")
if output.usage is not None:
if output.raw_response.usage is not None:
ph(
user_id,
"aiservice-optimize-openai-usage",
properties={"model": optimize_model.name, "n": n, "usage": output.usage.json()},
properties={"model": optimize_model.name, "n": n, "usage": output.raw_response.usage.json()},
)
results = [content for op in output.choices if (content := op.message.content)]
results = output.all_contents if output.all_contents else [output.content]
optimization_response_items: list[OptimizeResponseItemSchema] = []
for result in results:
ctx.extract_code_and_explanation_from_llm_res(result)

View file

@@ -10,7 +10,12 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
from aiservice.env_specific import (
LLMResponse,
call_llm,
debug_log_sensitive_data,
debug_log_sensitive_data_from_callable,
)
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from log_features.log_event import update_optimization_cost
@@ -51,7 +56,7 @@ async def call_line_profiler_llm(
user_id: str | None = None,
python_version: str | None = None,
context: dict | None = None,
):
) -> LLMResponse:
"""Call LLM for line profiler optimization with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -62,13 +67,7 @@ async def call_line_profiler_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=3).chat.completions.create(
model=model.name, messages=messages, n=n
)
return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages, n=n)
async def optimize_python_code_line_profiler( # noqa: D417
@@ -131,22 +130,22 @@ async def optimize_python_code_line_profiler( # noqa: D417
python_version=python_version_str,
context={"lsp_mode": lsp_mode},
)
await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output, optimize_model))
await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output.raw_response, optimize_model))
except Exception as e:
logging.exception("OpenAI Code Generation error in optimizer-line-profiler")
sentry_sdk.capture_exception(e)
debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.source_code}")
return []
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.model_dump_json(indent=2)}")
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.raw_response.model_dump_json(indent=2)}")
if output.usage is not None:
if output.raw_response.usage is not None:
ph(
user_id,
"aiservice-optimize-line-profiler-openai-usage",
properties={"model": optimize_model.name, "n": n, "usage": output.usage.json()},
properties={"model": optimize_model.name, "n": n, "usage": output.raw_response.usage.json()},
)
results = [content for op in output.choices if (content := op.message.content)]
results = output.all_contents if output.all_contents else [output.content]
optimization_response_items: list[OptimizeResponseItemSchema] = []
for result in results:
ctx.extract_code_and_explanation_from_llm_res(result)

View file

@@ -9,7 +9,7 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs
from aiservice.analytics.posthog import ph
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import LLM, RANKING_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from log_features.log_event import update_optimization_cost
@@ -78,7 +78,7 @@ Here are the function references
@observe_llm_call("ranking")
async def call_ranker_llm(
trace_id: str, model: LLM, messages: list[dict[str, str]], user_id: str | None = None, context: dict | None = None
):
) -> LLMResponse:
"""Call LLM for ranking with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -89,13 +89,7 @@ async def call_ranker_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=2).chat.completions.create(
model=model.name, messages=messages, n=1
)
return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages)
async def rank_optimizations( # noqa: D417
@@ -149,21 +143,21 @@ async def rank_optimizations( # noqa: D417
"python_version": data.python_version,
},
)
await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, rank_model))
await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, rank_model))
except Exception as e: # noqa: BLE001
debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}")
sentry_sdk.capture_exception(e)
return RankErrorResponseSchema(error=str(e))
debug_log_sensitive_data(f"AIClient optimization response:\n{output}")
if output.usage is not None:
if output.raw_response.usage is not None:
ph(
user_id,
"aiservice-optimize-openai-usage",
properties={"model": rank_model.name, "n": 1, "usage": output.usage.model_dump_json()},
properties={"model": rank_model.name, "n": 1, "usage": output.raw_response.usage.model_dump_json()},
)
# parse xml tag for explanation, ranking
try:
explanation_match = re.search(explain_regex_pattern, output.choices[0].message.content)
explanation_match = re.search(explain_regex_pattern, output.content)
explanation = explanation_match.group(1)
except: # noqa: E722
# TODO add logging instead of print("No explanation found")
@@ -174,7 +168,7 @@ async def rank_optimizations( # noqa: D417
pass
# still doing stuff instead of returning coz ranking is important
try:
ranking_match = re.search(rank_regex_pattern, output.choices[0].message.content)
ranking_match = re.search(rank_regex_pattern, output.content)
# TODO better parsing, could be only comma separated, need to handle all edge cases
ranking = list(map(int, ranking_match.group(1).strip().split(",")))
except: # noqa: E722

View file

@@ -8,7 +8,7 @@ from pathlib import Path
from typing import SupportsIndex
from aiservice.common_utils import parse_python_version, safe_isort
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.env_specific import call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost
from aiservice.models.functions_to_optimize import FunctionToOptimize
from log_features.log_event import update_optimization_cost
@@ -141,25 +141,25 @@ async def generate_regression_tests_from_function(
if print_text:
print_messages(explain_messages)
try:
llm_client = llm_clients[explain_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{explain_model.model_type}' is not available")
explanation_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=explain_model.name, messages=explain_messages, temperature=temperature
explanation_response = await call_llm(
model_name=explain_model.name,
model_type=explain_model.model_type,
messages=explain_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(explanation_response, explain_model) or 0.0
total_llm_cost += calculate_llm_cost(explanation_response.raw_response, explain_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in explain step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(f"OpenAIClient explanation response:\n{explanation_response.model_dump_json(indent=2)}")
if explanation_response.usage is not None:
debug_log_sensitive_data(f"OpenAIClient explanation response:\n{explanation_response.raw_response.model_dump_json(indent=2)}")
if explanation_response.raw_response.usage is not None:
ph(
user_id,
"aiservice-testgen-explain-openai-usage",
properties={"model": explain_model.name, "usage": explanation_response.usage.json()},
properties={"model": explain_model.name, "usage": explanation_response.raw_response.usage.json()},
)
explanation = explanation_response.choices[0].message.content
explanation = explanation_response.content
explain_assistant_message = {"role": "assistant", "content": explanation}
# Step 1b: Fetch relevant data from the database to use as inputs based on function explanation
@@ -170,19 +170,19 @@ async def generate_regression_tests_from_function(
if print_text:
print_messages(explain_messages)
try:
llm_client = llm_clients[execute_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{execute_model.model_type}' is not available")
fetch_data_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=execute_model.name, messages=fetch_data_messages, temperature=temperature
fetch_data_response = await call_llm(
model_name=execute_model.name,
model_type=execute_model.model_type,
messages=fetch_data_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(fetch_data_response, execute_model) or 0.0
total_llm_cost += calculate_llm_cost(fetch_data_response.raw_response, execute_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in explain step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
fetch_data_function = fetch_data_response.choices[0].message.content
fetch_data_function = fetch_data_response.content
fetch_data_function = fetch_data_function.split("```python")[1].split("```")[0].strip()
# Step 1c: Run the function to get the data
@@ -219,26 +219,26 @@ To help unit test the function above, list diverse scenarios that the function s
if print_text:
print_messages([plan_user_message])
try:
llm_client = llm_clients[plan_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{plan_model.model_type}' is not available")
plan_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=plan_model.name, messages=plan_messages, temperature=temperature
plan_response = await call_llm(
model_name=plan_model.name,
model_type=plan_model.model_type,
messages=plan_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(plan_response, plan_model) or 0.0
total_llm_cost += calculate_llm_cost(plan_response.raw_response, plan_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in plan step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(f"OpenAIClient plan response:\n{plan_response.model_dump_json(indent=2)}")
if plan_response.usage is not None:
debug_log_sensitive_data(f"OpenAIClient plan response:\n{plan_response.raw_response.model_dump_json(indent=2)}")
if plan_response.raw_response.usage is not None:
ph(
user_id,
"aiservice-testgen-plan-openai-usage",
properties={"model": plan_model.name, "usage": plan_response.usage.json()},
properties={"model": plan_model.name, "usage": plan_response.raw_response.usage.json()},
)
plan = plan_response.choices[0].message.content
plan = plan_response.content
plan_assistant_message = {"role": "assistant", "content": plan}
# Step 2b: If the plan is short, ask GPT to elaborate further
@@ -261,23 +261,23 @@ To help unit test the function above, list diverse scenarios that the function s
if print_text:
print_messages([elaboration_user_message])
try:
llm_client = llm_clients[plan_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{plan_model.model_type}' is not available")
elaboration_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=plan_model.name, messages=elaboration_messages, temperature=temperature
elaboration_response = await call_llm(
model_name=plan_model.name,
model_type=plan_model.model_type,
messages=elaboration_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(elaboration_response, plan_model) or 0.0
total_llm_cost += calculate_llm_cost(elaboration_response.raw_response, plan_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in elaboration step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(
f"OpenAIClient elaboration response:\n{elaboration_response.model_dump_json(indent=2)}"
f"OpenAIClient elaboration response:\n{elaboration_response.raw_response.model_dump_json(indent=2)}"
)
elaboration = elaboration_response.choices[0].message.content
elaboration = elaboration_response.content
elaboration_assistant_message = {"role": "assistant", "content": elaboration}
# Step 3: Generate the unit test
@@ -314,25 +314,25 @@ To help unit test the function above, list diverse scenarios that the function s
tries = 2
while tries > 0:
try:
llm_client = llm_clients[execute_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{execute_model.model_type}' is not available")
execute_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=execute_model.name, messages=execute_messages, temperature=temperature
execute_response = await call_llm(
model_name=execute_model.name,
model_type=execute_model.model_type,
messages=execute_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(execute_response, execute_model) or 0.0
total_llm_cost += calculate_llm_cost(execute_response.raw_response, execute_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in execute step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(f"OpenAIClient execute response:\n{execute_response.model_dump_json(indent=2)}")
if execute_response.usage is not None:
debug_log_sensitive_data(f"OpenAIClient execute response:\n{execute_response.raw_response.model_dump_json(indent=2)}")
if execute_response.raw_response.usage is not None:
ph(
user_id,
"aiservice-testgen-execute-openai-usage",
properties={"model": execute_model.name, "usage": execute_response.usage.json()},
properties={"model": execute_model.name, "usage": execute_response.raw_response.usage.json()},
)
execution_output = execute_response.choices[0].message.content
execution_output = execute_response.content
# check the output for errors
code = execution_output.split("```python")[1].split("```")[0].strip()

View file

@@ -16,7 +16,7 @@ from openai import OpenAIError
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id
from aiservice.env_specific import IS_PRODUCTION, debug_log_sensitive_data, llm_clients
from aiservice.env_specific import IS_PRODUCTION, LLMResponse, call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from log_features.log_event import update_optimization_cost
@@ -198,7 +198,7 @@ async def call_testgen_llm(
temperature: float,
user_id: str | None = None,
python_version: str | None = None,
):
) -> LLMResponse:
"""Call LLM for test generation with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -209,12 +209,8 @@ async def call_testgen_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=2).chat.completions.create(
model=model.name, messages=messages, temperature=temperature
return await call_llm(
model_name=model.name, model_type=model.model_type, messages=messages, temperature=temperature
)
@@ -242,24 +238,23 @@ async def generate_and_validate_test_code(
python_version=".".join(str(v) for v in python_version),
)
cost = calculate_llm_cost(response, execute_model) or 0.0
cost = calculate_llm_cost(response.raw_response, execute_model) or 0.0
cost_tracker.append(cost)
debug_log_sensitive_data(f"OpenAIClient {error_context}execute response:\n{response.model_dump_json(indent=2)}")
debug_log_sensitive_data(
f"OpenAIClient {error_context}execute response:\n{response.raw_response.model_dump_json(indent=2)}"
)
if response.usage:
if response.raw_response.usage:
ph(
user_id,
f"aiservice-testgen-{posthog_event_suffix}execute-openai-usage",
properties={"model": execute_model.name, "usage": response.usage.model_dump_json()},
properties={"model": execute_model.name, "usage": response.raw_response.usage.model_dump_json()},
)
# Parse and validate
validated_code = parse_and_validate_llm_output(
response_content=response.choices[0].message.content,
ctx=ctx,
python_version=python_version,
error_context=error_context,
response_content=response.content, ctx=ctx, python_version=python_version, error_context=error_context
)
return validated_code

View file

@@ -11,7 +11,7 @@ from ninja import NinjaAPI, Schema
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from aiservice.analytics.posthog import ph
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL
from aiservice.observability.decorators import observe_llm_call
@@ -77,7 +77,7 @@ async def call_workflow_gen_llm(
n: int = 1,
user_id: str | None = None,
context: dict | None = None,
):
) -> LLMResponse:
"""Call LLM for workflow generation with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -88,12 +88,8 @@ async def call_workflow_gen_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=2).chat.completions.create(
model=model.name, messages=messages, n=n, temperature=temperature
return await call_llm(
model_name=model.name, model_type=model.model_type, messages=messages, n=n, temperature=temperature
)
@@ -135,11 +131,11 @@ async def generate_workflow_steps_llm(
context={"num_files": len(repo_files)},
)
if not response.choices or not response.choices[0].message.content:
if not response.content:
logger.warning("LLM returned empty response for workflow generation")
return None
response_text = response.choices[0].message.content.strip()
response_text = response.content.strip()
# Extract YAML steps
steps_yaml = _extract_yaml_steps(response_text)