This commit is contained in:
Kevin Turcios 2025-12-22 23:28:29 -05:00
parent d2d4aa2a99
commit e938448e60
8 changed files with 100 additions and 122 deletions

View file

@@ -79,9 +79,7 @@ async def code_repair( # noqa: D417
] = [system_message, user_message]
debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
try:
output = await call_llm(
model_name=optimize_model.name, model_type=optimize_model.model_type, messages=messages
)
output = await call_llm(model_name=optimize_model.name, model_type=optimize_model.model_type, messages=messages)
llm_cost = calculate_llm_cost(output.raw_response, optimize_model)
except Exception as e:
logging.exception("Claude Code Generation error in code_repair")

View file

@@ -246,9 +246,7 @@ async def explain_optimizations( # noqa: D417
# LLM API call with error handling
try:
output = await call_llm(
model_name=explanations_model.name,
model_type=explanations_model.model_type,
messages=messages,
model_name=explanations_model.name, model_type=explanations_model.model_type, messages=messages
)
await update_optimization_cost(
trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, explanations_model)

View file

@@ -14,7 +14,12 @@ from pydantic import ValidationError
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
from aiservice.env_specific import (
LLMResponse,
call_llm,
debug_log_sensitive_data,
debug_log_sensitive_data_from_callable,
)
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from authapp.user import get_user_by_id
@@ -122,7 +127,7 @@ async def call_optimization_llm(
n: int = 1,
user_id: str | None = None,
python_version: str | None = None,
):
) -> LLMResponse:
"""Call LLM for code optimization with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -133,15 +138,8 @@ async def call_optimization_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=3).chat.completions.create(
model=model.name,
messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
n=n,
)
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages, n=n)
async def optimize_python_code(
@@ -206,17 +204,17 @@ async def optimize_python_code(
debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.source_code}")
return [], None
llm_cost = calculate_llm_cost(output, optimize_model)
llm_cost = calculate_llm_cost(output.raw_response, optimize_model)
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.model_dump_json(indent=2)}")
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.raw_response.model_dump_json(indent=2)}")
if output.usage is not None:
if output.raw_response.usage is not None:
ph(
user_id,
"aiservice-optimize-openai-usage",
properties={"model": optimize_model.name, "n": n, "usage": output.usage.json()},
properties={"model": optimize_model.name, "n": n, "usage": output.raw_response.usage.json()},
)
results = [content for op in output.choices if (content := op.message.content)]
results = output.all_contents if output.all_contents else [output.content]
optimization_response_items: list[OptimizeResponseItemSchema] = []
for result in results:
ctx.extract_code_and_explanation_from_llm_res(result)

View file

@@ -10,7 +10,12 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
from aiservice.env_specific import (
LLMResponse,
call_llm,
debug_log_sensitive_data,
debug_log_sensitive_data_from_callable,
)
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from log_features.log_event import update_optimization_cost
@@ -51,7 +56,7 @@ async def call_line_profiler_llm(
user_id: str | None = None,
python_version: str | None = None,
context: dict | None = None,
):
) -> LLMResponse:
"""Call LLM for line profiler optimization with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -62,13 +67,7 @@ async def call_line_profiler_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=3).chat.completions.create(
model=model.name, messages=messages, n=n
)
return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages, n=n)
async def optimize_python_code_line_profiler( # noqa: D417
@@ -131,22 +130,22 @@ async def optimize_python_code_line_profiler( # noqa: D417
python_version=python_version_str,
context={"lsp_mode": lsp_mode},
)
await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output, optimize_model))
await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output.raw_response, optimize_model))
except Exception as e:
logging.exception("OpenAI Code Generation error in optimizer-line-profiler")
sentry_sdk.capture_exception(e)
debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.source_code}")
return []
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.model_dump_json(indent=2)}")
debug_log_sensitive_data(f"OpenAIClient optimization response:\n{output.raw_response.model_dump_json(indent=2)}")
if output.usage is not None:
if output.raw_response.usage is not None:
ph(
user_id,
"aiservice-optimize-line-profiler-openai-usage",
properties={"model": optimize_model.name, "n": n, "usage": output.usage.json()},
properties={"model": optimize_model.name, "n": n, "usage": output.raw_response.usage.json()},
)
results = [content for op in output.choices if (content := op.message.content)]
results = output.all_contents if output.all_contents else [output.content]
optimization_response_items: list[OptimizeResponseItemSchema] = []
for result in results:
ctx.extract_code_and_explanation_from_llm_res(result)

View file

@@ -9,7 +9,7 @@ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUs
from aiservice.analytics.posthog import ph
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import LLM, RANKING_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from log_features.log_event import update_optimization_cost
@@ -78,7 +78,7 @@ Here are the function references
@observe_llm_call("ranking")
async def call_ranker_llm(
trace_id: str, model: LLM, messages: list[dict[str, str]], user_id: str | None = None, context: dict | None = None
):
) -> LLMResponse:
"""Call LLM for ranking with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -89,13 +89,7 @@ async def call_ranker_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=2).chat.completions.create(
model=model.name, messages=messages, n=1
)
return await call_llm(model_name=model.name, model_type=model.model_type, messages=messages)
async def rank_optimizations( # noqa: D417
@@ -149,21 +143,21 @@ async def rank_optimizations( # noqa: D417
"python_version": data.python_version,
},
)
await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, rank_model))
await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output.raw_response, rank_model))
except Exception as e: # noqa: BLE001
debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}")
sentry_sdk.capture_exception(e)
return RankErrorResponseSchema(error=str(e))
debug_log_sensitive_data(f"AIClient optimization response:\n{output}")
if output.usage is not None:
if output.raw_response.usage is not None:
ph(
user_id,
"aiservice-optimize-openai-usage",
properties={"model": rank_model.name, "n": 1, "usage": output.usage.model_dump_json()},
properties={"model": rank_model.name, "n": 1, "usage": output.raw_response.usage.model_dump_json()},
)
# parse xml tag for explanation, ranking
try:
explanation_match = re.search(explain_regex_pattern, output.choices[0].message.content)
explanation_match = re.search(explain_regex_pattern, output.content)
explanation = explanation_match.group(1)
except: # noqa: E722
# TODO add logging instead of print("No explanation found")
@@ -174,7 +168,7 @@ async def rank_optimizations( # noqa: D417
pass
# still doing stuff instead of returning coz ranking is important
try:
ranking_match = re.search(rank_regex_pattern, output.choices[0].message.content)
ranking_match = re.search(rank_regex_pattern, output.content)
# TODO better parsing, could be only comma separated, need to handle all edge cases
ranking = list(map(int, ranking_match.group(1).strip().split(",")))
except: # noqa: E722

View file

@@ -8,7 +8,7 @@ from pathlib import Path
from typing import SupportsIndex
from aiservice.common_utils import parse_python_version, safe_isort
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.env_specific import call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL, EXPLAIN_MODEL, LLM, PLAN_MODEL, calculate_llm_cost
from aiservice.models.functions_to_optimize import FunctionToOptimize
from log_features.log_event import update_optimization_cost
@@ -141,25 +141,25 @@ async def generate_regression_tests_from_function(
if print_text:
print_messages(explain_messages)
try:
llm_client = llm_clients[explain_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{explain_model.model_type}' is not available")
explanation_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=explain_model.name, messages=explain_messages, temperature=temperature
explanation_response = await call_llm(
model_name=explain_model.name,
model_type=explain_model.model_type,
messages=explain_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(explanation_response, explain_model) or 0.0
total_llm_cost += calculate_llm_cost(explanation_response.raw_response, explain_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in explain step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(f"OpenAIClient explanation response:\n{explanation_response.model_dump_json(indent=2)}")
if explanation_response.usage is not None:
debug_log_sensitive_data(f"OpenAIClient explanation response:\n{explanation_response.raw_response.model_dump_json(indent=2)}")
if explanation_response.raw_response.usage is not None:
ph(
user_id,
"aiservice-testgen-explain-openai-usage",
properties={"model": explain_model.name, "usage": explanation_response.usage.json()},
properties={"model": explain_model.name, "usage": explanation_response.raw_response.usage.json()},
)
explanation = explanation_response.choices[0].message.content
explanation = explanation_response.content
explain_assistant_message = {"role": "assistant", "content": explanation}
# Step 1b: Fetch relevant data from the database to use as inputs based on function explanation
@@ -170,19 +170,19 @@ async def generate_regression_tests_from_function(
if print_text:
print_messages(explain_messages)
try:
llm_client = llm_clients[execute_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{execute_model.model_type}' is not available")
fetch_data_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=execute_model.name, messages=fetch_data_messages, temperature=temperature
fetch_data_response = await call_llm(
model_name=execute_model.name,
model_type=execute_model.model_type,
messages=fetch_data_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(fetch_data_response, execute_model) or 0.0
total_llm_cost += calculate_llm_cost(fetch_data_response.raw_response, execute_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in explain step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
fetch_data_function = fetch_data_response.choices[0].message.content
fetch_data_function = fetch_data_response.content
fetch_data_function = fetch_data_function.split("```python")[1].split("```")[0].strip()
# Step 1c: Run the function to get the data
@@ -219,26 +219,26 @@ To help unit test the function above, list diverse scenarios that the function s
if print_text:
print_messages([plan_user_message])
try:
llm_client = llm_clients[plan_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{plan_model.model_type}' is not available")
plan_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=plan_model.name, messages=plan_messages, temperature=temperature
plan_response = await call_llm(
model_name=plan_model.name,
model_type=plan_model.model_type,
messages=plan_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(plan_response, plan_model) or 0.0
total_llm_cost += calculate_llm_cost(plan_response.raw_response, plan_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in plan step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(f"OpenAIClient plan response:\n{plan_response.model_dump_json(indent=2)}")
if plan_response.usage is not None:
debug_log_sensitive_data(f"OpenAIClient plan response:\n{plan_response.raw_response.model_dump_json(indent=2)}")
if plan_response.raw_response.usage is not None:
ph(
user_id,
"aiservice-testgen-plan-openai-usage",
properties={"model": plan_model.name, "usage": plan_response.usage.json()},
properties={"model": plan_model.name, "usage": plan_response.raw_response.usage.json()},
)
plan = plan_response.choices[0].message.content
plan = plan_response.content
plan_assistant_message = {"role": "assistant", "content": plan}
# Step 2b: If the plan is short, ask GPT to elaborate further
@@ -261,23 +261,23 @@ To help unit test the function above, list diverse scenarios that the function s
if print_text:
print_messages([elaboration_user_message])
try:
llm_client = llm_clients[plan_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{plan_model.model_type}' is not available")
elaboration_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=plan_model.name, messages=elaboration_messages, temperature=temperature
elaboration_response = await call_llm(
model_name=plan_model.name,
model_type=plan_model.model_type,
messages=elaboration_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(elaboration_response, plan_model) or 0.0
total_llm_cost += calculate_llm_cost(elaboration_response.raw_response, plan_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in elaboration step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(
f"OpenAIClient elaboration response:\n{elaboration_response.model_dump_json(indent=2)}"
f"OpenAIClient elaboration response:\n{elaboration_response.raw_response.model_dump_json(indent=2)}"
)
elaboration = elaboration_response.choices[0].message.content
elaboration = elaboration_response.content
elaboration_assistant_message = {"role": "assistant", "content": elaboration}
# Step 3: Generate the unit test
@@ -314,25 +314,25 @@ To help unit test the function above, list diverse scenarios that the function s
tries = 2
while tries > 0:
try:
llm_client = llm_clients[execute_model.model_type]
if llm_client is None:
raise TestGenerationFailedException(f"LLM client for model type '{execute_model.model_type}' is not available")
execute_response = await llm_client.with_options(max_retries=2).chat.completions.create(
model=execute_model.name, messages=execute_messages, temperature=temperature
execute_response = await call_llm(
model_name=execute_model.name,
model_type=execute_model.model_type,
messages=execute_messages,
temperature=temperature,
)
total_llm_cost += calculate_llm_cost(execute_response, execute_model) or 0.0
total_llm_cost += calculate_llm_cost(execute_response.raw_response, execute_model) or 0.0
except Exception as e:
logging.exception("OpenAI client error in execute step")
sentry_sdk.capture_exception(e)
raise TestGenerationFailedException(e) from e
debug_log_sensitive_data(f"OpenAIClient execute response:\n{execute_response.model_dump_json(indent=2)}")
if execute_response.usage is not None:
debug_log_sensitive_data(f"OpenAIClient execute response:\n{execute_response.raw_response.model_dump_json(indent=2)}")
if execute_response.raw_response.usage is not None:
ph(
user_id,
"aiservice-testgen-execute-openai-usage",
properties={"model": execute_model.name, "usage": execute_response.usage.json()},
properties={"model": execute_model.name, "usage": execute_response.raw_response.usage.json()},
)
execution_output = execute_response.choices[0].message.content
execution_output = execute_response.content
# check the output for errors
code = execution_output.split("```python")[1].split("```")[0].strip()

View file

@@ -16,7 +16,7 @@ from openai import OpenAIError
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id
from aiservice.env_specific import IS_PRODUCTION, debug_log_sensitive_data, llm_clients
from aiservice.env_specific import IS_PRODUCTION, LLMResponse, call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL, calculate_llm_cost
from aiservice.observability.decorators import observe_llm_call
from log_features.log_event import update_optimization_cost
@@ -198,7 +198,7 @@ async def call_testgen_llm(
temperature: float,
user_id: str | None = None,
python_version: str | None = None,
):
) -> LLMResponse:
"""Call LLM for test generation with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -209,12 +209,8 @@ async def call_testgen_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=2).chat.completions.create(
model=model.name, messages=messages, temperature=temperature
return await call_llm(
model_name=model.name, model_type=model.model_type, messages=messages, temperature=temperature
)
@@ -242,24 +238,23 @@ async def generate_and_validate_test_code(
python_version=".".join(str(v) for v in python_version),
)
cost = calculate_llm_cost(response, execute_model) or 0.0
cost = calculate_llm_cost(response.raw_response, execute_model) or 0.0
cost_tracker.append(cost)
debug_log_sensitive_data(f"OpenAIClient {error_context}execute response:\n{response.model_dump_json(indent=2)}")
debug_log_sensitive_data(
f"OpenAIClient {error_context}execute response:\n{response.raw_response.model_dump_json(indent=2)}"
)
if response.usage:
if response.raw_response.usage:
ph(
user_id,
f"aiservice-testgen-{posthog_event_suffix}execute-openai-usage",
properties={"model": execute_model.name, "usage": response.usage.model_dump_json()},
properties={"model": execute_model.name, "usage": response.raw_response.usage.model_dump_json()},
)
# Parse and validate
validated_code = parse_and_validate_llm_output(
response_content=response.choices[0].message.content,
ctx=ctx,
python_version=python_version,
error_context=error_context,
response_content=response.content, ctx=ctx, python_version=python_version, error_context=error_context
)
return validated_code

View file

@@ -11,7 +11,7 @@ from ninja import NinjaAPI, Schema
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from aiservice.analytics.posthog import ph
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.env_specific import LLMResponse, call_llm, debug_log_sensitive_data
from aiservice.models.aimodels import EXECUTE_MODEL
from aiservice.observability.decorators import observe_llm_call
@@ -77,7 +77,7 @@ async def call_workflow_gen_llm(
n: int = 1,
user_id: str | None = None,
context: dict | None = None,
):
) -> LLMResponse:
"""Call LLM for workflow generation with automatic observability.
This function is decorated with @observe_llm_call which automatically:
@@ -88,12 +88,8 @@ async def call_workflow_gen_llm(
All observability runs in the background without blocking the LLM call.
"""
llm_client = llm_clients[model.model_type]
if llm_client is None:
raise ValueError(f"LLM client for model type '{model.model_type}' is not available")
return await llm_client.with_options(max_retries=2).chat.completions.create(
model=model.name, messages=messages, n=n, temperature=temperature
return await call_llm(
model_name=model.name, model_type=model.model_type, messages=messages, n=n, temperature=temperature
)
@@ -135,11 +131,11 @@ async def generate_workflow_steps_llm(
context={"num_files": len(repo_files)},
)
if not response.choices or not response.choices[0].message.content:
if not response.content:
logger.warning("LLM returned empty response for workflow generation")
return None
response_text = response.choices[0].message.content.strip()
response_text = response.content.strip()
# Extract YAML steps
steps_yaml = _extract_yaml_steps(response_text)