Mirror of https://github.com/codeflash-ai/codeflash-internal.git (synced 2026-05-04 18:25:18 +00:00)
aiservice logs add and misc fix to track the errors (#2530)
# Pull Request Checklist

## Description
- [ ] **Description of PR**: Clear and concise description of what this PR accomplishes
- [ ] **Breaking Changes**: Document any breaking changes (if applicable)
- [ ] **Related Issues**: Link to any related issues or tickets

## Testing
- [ ] **Test cases Attached**: All relevant test cases have been added/updated
- [ ] **Manual Testing**: Manual testing completed for the changes

## Monitoring & Debugging
- [ ] **Logging in place**: Appropriate logging has been added for debugging user issues
- [ ] **Sentry will be able to catch errors**: Error handling ensures Sentry can capture and report errors
- [ ] **Avoid Dev based/Prisma logging**: No development-only or Prisma-specific logging in production code

## Configuration
- [ ] **Env variables newly added**: Any new environment variables are documented in .env.example file or mentioned in description

---

## Additional Notes
<!-- Add any additional context, screenshots, or notes for reviewers here -->

Co-authored-by: ali <mohammed18200118@gmail.com>
This commit is contained in:
parent d04d0dbbd2
commit 9bf81e7418

7 changed files with 79 additions and 32 deletions
@@ -13,13 +13,13 @@ from aiservice.common.llm_output_utils import truncate_pathological_output

# Matches both ```python and ```python:filepath blocks, captures content only
MARKDOWN_CODE_BLOCK_PATTERN = re.compile(r"```python(?::[^\n]*)?\n(.*?)```", re.DOTALL)

# Matches first ```python block (no filepath), captures content.
# Matches first ```python block (with optional :filepath), captures content.
# Uses greedy (.*) to handle LLM outputs with nested code fences (e.g. ```python:filepath
# blocks inside the main block). Requires closing ``` to be alone on its line.
FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)
FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)

# Fallback for incomplete code blocks (missing closing ```)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python\s*\n(.*)", re.MULTILINE | re.DOTALL)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)", re.MULTILINE | re.DOTALL)


def extract_all_code_from_markdown(markdown: str) -> str:
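A minimal sketch of how the updated patterns behave on a filepath-annotated block. The two pattern definitions are copied from the diff; `extract_first_block` is a hypothetical stand-in for illustration only, while the module's real entry point appears to be `extract_code_block` (seen in the tests below), whose body is not shown here.

````python
import re

# Pattern definitions as updated in this PR: the optional (?::[^\n]*)? group
# accepts a ":filepath" annotation after the language tag.
FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)", re.MULTILINE | re.DOTALL)


def extract_first_block(markdown: str) -> str | None:
    # Hypothetical helper, for illustration only.
    match = FIRST_CODE_BLOCK_PATTERN.search(markdown)
    if match is None:
        # Incomplete block (missing closing fence): fall back to the lenient pattern.
        match = FIRST_CODE_BLOCK_FALLBACK_PATTERN.search(markdown)
    return match.group(1) if match else None


print(extract_first_block("```python:src/main.py\ndef foo(): pass\n```"))  # def foo(): pass
print(extract_first_block("```python:src/main.py\ndef foo(): pass"))       # def foo(): pass
````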
@@ -9,22 +9,18 @@ import time

from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import stamina
import sentry_sdk
from anthropic import (
    APIConnectionError as AnthropicConnectionError,
    APITimeoutError as AnthropicTimeoutError,
    AsyncAnthropicBedrock,
    InternalServerError as AnthropicServerError,
    RateLimitError as AnthropicRateLimitError,
)
from openai import (
    APIConnectionError as OpenAIConnectionError,
    APITimeoutError as OpenAITimeoutError,
    AsyncAzureOpenAI,
    InternalServerError as OpenAIServerError,
    RateLimitError as OpenAIRateLimitError,
)
import stamina
from anthropic import APIConnectionError as AnthropicConnectionError
from anthropic import APITimeoutError as AnthropicTimeoutError
from anthropic import AsyncAnthropicBedrock
from anthropic import InternalServerError as AnthropicServerError
from anthropic import RateLimitError as AnthropicRateLimitError
from openai import APIConnectionError as OpenAIConnectionError
from openai import APITimeoutError as OpenAITimeoutError
from openai import AsyncAzureOpenAI
from openai import InternalServerError as OpenAIServerError
from openai import RateLimitError as OpenAIRateLimitError

from aiservice.llm_models import has_anthropic, has_openai
@@ -36,6 +32,9 @@ if TYPE_CHECKING:

logger = logging.getLogger(__name__)

_ANTHROPIC_MAX_INPUT_TOKENS = 195_000
_CHARS_PER_TOKEN_ESTIMATE = 4

_TRANSIENT_LLM_ERRORS = (
    AnthropicConnectionError,
    AnthropicTimeoutError,
@@ -167,6 +166,11 @@ class LLMClient:

    async def call_anthropic(
        self, llm: LLM, messages: list[ChatCompletionMessageParam], max_tokens: int
    ) -> LLMResponse:
        estimated_tokens = sum(len(str(m["content"])) for m in messages) // _CHARS_PER_TOKEN_ESTIMATE
        if estimated_tokens > _ANTHROPIC_MAX_INPUT_TOKENS:
            msg = f"Prompt too large (~{estimated_tokens} tokens estimated, limit {_ANTHROPIC_MAX_INPUT_TOKENS})"
            raise ValueError(msg)

        system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
        non_system = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]
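The new guard uses a rough characters-per-token heuristic rather than a real tokenizer. A standalone sketch of the same check (constants copied from the diff; `estimate_tokens` is a hypothetical helper, not part of the actual LLMClient API):

```python
# Rough character-based token estimate; assumes ~4 characters per token.
_ANTHROPIC_MAX_INPUT_TOKENS = 195_000
_CHARS_PER_TOKEN_ESTIMATE = 4


def estimate_tokens(messages: list[dict]) -> int:
    # Hypothetical helper mirroring the sum(len(str(content))) // 4 estimate above.
    return sum(len(str(m["content"])) for m in messages) // _CHARS_PER_TOKEN_ESTIMATE


messages = [{"role": "user", "content": "def foo():\n    return 1\n" * 10_000}]
estimated = estimate_tokens(messages)
if estimated > _ANTHROPIC_MAX_INPUT_TOKENS:
    msg = f"Prompt too large (~{estimated} tokens estimated, limit {_ANTHROPIC_MAX_INPUT_TOKENS})"
    raise ValueError(msg)
print(f"~{estimated} estimated tokens, within the {_ANTHROPIC_MAX_INPUT_TOKENS} limit")
```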
@@ -141,6 +141,12 @@ class TrackUsageMiddleware:

            return JsonResponse({"error": "Failed to initialize user subscription"}, status=500)

        if subscription.subscription_status != "active":
            logging.warning(
                "403 subscription inactive: user_id=%s, status=%s, endpoint=%s",
                user_id,
                subscription.subscription_status,
                endpoint,
            )
            return JsonResponse(
                {"error": "Subscription is not active", "status": subscription.subscription_status}, status=403
            )
@@ -150,6 +156,14 @@ class TrackUsageMiddleware:

        current_used = subscription.optimizations_used or 0

        if current_used + cost > subscription.optimizations_limit:
            logging.warning(
                "403 usage limit exceeded: user_id=%s, used=%s, limit=%s, tier=%s, endpoint=%s",
                user_id,
                current_used,
                subscription.optimizations_limit,
                subscription.plan_type,
                endpoint,
            )
            return JsonResponse(
                {
                    "error": "Usage limit exceeded",
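Both warnings use lazy %s formatting, so the fields are only interpolated when the record is actually emitted. A small illustration of the resulting log line (field values are made up; the real ones come from the request and the Subscriptions row):

```python
import logging

logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(message)s")

# Illustrative values only.
user_id, current_used, limit, tier, endpoint = "u_123", 50, 50, "free", "/optimize"
logging.warning(
    "403 usage limit exceeded: user_id=%s, used=%s, limit=%s, tier=%s, endpoint=%s",
    user_id,
    current_used,
    limit,
    tier,
    endpoint,
)
# Emits: WARNING 403 usage limit exceeded: user_id=u_123, used=50, limit=50, tier=free, endpoint=/optimize
```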
@@ -5,6 +5,7 @@ from django.db.models.functions import Now

from ninja.errors import HttpError
from ninja.security import HttpBearer

from aiservice.background import fire_and_forget
from authapp.auth_utils import hash_api_key
from authapp.models import CFAPIKeys, Organizations, Subscriptions
@@ -58,7 +59,7 @@ class AuthBearer(HttpBearer):

        api_key_instance = await CFAPIKeys.objects.filter(key=hashed_token).afirst()
        if api_key_instance is None:
            raise HttpError(403, "Invalid API key")
        await CFAPIKeys.objects.filter(id=api_key_instance.id).aupdate(last_used=Now())
        fire_and_forget(CFAPIKeys.objects.filter(id=api_key_instance.id).aupdate(last_used=Now()))
        request.user = api_key_instance.user_id
        request.tier = api_key_instance.tier
        request.api_key_id = api_key_instance.id
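The change stops awaiting the `last_used` bookkeeping write inline and schedules it in the background instead, so authentication no longer blocks on that update. The real `aiservice.background.fire_and_forget` is not shown in this diff; a minimal sketch of what such a helper typically looks like, assuming an asyncio event loop is running:

```python
import asyncio
from collections.abc import Coroutine
from typing import Any

# Sketch only: the actual aiservice.background.fire_and_forget may differ.
_background_tasks: set[asyncio.Task[Any]] = set()


def fire_and_forget(coro: Coroutine[Any, Any, Any]) -> None:
    """Schedule a coroutine on the running loop without awaiting its result."""
    task = asyncio.get_running_loop().create_task(coro)
    # Keep a strong reference so the task is not garbage-collected before it finishes.
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
```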
@@ -1,11 +1,13 @@

from __future__ import annotations

import asyncio
import logging
from pathlib import Path
from typing import TYPE_CHECKING

import libcst as cst
import sentry_sdk
import stamina
from ninja import NinjaAPI, Schema
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from pydantic import ValidationError
@@ -14,8 +16,6 @@ from aiservice.analytics.posthog import ph

from aiservice.background import fire_and_forget
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data
import stamina

from aiservice.llm import LLMOutputUnparseable, llm_client
from aiservice.llm_models import ADAPTIVE_OPTIMIZE_MODEL
from authapp.auth import AuthenticatedRequest
@@ -26,6 +26,8 @@ from core.shared.optimizer_schemas import OptimizeResponseItemSchema

from .adaptive_optimizer_context import AdaptiveOptContext, AdaptiveOptContextData, AdaptiveOptRequestSchema

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from openai.types.chat import ChatCompletionMessageParam
@@ -69,6 +71,7 @@ async def perform_adaptive_optimize(

        )
        llm_cost = output.cost
    except Exception as e:
        logger.exception("adaptive_optimize LLM call failed: trace_id=%s, user_id=%s", trace_id, user_id)
        debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
        raise LLMOutputUnparseable(str(e)) from e
    debug_log_sensitive_data(f"ClaudeClient optimization response:\n{output.content}")
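`logger.exception` logs at ERROR level and attaches the active exception's traceback to the record, which is what makes these new lines useful for tracking failures. A minimal illustration outside the service code (identifier values are made up):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

trace_id, user_id = "0b1f-example", "u_123"  # illustrative values
try:
    raise RuntimeError("boom")
except Exception:
    # Same pattern as the diff: message plus identifiers, traceback appended automatically.
    logger.exception("adaptive_optimize LLM call failed: trace_id=%s, user_id=%s", trace_id, user_id)
```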
@@ -90,6 +93,7 @@ async def perform_adaptive_optimize(

        new_opt = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
        if not new_opt or not ctx.is_valid_code():
            extracted_code = ctx.extracted_code_and_expl.code if ctx.extracted_code_and_expl else None
            logger.error("adaptive_optimize invalid code: trace_id=%s, user_id=%s", trace_id, user_id)
            raise LLMOutputUnparseable("Invalid code generated " + str(extracted_code), cost=llm_cost)

        # the parent is the last candidate in the previous optimizations
@@ -97,6 +101,7 @@ async def perform_adaptive_optimize(

        new_opt.parent_id = last_optimization_id
        return new_opt, llm_cost  # noqa: TRY300
    except (ValueError, ValidationError, cst.ParserSyntaxError) as exc:
        logger.exception("adaptive_optimize parsing failed: trace_id=%s, user_id=%s", trace_id, user_id)
        sentry_sdk.capture_exception(exc)
        debug_log_sensitive_data(f"{type(exc).__name__} for source:\n{ctx.data.original_source_code}")
        debug_log_sensitive_data(f"Traceback: {exc}")
@@ -123,16 +128,18 @@ async def adaptive_optimize(

    trace_id = data.trace_id
    if not validate_trace_id(trace_id):
        return 400, AdaptiveOptErrorResponseSchema(error="Invalid trace ID. Please provide a valid UUIDv4.")

    try:
        adaptive_optimization_candidate, llm_cost = await perform_adaptive_optimize(
            user_id=request.user, ctx=ctx, trace_id=trace_id
        )
        if adaptive_optimization_candidate is None:
            logger.error("adaptive_optimize endpoint returning 500: trace_id=%s, no candidate generated", trace_id)
            return 500, AdaptiveOptErrorResponseSchema(error="Failed to generate optimization candidate")

    except LLMOutputUnparseable as e:
        return 422, AdaptiveOptErrorResponseSchema(error=str(e))
    total_llm_cost = llm_cost

    fire_and_forget(update_optimization_cost(trace_id=trace_id, cost=total_llm_cost, user_id=request.user))
    fire_and_forget(update_optimization_cost(trace_id=trace_id, cost=llm_cost, user_id=request.user))
    if hasattr(request, "should_log_features") and request.should_log_features:
        fire_and_forget(
            safe_log_features(
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING

import libcst as cst
import sentry_sdk
import stamina
from ninja import NinjaAPI, Schema
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from pydantic import ValidationError
@@ -14,8 +15,6 @@ from pydantic import ValidationError

from aiservice.analytics.posthog import ph
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data
import stamina

from aiservice.llm import LLMOutputUnparseable, llm_client
from aiservice.llm_models import CODE_REPAIR_MODEL
from authapp.auth import AuthenticatedRequest
@@ -45,8 +44,12 @@ USER_PROMPT = (current_dir / "CODE_REPAIR_USER_PROMPT.md").read_text()


@stamina.retry(on=LLMOutputUnparseable, attempts=2)
async def code_repair(  # noqa: D417
    user_id: str, optimization_id: str, ctx: CodeRepairContext, optimize_model: LLM = CODE_REPAIR_MODEL
) -> CodeRepairIntermediateResponseItemschema:
    user_id: str,
    optimization_id: str,
    ctx: CodeRepairContext,
    trace_id: str = "",
    optimize_model: LLM = CODE_REPAIR_MODEL,
) -> CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema:
    """Repair the given candidate to match the behaviour of the original code.

    Parameters
@@ -73,7 +76,9 @@ async def code_repair(  # noqa: D417

    messages: list[ChatCompletionMessageParam] = [system_message, user_message]
    debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
    try:
        output = await llm_client.call(llm=optimize_model, messages=messages)
        output = await llm_client.call(
            llm=optimize_model, messages=messages, call_type="code_repair", trace_id=trace_id, user_id=user_id
        )
        llm_cost = output.cost
    except Exception as e:
        debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
@@ -160,10 +165,14 @@ async def repair(

        return 200, result

    try:
        code_repair_data = await code_repair(user_id=request.user, optimization_id=data.optimization_id, ctx=ctx)
        code_repair_data = await code_repair(
            user_id=request.user, optimization_id=data.optimization_id, ctx=ctx, trace_id=trace_id
        )
        if isinstance(code_repair_data, CodeRepairErrorResponseSchema):
            return 500, code_repair_data
    except LLMOutputUnparseable as e:
        return 422, CodeRepairErrorResponseSchema(error=str(e))
    total_llm_cost = code_repair_data.llm_cost
    llm_cost = code_repair_data.llm_cost
    try:
        ctx.validate_module()
    except cst.ParserSyntaxError as e:
@@ -180,7 +189,7 @@ async def repair(

        return 422, CodeRepairErrorResponseSchema(error=str(exc))

    async with asyncio.TaskGroup() as tg:
        tg.create_task(update_optimization_cost(trace_id=trace_id, cost=total_llm_cost, user_id=request.user))
        tg.create_task(update_optimization_cost(trace_id=trace_id, cost=llm_cost, user_id=request.user))
        if hasattr(request, "should_log_features") and request.should_log_features:
            tg.create_task(
                safe_log_features(
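With `trace_id` threaded through and the return type widened to `CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema`, the endpoint now branches on the concrete result type instead of relying on exceptions alone. A simplified sketch of that dispatch pattern (the dataclasses below are stand-ins, not the project's real schemas):

```python
from dataclasses import dataclass


@dataclass
class CodeRepairIntermediateResponseItemschema:
    llm_cost: float
    repaired_code: str


@dataclass
class CodeRepairErrorResponseSchema:
    error: str


def to_http(result: CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema) -> tuple[int, object]:
    # Error schema maps to a 500 response; anything else is treated as success.
    if isinstance(result, CodeRepairErrorResponseSchema):
        return 500, result
    return 200, result


print(to_http(CodeRepairErrorResponseSchema(error="repair failed")))               # (500, ...)
print(to_http(CodeRepairIntermediateResponseItemschema(0.02, "def foo(): ...")))   # (200, ...)
```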
@@ -181,6 +181,18 @@ x ="""

    assert result == expected


def test_extract_code_block_with_filepath_annotation() -> None:
    text = "```python:src/main.py\ndef foo(): pass\n```"
    result = extract_code_block(text)
    assert result == "def foo(): pass"


def test_extract_code_block_with_filepath_annotation_fallback() -> None:
    text = "```python:src/main.py\ndef foo(): pass"
    result = extract_code_block(text)
    assert result == "def foo(): pass"


def test_extract_code_block_nested_code_fence_in_triple_quote() -> None:
    # LLM embeds function definition in a triple-quoted string containing ```
    text = '```python\nimport pytest\n_source = """```python:file.py\ndef foo(): pass\n```"""\ndef test_foo():\n assert True\n```'