aiservice: add logs and misc fixes to track errors (#2530)

# Pull Request Checklist

## Description
- [ ] **Description of PR**: Clear and concise description of what this
PR accomplishes
- [ ] **Breaking Changes**: Document any breaking changes (if
applicable)
- [ ] **Related Issues**: Link to any related issues or tickets

## Testing
- [ ] **Test cases attached**: All relevant test cases have been
added/updated
- [ ] **Manual Testing**: Manual testing completed for the changes

## Monitoring & Debugging
- [ ] **Logging in place**: Appropriate logging has been added for
debugging user issues
- [ ] **Sentry will be able to catch errors**: Error handling ensures
Sentry can capture and report errors
- [ ] **Avoid Dev based/Prisma logging**: No development-only or
Prisma-specific logging in production code

## Configuration
- [ ] **Env variables newly added**: Any new environment variables are
documented in the .env.example file or mentioned in the description

---

## Additional Notes
<!-- Add any additional context, screenshots, or notes for reviewers
here -->

Co-authored-by: ali <mohammed18200118@gmail.com>
Sarthak Agarwal 2026-04-03 16:50:45 +05:30 committed by GitHub
parent d04d0dbbd2
commit 9bf81e7418
7 changed files with 79 additions and 32 deletions


@@ -13,13 +13,13 @@ from aiservice.common.llm_output_utils import truncate_pathological_output
 # Matches both ```python and ```python:filepath blocks, captures content only
 MARKDOWN_CODE_BLOCK_PATTERN = re.compile(r"```python(?::[^\n]*)?\n(.*?)```", re.DOTALL)
-# Matches first ```python block (no filepath), captures content.
+# Matches first ```python block (with optional :filepath), captures content.
 # Uses greedy (.*) to handle LLM outputs with nested code fences (e.g. ```python:filepath
 # blocks inside the main block). Requires closing ``` to be alone on its line.
-FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)
+FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)
 # Fallback for incomplete code blocks (missing closing ```)
-FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python\s*\n(.*)", re.MULTILINE | re.DOTALL)
+FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)", re.MULTILINE | re.DOTALL)
 def extract_all_code_from_markdown(markdown: str) -> str:
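The effect of the `(?::[^\n]*)?` addition is easiest to see with the patterns applied directly. A minimal sketch (pattern strings copied from the diff; `extract_first_block` is a hypothetical stand-in for the real extraction function):

```python
import re

# Patterns as updated in this PR: the optional (?::[^\n]*)? group accepts a
# filepath annotation after the language tag, e.g. ```python:src/main.py
FIRST_CODE_BLOCK_PATTERN = re.compile(
    r"^```python(?::[^\n]*)?\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL
)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(
    r"^```python(?::[^\n]*)?\s*\n(.*)", re.MULTILINE | re.DOTALL
)


def extract_first_block(markdown: str) -> str | None:
    """Hypothetical stand-in: try the strict pattern first, then the
    fallback for blocks whose closing fence never arrived."""
    match = FIRST_CODE_BLOCK_PATTERN.search(markdown) or FIRST_CODE_BLOCK_FALLBACK_PATTERN.search(markdown)
    return match.group(1) if match else None


print(extract_first_block("```python:src/main.py\ndef foo(): pass\n```"))  # def foo(): pass
print(extract_first_block("```python:src/main.py\ndef foo(): pass"))       # def foo(): pass (fallback)
```

Before this change, a `:filepath` suffix made both the strict and fallback patterns miss, so extraction failed outright; the new tests at the end of the diff pin both variants.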


@@ -9,22 +9,18 @@ import time
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
-import stamina
-from anthropic import (
-    APIConnectionError as AnthropicConnectionError,
-    APITimeoutError as AnthropicTimeoutError,
-    AsyncAnthropicBedrock,
-    InternalServerError as AnthropicServerError,
-    RateLimitError as AnthropicRateLimitError,
-)
-from openai import (
-    APIConnectionError as OpenAIConnectionError,
-    APITimeoutError as OpenAITimeoutError,
-    AsyncAzureOpenAI,
-    InternalServerError as OpenAIServerError,
-    RateLimitError as OpenAIRateLimitError,
-)
+import sentry_sdk
+import stamina
+from anthropic import APIConnectionError as AnthropicConnectionError
+from anthropic import APITimeoutError as AnthropicTimeoutError
+from anthropic import AsyncAnthropicBedrock
+from anthropic import InternalServerError as AnthropicServerError
+from anthropic import RateLimitError as AnthropicRateLimitError
+from openai import APIConnectionError as OpenAIConnectionError
+from openai import APITimeoutError as OpenAITimeoutError
+from openai import AsyncAzureOpenAI
+from openai import InternalServerError as OpenAIServerError
+from openai import RateLimitError as OpenAIRateLimitError
 from aiservice.llm_models import has_anthropic, has_openai
@@ -36,6 +32,9 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
+_ANTHROPIC_MAX_INPUT_TOKENS = 195_000
+_CHARS_PER_TOKEN_ESTIMATE = 4
 _TRANSIENT_LLM_ERRORS = (
     AnthropicConnectionError,
     AnthropicTimeoutError,
@@ -167,6 +166,11 @@ class LLMClient:
     async def call_anthropic(
         self, llm: LLM, messages: list[ChatCompletionMessageParam], max_tokens: int
    ) -> LLMResponse:
+        estimated_tokens = sum(len(str(m["content"])) for m in messages) // _CHARS_PER_TOKEN_ESTIMATE
+        if estimated_tokens > _ANTHROPIC_MAX_INPUT_TOKENS:
+            msg = f"Prompt too large (~{estimated_tokens} tokens estimated, limit {_ANTHROPIC_MAX_INPUT_TOKENS})"
+            raise ValueError(msg)
         system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
         non_system = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]
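The new guard fails fast with a `ValueError` instead of letting the Anthropic API reject the oversized request; since `ValueError` is not in `_TRANSIENT_LLM_ERRORS`, the retry machinery should treat it as permanent. The heuristic extracted for illustration (constants copied from the diff; the message shape is assumed to be OpenAI-style dicts):

```python
_ANTHROPIC_MAX_INPUT_TOKENS = 195_000
_CHARS_PER_TOKEN_ESTIMATE = 4  # rough average; real tokenizers vary by content


def check_prompt_size(messages: list[dict]) -> None:
    # Pre-flight estimate: total characters / 4 approximates the token count
    # without loading a tokenizer. str() guards against non-string content
    # (e.g. structured content blocks).
    estimated_tokens = sum(len(str(m["content"])) for m in messages) // _CHARS_PER_TOKEN_ESTIMATE
    if estimated_tokens > _ANTHROPIC_MAX_INPUT_TOKENS:
        msg = f"Prompt too large (~{estimated_tokens} tokens estimated, limit {_ANTHROPIC_MAX_INPUT_TOKENS})"
        raise ValueError(msg)


check_prompt_size([{"role": "user", "content": "x" * 1_000_000}])  # raises ValueError (~250000 tokens)
```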


@@ -141,6 +141,12 @@ class TrackUsageMiddleware:
             return JsonResponse({"error": "Failed to initialize user subscription"}, status=500)
         if subscription.subscription_status != "active":
+            logging.warning(
+                "403 subscription inactive: user_id=%s, status=%s, endpoint=%s",
+                user_id,
+                subscription.subscription_status,
+                endpoint,
+            )
             return JsonResponse(
                 {"error": "Subscription is not active", "status": subscription.subscription_status}, status=403
             )
@@ -150,6 +156,14 @@
         current_used = subscription.optimizations_used or 0
         if current_used + cost > subscription.optimizations_limit:
+            logging.warning(
+                "403 usage limit exceeded: user_id=%s, used=%s, limit=%s, tier=%s, endpoint=%s",
+                user_id,
+                current_used,
+                subscription.optimizations_limit,
+                subscription.plan_type,
+                endpoint,
+            )
             return JsonResponse(
                 {
                     "error": "Usage limit exceeded",


@@ -5,6 +5,7 @@ from django.db.models.functions import Now
 from ninja.errors import HttpError
 from ninja.security import HttpBearer
+from aiservice.background import fire_and_forget
 from authapp.auth_utils import hash_api_key
 from authapp.models import CFAPIKeys, Organizations, Subscriptions
@@ -58,7 +59,7 @@ class AuthBearer(HttpBearer):
         api_key_instance = await CFAPIKeys.objects.filter(key=hashed_token).afirst()
         if api_key_instance is None:
             raise HttpError(403, "Invalid API key")
-        await CFAPIKeys.objects.filter(id=api_key_instance.id).aupdate(last_used=Now())
+        fire_and_forget(CFAPIKeys.objects.filter(id=api_key_instance.id).aupdate(last_used=Now()))
         request.user = api_key_instance.user_id
         request.tier = api_key_instance.tier
         request.api_key_id = api_key_instance.id
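With this change the auth path no longer awaits the `last_used` UPDATE; the write is scheduled and the request proceeds. `fire_and_forget` itself is defined elsewhere in the codebase and isn't shown in this diff; a minimal sketch of what such a helper typically looks like (an assumption, not the project's actual implementation):

```python
import asyncio
import logging
from collections.abc import Coroutine
from typing import Any

logger = logging.getLogger(__name__)

# Strong references keep scheduled tasks from being garbage-collected mid-flight.
_background_tasks: set[asyncio.Task] = set()


def fire_and_forget(coro: Coroutine[Any, Any, Any]) -> None:
    """Schedule coro on the running loop without awaiting its result."""
    task = asyncio.get_running_loop().create_task(coro)
    _background_tasks.add(task)
    task.add_done_callback(_on_done)


def _on_done(task: asyncio.Task) -> None:
    _background_tasks.discard(task)
    if not task.cancelled() and task.exception() is not None:
        # The caller never awaits, so failures must be surfaced here.
        logger.error("background task failed", exc_info=task.exception())
```

The trade-off is durability: if the process exits before the task runs, the write is lost, which is usually acceptable for advisory metadata like `last_used`.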


@@ -1,11 +1,13 @@
 from __future__ import annotations
 import asyncio
 import logging
 from pathlib import Path
 from typing import TYPE_CHECKING
 import libcst as cst
+import sentry_sdk
+import stamina
 from ninja import NinjaAPI, Schema
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
 from pydantic import ValidationError
@@ -14,8 +16,6 @@ from aiservice.analytics.posthog import ph
 from aiservice.background import fire_and_forget
 from aiservice.common_utils import validate_trace_id
 from aiservice.env_specific import debug_log_sensitive_data
-import stamina
 from aiservice.llm import LLMOutputUnparseable, llm_client
 from aiservice.llm_models import ADAPTIVE_OPTIMIZE_MODEL
 from authapp.auth import AuthenticatedRequest
@@ -26,6 +26,8 @@ from core.shared.optimizer_schemas import OptimizeResponseItemSchema
 from .adaptive_optimizer_context import AdaptiveOptContext, AdaptiveOptContextData, AdaptiveOptRequestSchema
+
+logger = logging.getLogger(__name__)
 if TYPE_CHECKING:
     from openai.types.chat import ChatCompletionMessageParam
@@ -69,6 +71,7 @@ async def perform_adaptive_optimize(
         )
         llm_cost = output.cost
     except Exception as e:
+        logger.exception("adaptive_optimize LLM call failed: trace_id=%s, user_id=%s", trace_id, user_id)
         debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
         raise LLMOutputUnparseable(str(e)) from e
     debug_log_sensitive_data(f"ClaudeClient optimization response:\n{output.content}")
@@ -90,6 +93,7 @@ async def perform_adaptive_optimize(
     new_opt = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
     if not new_opt or not ctx.is_valid_code():
         extracted_code = ctx.extracted_code_and_expl.code if ctx.extracted_code_and_expl else None
+        logger.error("adaptive_optimize invalid code: trace_id=%s, user_id=%s", trace_id, user_id)
         raise LLMOutputUnparseable("Invalid code generated " + str(extracted_code), cost=llm_cost)
     # the parent is the last candidate in the previous optimizations
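Note the `cost=llm_cost` argument: the exception carries the spend of the failed call so the caller can still record it even though the result is discarded. The class definition isn't part of this diff; a plausible minimal shape consistent with both call sites (`LLMOutputUnparseable(str(e))` and this one) would be:

```python
class LLMOutputUnparseable(Exception):
    """Raised when the LLM responded but its output could not be parsed
    or validated. Carries the cost of the wasted call so billing still
    happens even though the result is discarded."""

    def __init__(self, message: str, cost: float = 0.0) -> None:
        super().__init__(message)
        self.cost = cost
```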
@@ -97,6 +101,7 @@ async def perform_adaptive_optimize(
         new_opt.parent_id = last_optimization_id
         return new_opt, llm_cost  # noqa: TRY300
     except (ValueError, ValidationError, cst.ParserSyntaxError) as exc:
+        logger.exception("adaptive_optimize parsing failed: trace_id=%s, user_id=%s", trace_id, user_id)
         sentry_sdk.capture_exception(exc)
         debug_log_sensitive_data(f"{type(exc).__name__} for source:\n{ctx.data.original_source_code}")
         debug_log_sensitive_data(f"Traceback: {exc}")
@@ -123,16 +128,18 @@ async def adaptive_optimize(
     trace_id = data.trace_id
     if not validate_trace_id(trace_id):
         return 400, AdaptiveOptErrorResponseSchema(error="Invalid trace ID. Please provide a valid UUIDv4.")
     try:
         adaptive_optimization_candidate, llm_cost = await perform_adaptive_optimize(
             user_id=request.user, ctx=ctx, trace_id=trace_id
         )
         if adaptive_optimization_candidate is None:
+            logger.error("adaptive_optimize endpoint returning 500: trace_id=%s, no candidate generated", trace_id)
             return 500, AdaptiveOptErrorResponseSchema(error="Failed to generate optimization candidate")
     except LLMOutputUnparseable as e:
         return 422, AdaptiveOptErrorResponseSchema(error=str(e))
-    total_llm_cost = llm_cost
-    fire_and_forget(update_optimization_cost(trace_id=trace_id, cost=total_llm_cost, user_id=request.user))
+    fire_and_forget(update_optimization_cost(trace_id=trace_id, cost=llm_cost, user_id=request.user))
     if hasattr(request, "should_log_features") and request.should_log_features:
         fire_and_forget(
             safe_log_features(


@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
 import libcst as cst
 import sentry_sdk
+import stamina
 from ninja import NinjaAPI, Schema
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
 from pydantic import ValidationError
@@ -14,8 +15,6 @@ from pydantic import ValidationError
 from aiservice.analytics.posthog import ph
 from aiservice.common_utils import validate_trace_id
 from aiservice.env_specific import debug_log_sensitive_data
-import stamina
 from aiservice.llm import LLMOutputUnparseable, llm_client
 from aiservice.llm_models import CODE_REPAIR_MODEL
 from authapp.auth import AuthenticatedRequest
@@ -45,8 +44,12 @@ USER_PROMPT = (current_dir / "CODE_REPAIR_USER_PROMPT.md").read_text()
 @stamina.retry(on=LLMOutputUnparseable, attempts=2)
 async def code_repair(  # noqa: D417
-    user_id: str, optimization_id: str, ctx: CodeRepairContext, optimize_model: LLM = CODE_REPAIR_MODEL
-) -> CodeRepairIntermediateResponseItemschema:
+    user_id: str,
+    optimization_id: str,
+    ctx: CodeRepairContext,
+    trace_id: str = "",
+    optimize_model: LLM = CODE_REPAIR_MODEL,
+) -> CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema:
     """Repair the given candidate to match the behaviour of the original code.
     Parameters
@@ -73,7 +76,9 @@ async def code_repair(  # noqa: D417
     messages: list[ChatCompletionMessageParam] = [system_message, user_message]
     debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
     try:
-        output = await llm_client.call(llm=optimize_model, messages=messages)
+        output = await llm_client.call(
+            llm=optimize_model, messages=messages, call_type="code_repair", trace_id=trace_id, user_id=user_id
+        )
         llm_cost = output.cost
     except Exception as e:
         debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
@@ -160,10 +165,14 @@ async def repair(
         return 200, result
     try:
-        code_repair_data = await code_repair(user_id=request.user, optimization_id=data.optimization_id, ctx=ctx)
+        code_repair_data = await code_repair(
+            user_id=request.user, optimization_id=data.optimization_id, ctx=ctx, trace_id=trace_id
+        )
+        if isinstance(code_repair_data, CodeRepairErrorResponseSchema):
+            return 500, code_repair_data
     except LLMOutputUnparseable as e:
         return 422, CodeRepairErrorResponseSchema(error=str(e))
-    total_llm_cost = code_repair_data.llm_cost
+    llm_cost = code_repair_data.llm_cost
     try:
         ctx.validate_module()
     except cst.ParserSyntaxError as e:
@@ -180,7 +189,7 @@ async def repair(
         return 422, CodeRepairErrorResponseSchema(error=str(exc))
     async with asyncio.TaskGroup() as tg:
-        tg.create_task(update_optimization_cost(trace_id=trace_id, cost=total_llm_cost, user_id=request.user))
+        tg.create_task(update_optimization_cost(trace_id=trace_id, cost=llm_cost, user_id=request.user))
         if hasattr(request, "should_log_features") and request.should_log_features:
             tg.create_task(
                 safe_log_features(


@@ -181,6 +181,18 @@ x ="""
     assert result == expected
+def test_extract_code_block_with_filepath_annotation() -> None:
+    text = "```python:src/main.py\ndef foo(): pass\n```"
+    result = extract_code_block(text)
+    assert result == "def foo(): pass"
+def test_extract_code_block_with_filepath_annotation_fallback() -> None:
+    text = "```python:src/main.py\ndef foo(): pass"
+    result = extract_code_block(text)
+    assert result == "def foo(): pass"
+def test_extract_code_block_nested_code_fence_in_triple_quote() -> None:
+    # LLM embeds function definition in a triple-quoted string containing ```
+    text = '```python\nimport pytest\n_source = """```python:file.py\ndef foo(): pass\n```"""\ndef test_foo():\n    assert True\n```'