Mirror of https://github.com/codeflash-ai/codeflash-internal.git (synced 2026-05-04 18:25:18 +00:00)
aiservice logs add and misc fix to track the errors (#2530)
# Pull Request Checklist

## Description
- [ ] **Description of PR**: Clear and concise description of what this PR accomplishes
- [ ] **Breaking Changes**: Document any breaking changes (if applicable)
- [ ] **Related Issues**: Link to any related issues or tickets

## Testing
- [ ] **Test cases Attached**: All relevant test cases have been added/updated
- [ ] **Manual Testing**: Manual testing completed for the changes

## Monitoring & Debugging
- [ ] **Logging in place**: Appropriate logging has been added for debugging user issues
- [ ] **Sentry will be able to catch errors**: Error handling ensures Sentry can capture and report errors
- [ ] **Avoid Dev based/Prisma logging**: No development-only or Prisma-specific logging in production code

## Configuration
- [ ] **Env variables newly added**: Any new environment variables are documented in .env.example file or mentioned in description

---

## Additional Notes
<!-- Add any additional context, screenshots, or notes for reviewers here -->

Co-authored-by: ali <mohammed18200118@gmail.com>
This commit is contained in:
parent d04d0dbbd2
commit 9bf81e7418

7 changed files with 79 additions and 32 deletions
@@ -13,13 +13,13 @@ from aiservice.common.llm_output_utils import truncate_pathological_output

# Matches both ```python and ```python:filepath blocks, captures content only
MARKDOWN_CODE_BLOCK_PATTERN = re.compile(r"```python(?::[^\n]*)?\n(.*?)```", re.DOTALL)

# Matches first ```python block (no filepath), captures content.
# Matches first ```python block (with optional :filepath), captures content.
# Uses greedy (.*) to handle LLM outputs with nested code fences (e.g. ```python:filepath
# blocks inside the main block). Requires closing ``` to be alone on its line.
FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)
FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)

# Fallback for incomplete code blocks (missing closing ```)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python\s*\n(.*)", re.MULTILINE | re.DOTALL)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)", re.MULTILINE | re.DOTALL)


def extract_all_code_from_markdown(markdown: str) -> str:
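A minimal sketch of how the updated patterns behave on a filepath-annotated block. The two pattern definitions are copied from the diff; `extract_first_block` is a hypothetical stand-in for illustration only, while the module's real entry point appears to be `extract_code_block` (seen in the tests below), whose body is not shown here.

````python
import re

# Pattern definitions as updated in this PR: the optional (?::[^\n]*)? group
# accepts a ":filepath" annotation after the language tag.
FIRST_CODE_BLOCK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)\n```[ \t]*$", re.MULTILINE | re.DOTALL)
FIRST_CODE_BLOCK_FALLBACK_PATTERN = re.compile(r"^```python(?::[^\n]*)?\s*\n(.*)", re.MULTILINE | re.DOTALL)


def extract_first_block(markdown: str) -> str | None:
    # Hypothetical helper, for illustration only.
    match = FIRST_CODE_BLOCK_PATTERN.search(markdown)
    if match is None:
        # Incomplete block (missing closing fence): fall back to the lenient pattern.
        match = FIRST_CODE_BLOCK_FALLBACK_PATTERN.search(markdown)
    return match.group(1) if match else None


print(extract_first_block("```python:src/main.py\ndef foo(): pass\n```"))  # def foo(): pass
print(extract_first_block("```python:src/main.py\ndef foo(): pass"))       # def foo(): pass
````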
@@ -9,22 +9,18 @@ import time

from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import stamina
import sentry_sdk
from anthropic import (
    APIConnectionError as AnthropicConnectionError,
    APITimeoutError as AnthropicTimeoutError,
    AsyncAnthropicBedrock,
    InternalServerError as AnthropicServerError,
    RateLimitError as AnthropicRateLimitError,
)
from openai import (
    APIConnectionError as OpenAIConnectionError,
    APITimeoutError as OpenAITimeoutError,
    AsyncAzureOpenAI,
    InternalServerError as OpenAIServerError,
    RateLimitError as OpenAIRateLimitError,
)
import stamina
from anthropic import APIConnectionError as AnthropicConnectionError
from anthropic import APITimeoutError as AnthropicTimeoutError
from anthropic import AsyncAnthropicBedrock
from anthropic import InternalServerError as AnthropicServerError
from anthropic import RateLimitError as AnthropicRateLimitError
from openai import APIConnectionError as OpenAIConnectionError
from openai import APITimeoutError as OpenAITimeoutError
from openai import AsyncAzureOpenAI
from openai import InternalServerError as OpenAIServerError
from openai import RateLimitError as OpenAIRateLimitError

from aiservice.llm_models import has_anthropic, has_openai
@@ -36,6 +32,9 @@ if TYPE_CHECKING:

logger = logging.getLogger(__name__)

_ANTHROPIC_MAX_INPUT_TOKENS = 195_000
_CHARS_PER_TOKEN_ESTIMATE = 4

_TRANSIENT_LLM_ERRORS = (
    AnthropicConnectionError,
    AnthropicTimeoutError,
@@ -167,6 +166,11 @@ class LLMClient:

    async def call_anthropic(
        self, llm: LLM, messages: list[ChatCompletionMessageParam], max_tokens: int
    ) -> LLMResponse:
        estimated_tokens = sum(len(str(m["content"])) for m in messages) // _CHARS_PER_TOKEN_ESTIMATE
        if estimated_tokens > _ANTHROPIC_MAX_INPUT_TOKENS:
            msg = f"Prompt too large (~{estimated_tokens} tokens estimated, limit {_ANTHROPIC_MAX_INPUT_TOKENS})"
            raise ValueError(msg)

        system_prompt = next((m["content"] for m in messages if m["role"] == "system"), None)
        non_system = [{"role": m["role"], "content": m["content"]} for m in messages if m["role"] != "system"]
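The new guard uses a rough characters-per-token heuristic rather than a real tokenizer. A standalone sketch of the same check (constants copied from the diff; `estimate_tokens` is a hypothetical helper, not part of the actual LLMClient API):

```python
# Rough character-based token estimate; assumes ~4 characters per token.
_ANTHROPIC_MAX_INPUT_TOKENS = 195_000
_CHARS_PER_TOKEN_ESTIMATE = 4


def estimate_tokens(messages: list[dict]) -> int:
    # Hypothetical helper mirroring the sum(len(str(content))) // 4 estimate above.
    return sum(len(str(m["content"])) for m in messages) // _CHARS_PER_TOKEN_ESTIMATE


messages = [{"role": "user", "content": "def foo():\n    return 1\n" * 10_000}]
estimated = estimate_tokens(messages)
if estimated > _ANTHROPIC_MAX_INPUT_TOKENS:
    msg = f"Prompt too large (~{estimated} tokens estimated, limit {_ANTHROPIC_MAX_INPUT_TOKENS})"
    raise ValueError(msg)
print(f"~{estimated} estimated tokens, within the {_ANTHROPIC_MAX_INPUT_TOKENS} limit")
```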
@@ -141,6 +141,12 @@ class TrackUsageMiddleware:

            return JsonResponse({"error": "Failed to initialize user subscription"}, status=500)

        if subscription.subscription_status != "active":
            logging.warning(
                "403 subscription inactive: user_id=%s, status=%s, endpoint=%s",
                user_id,
                subscription.subscription_status,
                endpoint,
            )
            return JsonResponse(
                {"error": "Subscription is not active", "status": subscription.subscription_status}, status=403
            )
@@ -150,6 +156,14 @@ class TrackUsageMiddleware:

        current_used = subscription.optimizations_used or 0

        if current_used + cost > subscription.optimizations_limit:
            logging.warning(
                "403 usage limit exceeded: user_id=%s, used=%s, limit=%s, tier=%s, endpoint=%s",
                user_id,
                current_used,
                subscription.optimizations_limit,
                subscription.plan_type,
                endpoint,
            )
            return JsonResponse(
                {
                    "error": "Usage limit exceeded",
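Both warnings use lazy %s formatting, so the fields are only interpolated when the record is actually emitted. A small illustration of the resulting log line (field values are made up; the real ones come from the request and the Subscriptions row):

```python
import logging

logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(message)s")

# Illustrative values only.
user_id, current_used, limit, tier, endpoint = "u_123", 50, 50, "free", "/optimize"
logging.warning(
    "403 usage limit exceeded: user_id=%s, used=%s, limit=%s, tier=%s, endpoint=%s",
    user_id,
    current_used,
    limit,
    tier,
    endpoint,
)
# Emits: WARNING 403 usage limit exceeded: user_id=u_123, used=50, limit=50, tier=free, endpoint=/optimize
```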
@@ -5,6 +5,7 @@ from django.db.models.functions import Now

from ninja.errors import HttpError
from ninja.security import HttpBearer

from aiservice.background import fire_and_forget
from authapp.auth_utils import hash_api_key
from authapp.models import CFAPIKeys, Organizations, Subscriptions
@@ -58,7 +59,7 @@ class AuthBearer(HttpBearer):

        api_key_instance = await CFAPIKeys.objects.filter(key=hashed_token).afirst()
        if api_key_instance is None:
            raise HttpError(403, "Invalid API key")
        await CFAPIKeys.objects.filter(id=api_key_instance.id).aupdate(last_used=Now())
        fire_and_forget(CFAPIKeys.objects.filter(id=api_key_instance.id).aupdate(last_used=Now()))
        request.user = api_key_instance.user_id
        request.tier = api_key_instance.tier
        request.api_key_id = api_key_instance.id
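The change stops awaiting the `last_used` bookkeeping write inline and schedules it in the background instead, so authentication no longer blocks on that update. The real `aiservice.background.fire_and_forget` is not shown in this diff; a minimal sketch of what such a helper typically looks like, assuming an asyncio event loop is running:

```python
import asyncio
from collections.abc import Coroutine
from typing import Any

# Sketch only: the actual aiservice.background.fire_and_forget may differ.
_background_tasks: set[asyncio.Task[Any]] = set()


def fire_and_forget(coro: Coroutine[Any, Any, Any]) -> None:
    """Schedule a coroutine on the running loop without awaiting its result."""
    task = asyncio.get_running_loop().create_task(coro)
    # Keep a strong reference so the task is not garbage-collected before it finishes.
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
```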
@@ -1,11 +1,13 @@

from __future__ import annotations

import asyncio
import logging
from pathlib import Path
from typing import TYPE_CHECKING

import libcst as cst
import sentry_sdk
import stamina
from ninja import NinjaAPI, Schema
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from pydantic import ValidationError
@@ -14,8 +16,6 @@ from aiservice.analytics.posthog import ph

from aiservice.background import fire_and_forget
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data
import stamina

from aiservice.llm import LLMOutputUnparseable, llm_client
from aiservice.llm_models import ADAPTIVE_OPTIMIZE_MODEL
from authapp.auth import AuthenticatedRequest
@@ -26,6 +26,8 @@ from core.shared.optimizer_schemas import OptimizeResponseItemSchema

from .adaptive_optimizer_context import AdaptiveOptContext, AdaptiveOptContextData, AdaptiveOptRequestSchema

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from openai.types.chat import ChatCompletionMessageParam
@@ -69,6 +71,7 @@ async def perform_adaptive_optimize(

        )
        llm_cost = output.cost
    except Exception as e:
        logger.exception("adaptive_optimize LLM call failed: trace_id=%s, user_id=%s", trace_id, user_id)
        debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
        raise LLMOutputUnparseable(str(e)) from e
    debug_log_sensitive_data(f"ClaudeClient optimization response:\n{output.content}")
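`logger.exception` logs at ERROR level and attaches the active exception's traceback to the record, which is what makes these new lines useful for tracking failures. A minimal illustration outside the service code (identifier values are made up):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

trace_id, user_id = "0b1f-example", "u_123"  # illustrative values
try:
    raise RuntimeError("boom")
except Exception:
    # Same pattern as the diff: message plus identifiers, traceback appended automatically.
    logger.exception("adaptive_optimize LLM call failed: trace_id=%s, user_id=%s", trace_id, user_id)
```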
@@ -90,6 +93,7 @@ async def perform_adaptive_optimize(

        new_opt = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
        if not new_opt or not ctx.is_valid_code():
            extracted_code = ctx.extracted_code_and_expl.code if ctx.extracted_code_and_expl else None
            logger.error("adaptive_optimize invalid code: trace_id=%s, user_id=%s", trace_id, user_id)
            raise LLMOutputUnparseable("Invalid code generated " + str(extracted_code), cost=llm_cost)

        # the parent is the last candidate in the previous optimizations
@@ -97,6 +101,7 @@ async def perform_adaptive_optimize(

        new_opt.parent_id = last_optimization_id
        return new_opt, llm_cost  # noqa: TRY300
    except (ValueError, ValidationError, cst.ParserSyntaxError) as exc:
        logger.exception("adaptive_optimize parsing failed: trace_id=%s, user_id=%s", trace_id, user_id)
        sentry_sdk.capture_exception(exc)
        debug_log_sensitive_data(f"{type(exc).__name__} for source:\n{ctx.data.original_source_code}")
        debug_log_sensitive_data(f"Traceback: {exc}")
@@ -123,16 +128,18 @@ async def adaptive_optimize(

    trace_id = data.trace_id
    if not validate_trace_id(trace_id):
        return 400, AdaptiveOptErrorResponseSchema(error="Invalid trace ID. Please provide a valid UUIDv4.")

    try:
        adaptive_optimization_candidate, llm_cost = await perform_adaptive_optimize(
            user_id=request.user, ctx=ctx, trace_id=trace_id
        )
        if adaptive_optimization_candidate is None:
            logger.error("adaptive_optimize endpoint returning 500: trace_id=%s, no candidate generated", trace_id)
            return 500, AdaptiveOptErrorResponseSchema(error="Failed to generate optimization candidate")

    except LLMOutputUnparseable as e:
        return 422, AdaptiveOptErrorResponseSchema(error=str(e))
    total_llm_cost = llm_cost

    fire_and_forget(update_optimization_cost(trace_id=trace_id, cost=total_llm_cost, user_id=request.user))
    fire_and_forget(update_optimization_cost(trace_id=trace_id, cost=llm_cost, user_id=request.user))
    if hasattr(request, "should_log_features") and request.should_log_features:
        fire_and_forget(
            safe_log_features(
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING

import libcst as cst
import sentry_sdk
import stamina
from ninja import NinjaAPI, Schema
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
from pydantic import ValidationError
@@ -14,8 +15,6 @@ from pydantic import ValidationError

from aiservice.analytics.posthog import ph
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data
import stamina

from aiservice.llm import LLMOutputUnparseable, llm_client
from aiservice.llm_models import CODE_REPAIR_MODEL
from authapp.auth import AuthenticatedRequest
@@ -45,8 +44,12 @@ USER_PROMPT = (current_dir / "CODE_REPAIR_USER_PROMPT.md").read_text()


@stamina.retry(on=LLMOutputUnparseable, attempts=2)
async def code_repair(  # noqa: D417
    user_id: str, optimization_id: str, ctx: CodeRepairContext, optimize_model: LLM = CODE_REPAIR_MODEL
) -> CodeRepairIntermediateResponseItemschema:
    user_id: str,
    optimization_id: str,
    ctx: CodeRepairContext,
    trace_id: str = "",
    optimize_model: LLM = CODE_REPAIR_MODEL,
) -> CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema:
    """Repair the given candidate to match the behaviour of the original code.

    Parameters
@@ -73,7 +76,9 @@ async def code_repair(  # noqa: D417

    messages: list[ChatCompletionMessageParam] = [system_message, user_message]
    debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
    try:
        output = await llm_client.call(llm=optimize_model, messages=messages)
        output = await llm_client.call(
            llm=optimize_model, messages=messages, call_type="code_repair", trace_id=trace_id, user_id=user_id
        )
        llm_cost = output.cost
    except Exception as e:
        debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
@@ -160,10 +165,14 @@ async def repair(

        return 200, result

    try:
        code_repair_data = await code_repair(user_id=request.user, optimization_id=data.optimization_id, ctx=ctx)
        code_repair_data = await code_repair(
            user_id=request.user, optimization_id=data.optimization_id, ctx=ctx, trace_id=trace_id
        )
        if isinstance(code_repair_data, CodeRepairErrorResponseSchema):
            return 500, code_repair_data
    except LLMOutputUnparseable as e:
        return 422, CodeRepairErrorResponseSchema(error=str(e))
    total_llm_cost = code_repair_data.llm_cost
    llm_cost = code_repair_data.llm_cost
    try:
        ctx.validate_module()
    except cst.ParserSyntaxError as e:
@@ -180,7 +189,7 @@ async def repair(

        return 422, CodeRepairErrorResponseSchema(error=str(exc))

    async with asyncio.TaskGroup() as tg:
        tg.create_task(update_optimization_cost(trace_id=trace_id, cost=total_llm_cost, user_id=request.user))
        tg.create_task(update_optimization_cost(trace_id=trace_id, cost=llm_cost, user_id=request.user))
        if hasattr(request, "should_log_features") and request.should_log_features:
            tg.create_task(
                safe_log_features(
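With `trace_id` threaded through and the return type widened to `CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema`, the endpoint now branches on the concrete result type instead of relying on exceptions alone. A simplified sketch of that dispatch pattern (the dataclasses below are stand-ins, not the project's real schemas):

```python
from dataclasses import dataclass


@dataclass
class CodeRepairIntermediateResponseItemschema:
    llm_cost: float
    repaired_code: str


@dataclass
class CodeRepairErrorResponseSchema:
    error: str


def to_http(result: CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema) -> tuple[int, object]:
    # Error schema maps to a 500 response; anything else is treated as success.
    if isinstance(result, CodeRepairErrorResponseSchema):
        return 500, result
    return 200, result


print(to_http(CodeRepairErrorResponseSchema(error="repair failed")))               # (500, ...)
print(to_http(CodeRepairIntermediateResponseItemschema(0.02, "def foo(): ...")))   # (200, ...)
```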
@@ -181,6 +181,18 @@ x ="""

    assert result == expected


def test_extract_code_block_with_filepath_annotation() -> None:
    text = "```python:src/main.py\ndef foo(): pass\n```"
    result = extract_code_block(text)
    assert result == "def foo(): pass"


def test_extract_code_block_with_filepath_annotation_fallback() -> None:
    text = "```python:src/main.py\ndef foo(): pass"
    result = extract_code_block(text)
    assert result == "def foo(): pass"


def test_extract_code_block_nested_code_fence_in_triple_quote() -> None:
    # LLM embeds function definition in a triple-quoted string containing ```
    text = '```python\nimport pytest\n_source = """```python:file.py\ndef foo(): pass\n```"""\ndef test_foo():\n assert True\n```'