fix: plug memory leak from LogRecord buffering and unblock async event loop (#2523)
## Summary

- **Memory leak fix**: Added an explicit `LOGGING` config in `settings.py` to prevent unbounded `LogRecord` buffering. Django's `django.request` logger creates WARNING records for 4xx responses with the full `ASGIRequest` (headers, body, payload) pinned in `args`. Without an explicit config, Django's default handlers and Sentry's `enable_logs=True` buffer these indefinitely. Setting `django.request` to ERROR level and removing `enable_logs=True` eliminated the leak — load testing showed an **84% reduction** in per-request memory growth (7.4 → 1.2 KiB/req).
- **Async event loop fix**: Wrapped `parse_and_generate_candidate_schema()` in `asyncio.to_thread()` across all 4 async callers (optimizer, optimizer_line_profiler, jit_rewrite, adaptive_optimizer). This offloads the synchronous libcst parsing + 8-stage postprocessing pipeline to the thread pool, preventing it from blocking the event loop during peak traffic.

## Test plan

- [x] All 550 tests pass (`uv run pytest tests/ --ignore=tests/profiling -x -q`)
- [ ] Monitor Azure memory alerts after deploy — expect a significant reduction in memory growth rate
- [ ] Monitor the 5xx error rate during peak traffic — expect a reduction now that sync postprocessing no longer blocks the event loop

---------

Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
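To make the leak mechanism concrete, here is a minimal sketch using only the stdlib `logging` module (no Django; the `MemoryHandler` and the payload are illustrative stand-ins, not code from this repo). A buffering handler keeps each `LogRecord` alive, and the record's `args` keep whatever they reference alive with it:

```python
import logging
from logging.handlers import MemoryHandler

# flushLevel=ERROR: WARNING records accumulate in the buffer and are never
# flushed, so each record (and everything its args reference) stays reachable.
buffered = MemoryHandler(capacity=10_000, flushLevel=logging.ERROR)
log = logging.getLogger("demo.request")
log.addHandler(buffered)

big_body = b"x" * 10_000_000  # stand-in for a request body held by an ASGIRequest
log.warning("Bad Request: %r", big_body)  # record is buffered, never formatted

print(len(buffered.buffer))                    # 1
print(buffered.buffer[0].args[0] is big_body)  # True: the buffer pins the payload
```

With a plain `StreamHandler`, by contrast, the record is formatted and written immediately and nothing outlives the logging call — that is the behavior the explicit config below opts into.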
Parent: df90110fe8
Commit: d504f111a7
5 changed files with 31 additions and 15 deletions
```diff
@@ -118,11 +118,26 @@ STATIC_URL: str = "static/"
 DEFAULT_AUTO_FIELD: str = "django.db.models.BigAutoField"
 
+# Logging — explicit config prevents unbounded record buffering.
+# Django's default adds AdminEmailHandler which buffers; Sentry's LoggingIntegration
+# adds handlers that capture every record. StreamHandler writes + flushes immediately.
+# django.request at ERROR skips 4xx WARNING logs whose args pin the full ASGIRequest
+# (headers, body, payload) in memory for the lifetime of the LogRecord.
+LOGGING: dict[str, object] = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "handlers": {"console": {"class": "logging.StreamHandler"}},
+    "root": {"handlers": ["console"], "level": "WARNING"},
+    "loggers": {
+        "django": {"handlers": ["console"], "level": "INFO", "propagate": False},
+        "django.request": {"level": "ERROR"},
+    },
+}
+
 # Sentry
 if os.environ.get("ENVIRONMENT", default="") == "PRODUCTION":
     sentry_sdk.init(
         dsn="https://8a857cbf974ca889a46c1b39173db44b@o4506833230561280.ingest.sentry.io/4506833234493440",
         traces_sample_rate=0.1,
         profiles_sample_rate=0.01,
-        enable_logs=True,
     )
```
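As a sanity check (a hypothetical snippet, not part of this diff), the config above can be exercised outside Django to confirm the intended routing: `django.request` WARNINGs are filtered out before a record is even created, while ERRORs propagate to the `django` logger's console handler:

```python
import logging
import logging.config

# Same shape as the LOGGING dict added above.
logging.config.dictConfig({
    "version": 1,
    "disable_existing_loggers": False,
    "handlers": {"console": {"class": "logging.StreamHandler"}},
    "root": {"handlers": ["console"], "level": "WARNING"},
    "loggers": {
        "django": {"handlers": ["console"], "level": "INFO", "propagate": False},
        "django.request": {"level": "ERROR"},
    },
})

req = logging.getLogger("django.request")
req.warning("Bad Request: /some-path")  # below ERROR: no LogRecord is created
req.error("Server Error: /some-path")   # emitted via the parent "django" console handler
```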
```diff
@@ -83,7 +83,7 @@ async def perform_adaptive_optimize(
 
     try:
         ctx.extract_code_and_explanation_from_llm_res(llm_res)
-        new_opt = ctx.parse_and_generate_candidate_schema()
+        new_opt = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
         if not new_opt or not ctx.is_valid_code():
             extracted_code = ctx.extracted_code_and_expl.code if ctx.extracted_code_and_expl else None
             return (None, None, AdaptiveOptErrorResponseSchema(error="Invalid code generated " + str(extracted_code)))
```
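For reviewers unfamiliar with the pattern, a self-contained sketch of what this change buys (names here are illustrative, not from this codebase): a synchronous, CPU-bound call made directly inside a coroutine freezes every other task on the loop for its full duration, while `asyncio.to_thread` runs it on the default executor so the loop keeps scheduling:

```python
import asyncio
import time


def parse_candidate(src: str) -> str:
    # Stand-in for the synchronous libcst parse + postprocessing pipeline.
    time.sleep(1.0)  # pretend this is 1s of CPU-bound work
    return src.upper()


async def heartbeat() -> None:
    for _ in range(4):
        print("loop is alive")
        await asyncio.sleep(0.25)


async def main() -> None:
    # Called inline, parse_candidate("...") would silence heartbeat for a full second.
    result, _ = await asyncio.gather(
        asyncio.to_thread(parse_candidate, "def f(): ..."),  # runs off-loop
        heartbeat(),  # keeps printing while the parse runs
    )
    print(result)


asyncio.run(main())
```

The same wrapper is applied in the jit_rewrite, optimizer, and optimizer_line_profiler hunks below.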
```diff
@@ -86,7 +86,7 @@ async def jit_rewrite_python_code_single(
     )
     ctx.extract_code_and_explanation_from_llm_res(output.content)
     try:
-        res = ctx.parse_and_generate_candidate_schema()
+        res = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
         if res is not None and ctx.is_valid_code():
             return res, llm_cost, jit_rewrite_model.name
     except (ValueError, ValidationError, cst.ParserSyntaxError) as e:
```
```diff
@@ -98,7 +98,7 @@ async def generate_optimization_candidate(
 
     ctx.extract_code_and_explanation_from_llm_res(output.content)
     try:
-        res = ctx.parse_and_generate_candidate_schema()
+        res = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
         if res is not None and ctx.is_valid_code():
             return res, llm_cost, optimize_model.name
     except (ValueError, ValidationError, cst.ParserSyntaxError) as e:
```
```diff
@@ -3,7 +3,7 @@ from __future__ import annotations
 import asyncio
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from ninja import NinjaAPI
 from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
```
```diff
@@ -31,6 +31,7 @@ from core.shared.optimizer_schemas import (
 )
 
 if TYPE_CHECKING:
+    from authapp.auth import AuthenticatedRequest
     from openai.types.chat import ChatCompletionMessageParam
 
     from aiservice.llm_models import LLM
```
```diff
@@ -74,7 +75,7 @@ async def optimize_python_code_line_profiler_single(
     debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
     # TODO: Verify if the context window length is within the model capability
 
-    obs_context: dict = {}
+    obs_context: dict[str, Any] = {}
     if call_sequence is not None:
         obs_context["call_sequence"] = call_sequence
 
```
```diff
@@ -104,7 +105,7 @@ async def optimize_python_code_line_profiler_single(
     )
 
     ctx.extract_code_and_explanation_from_llm_res(output.content)
-    res = ctx.parse_and_generate_candidate_schema()
+    res = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
     if res is not None and ctx.is_valid_code():
         return res, llm_cost, optimize_model.name
 
```
```diff
@@ -120,7 +121,7 @@ async def optimize_python_code_line_profiler(
     dependency_code: str | None = None,
     n_candidates: int = 0,
     python_version: tuple[int, int, int] = (3, 12, 9),
-) -> tuple[list[OptimizeResponseItemSchema], float, dict[str, dict], dict[str, str]]:
+) -> tuple[list[OptimizeResponseItemSchema], float, dict[str, dict[str, str]], dict[str, str]]:
     """Run parallel line profiler optimizations with multiple models.
 
     Returns:
```
```diff
@@ -167,7 +168,7 @@ async def optimize_python_code_line_profiler(
     # Collect results
     optimization_results: list[OptimizeResponseItemSchema] = []
     total_cost = 0.0
-    code_and_explanations: dict[str, dict] = {}
+    code_and_explanations: dict[str, dict[str, str]] = {}
     optimization_models: dict[str, str] = {}
 
     for task, task_ctx in tasks:
```
```diff
@@ -187,7 +188,9 @@ async def optimize_python_code_line_profiler(
 
 @optimize_line_profiler_api.post(
     "/", response={200: OptimizeResponseSchema, 400: OptimizeErrorResponseSchema, 500: OptimizeErrorResponseSchema}
 )
-async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]:  # noqa: ANN001
+async def optimize(
+    request: AuthenticatedRequest, data: OptimizeSchemaLP
+) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]:
     if data.rerun_trace_id:
         from core.shared.replay import get_rerun_record, rerun_optimize  # noqa: PLC0415
```
```diff
@@ -278,7 +281,7 @@ async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]:
             n_candidates=data.n_candidates,
         )
         # JavaScript path doesn't have code_and_explanations dict like Python
-        code_and_explanations: dict[str, dict] = {}
+        code_and_explanations: dict[str, dict[str, str]] = {}
 
     elif language == "java":
         # Java path
```
```diff
@@ -329,11 +332,9 @@ async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]:
     system_prompt = SYSTEM_PROMPT
     if data.is_numerical_code:
         system_prompt += f"\n{JIT_INSTRUCTIONS}\n"
-    ctx: BaseOptimizerContext = BaseOptimizerContext.get_dynamic_context(
-        system_prompt, USER_PROMPT, data.source_code, DiffMethod.NO_DIFF
-    )
+    ctx = BaseOptimizerContext.get_dynamic_context(system_prompt, USER_PROMPT, data.source_code, DiffMethod.NO_DIFF)
     try:
-        python_version: tuple[int, int, int] = parse_python_version(data.python_version or "3.12.0")
+        python_version = parse_python_version(data.python_version or "3.12.0")
     except:  # noqa: E722
         return 400, OptimizeErrorResponseSchema(
             error="Invalid Python version, it should look like 3.x.x. We only support Python 3.9 and above."
```