fix: plug memory leak from LogRecord buffering and unblock async event loop (#2523)

## Summary

- **Memory leak fix**: Added explicit `LOGGING` config in `settings.py`
to prevent unbounded `LogRecord` buffering. Django's `django.request`
logger creates WARNING records for 4xx responses with the full
`ASGIRequest` (headers, body, payload) pinned in `args`. Without
explicit config, Django's default handlers and Sentry's
`enable_logs=True` buffer these indefinitely. Setting `django.request`
to ERROR level + removing `enable_logs=True` eliminated the leak — load
testing showed **84% reduction** in per-request memory growth (7.4 → 1.2
KiB/req).

- **Async event loop fix**: Wrapped
`parse_and_generate_candidate_schema()` in `asyncio.to_thread()` across
all 4 async callers (optimizer, optimizer_line_profiler, jit_rewrite,
adaptive_optimizer). This offloads the synchronous libcst parsing +
8-stage postprocessing pipeline to the thread pool, preventing it from
blocking the event loop during peak traffic.

## Test plan

- [x] All 550 tests pass (`uv run pytest tests/ --ignore=tests/profiling
-x -q`)
- [ ] Monitor Azure memory alerts after deploy — expect significant
reduction in memory growth rate
- [ ] Monitor 5xx error rate during peak traffic — expect reduction from
event loop no longer blocked by sync postprocessing

---------

Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Kevin Turcios 2026-04-02 10:57:58 -05:00 committed by GitHub
parent df90110fe8
commit d504f111a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 31 additions and 15 deletions

View file

@@ -118,11 +118,26 @@ STATIC_URL: str = "static/"
DEFAULT_AUTO_FIELD: str = "django.db.models.BigAutoField"
# Logging — explicit config prevents unbounded record buffering.
# Django's default adds AdminEmailHandler which buffers; Sentry's LoggingIntegration
# adds handlers that capture every record. StreamHandler writes + flushes immediately.
# django.request at ERROR skips 4xx WARNING logs whose args pin the full ASGIRequest
# (headers, body, payload) in memory for the lifetime of the LogRecord.
LOGGING: dict[str, object] = {
"version": 1,
"disable_existing_loggers": False,
"handlers": {"console": {"class": "logging.StreamHandler"}},
"root": {"handlers": ["console"], "level": "WARNING"},
"loggers": {
"django": {"handlers": ["console"], "level": "INFO", "propagate": False},
"django.request": {"level": "ERROR"},
},
}
# Sentry
if os.environ.get("ENVIRONMENT", default="") == "PRODUCTION":
sentry_sdk.init(
dsn="https://8a857cbf974ca889a46c1b39173db44b@o4506833230561280.ingest.sentry.io/4506833234493440",
traces_sample_rate=0.1,
profiles_sample_rate=0.01,
enable_logs=True,
)

View file

@@ -83,7 +83,7 @@ async def perform_adaptive_optimize(
try:
ctx.extract_code_and_explanation_from_llm_res(llm_res)
new_opt = ctx.parse_and_generate_candidate_schema()
new_opt = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
if not new_opt or not ctx.is_valid_code():
extracted_code = ctx.extracted_code_and_expl.code if ctx.extracted_code_and_expl else None
return (None, None, AdaptiveOptErrorResponseSchema(error="Invalid code generated " + str(extracted_code)))

View file

@@ -86,7 +86,7 @@ async def jit_rewrite_python_code_single(
)
ctx.extract_code_and_explanation_from_llm_res(output.content)
try:
res = ctx.parse_and_generate_candidate_schema()
res = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
if res is not None and ctx.is_valid_code():
return res, llm_cost, jit_rewrite_model.name
except (ValueError, ValidationError, cst.ParserSyntaxError) as e:

View file

@@ -98,7 +98,7 @@ async def generate_optimization_candidate(
ctx.extract_code_and_explanation_from_llm_res(output.content)
try:
res = ctx.parse_and_generate_candidate_schema()
res = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
if res is not None and ctx.is_valid_code():
return res, llm_cost, optimize_model.name
except (ValueError, ValidationError, cst.ParserSyntaxError) as e:

View file

@@ -3,7 +3,7 @@ from __future__ import annotations
import asyncio
import logging
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any
from ninja import NinjaAPI
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
@@ -31,6 +31,7 @@ from core.shared.optimizer_schemas import (
)
if TYPE_CHECKING:
from authapp.auth import AuthenticatedRequest
from openai.types.chat import ChatCompletionMessageParam
from aiservice.llm_models import LLM
@@ -74,7 +75,7 @@ async def optimize_python_code_line_profiler_single(
debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
# TODO: Verify if the context window length is within the model capability
obs_context: dict = {}
obs_context: dict[str, Any] = {}
if call_sequence is not None:
obs_context["call_sequence"] = call_sequence
@@ -104,7 +105,7 @@ async def optimize_python_code_line_profiler_single(
)
ctx.extract_code_and_explanation_from_llm_res(output.content)
res = ctx.parse_and_generate_candidate_schema()
res = await asyncio.to_thread(ctx.parse_and_generate_candidate_schema)
if res is not None and ctx.is_valid_code():
return res, llm_cost, optimize_model.name
@@ -120,7 +121,7 @@ async def optimize_python_code_line_profiler(
dependency_code: str | None = None,
n_candidates: int = 0,
python_version: tuple[int, int, int] = (3, 12, 9),
) -> tuple[list[OptimizeResponseItemSchema], float, dict[str, dict], dict[str, str]]:
) -> tuple[list[OptimizeResponseItemSchema], float, dict[str, dict[str, str]], dict[str, str]]:
"""Run parallel line profiler optimizations with multiple models.
Returns:
@@ -167,7 +168,7 @@ async def optimize_python_code_line_profiler(
# Collect results
optimization_results: list[OptimizeResponseItemSchema] = []
total_cost = 0.0
code_and_explanations: dict[str, dict] = {}
code_and_explanations: dict[str, dict[str, str]] = {}
optimization_models: dict[str, str] = {}
for task, task_ctx in tasks:
@@ -187,7 +188,9 @@
@optimize_line_profiler_api.post(
"/", response={200: OptimizeResponseSchema, 400: OptimizeErrorResponseSchema, 500: OptimizeErrorResponseSchema}
)
async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]: # noqa: ANN001
async def optimize(
request: AuthenticatedRequest, data: OptimizeSchemaLP
) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]:
if data.rerun_trace_id:
from core.shared.replay import get_rerun_record, rerun_optimize # noqa: PLC0415
@@ -278,7 +281,7 @@ async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeRespon
n_candidates=data.n_candidates,
)
# JavaScript path doesn't have code_and_explanations dict like Python
code_and_explanations: dict[str, dict] = {}
code_and_explanations: dict[str, dict[str, str]] = {}
elif language == "java":
# Java path
@@ -329,11 +332,9 @@ async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeRespon
system_prompt = SYSTEM_PROMPT
if data.is_numerical_code:
system_prompt += f"\n{JIT_INSTRUCTIONS}\n"
ctx: BaseOptimizerContext = BaseOptimizerContext.get_dynamic_context(
system_prompt, USER_PROMPT, data.source_code, DiffMethod.NO_DIFF
)
ctx = BaseOptimizerContext.get_dynamic_context(system_prompt, USER_PROMPT, data.source_code, DiffMethod.NO_DIFF)
try:
python_version: tuple[int, int, int] = parse_python_version(data.python_version or "3.12.0")
python_version = parse_python_version(data.python_version or "3.12.0")
except: # noqa: E722
return 400, OptimizeErrorResponseSchema(
error="Invalid Python version, it should look like 3.x.x. We only support Python 3.9 and above."