Merge pull request #2548 from codeflash-ai/fix/llm-close-errors

Fix: Handle LLM client close() errors gracefully
This commit is contained in:
Aseem Saxena 2026-04-03 14:37:35 -07:00 committed by GitHub
commit c54904daf9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 134 additions and 2 deletions

View file

@@ -93,10 +93,21 @@ class LLMClient:
loop = asyncio.get_running_loop()
if loop is not self.client_loop:
# Close old clients to prevent connection leaks and event loop closure errors
# Ignore errors if the client is already closed or the transport is in a bad state
if self.openai_client is not None:
await self.openai_client.close()
try:
await self.openai_client.close()
except Exception as e:
logger.debug(
"Failed to close OpenAI client (already closed or transport error): %s", type(e).__name__
)
if self.anthropic_client is not None:
await self.anthropic_client.close()
try:
await self.anthropic_client.close()
except Exception as e:
logger.debug(
"Failed to close Anthropic client (already closed or transport error): %s", type(e).__name__
)
self.client_loop = loop
self.background_tasks = set()

View file

@@ -0,0 +1,121 @@
"""
Test for LLM client close() error handling.
This test verifies that the LLMClient handles exceptions gracefully when
closing clients during event loop changes, preventing 500 errors.
"""
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
class TestLLMClientClose:
    """Test LLMClient handles close() failures gracefully.

    Both tests drive the same scenario — an old client whose close() raises
    while the LLMClient is rotating clients after an event-loop change — and
    differ only in the error message raised by close(). The shared driver
    lives in _assert_close_error_is_handled.
    """

    async def _assert_close_error_is_handled(self, close_error_message: str) -> None:
        """Run two LLMClient.call() invocations with a close() that raises.

        The first call creates and caches the clients; resetting
        ``client_loop`` to None then forces the event-loop-change branch, so
        the second call must close the old client. close() is mocked to raise
        RuntimeError(close_error_message); if that error escapes call(), the
        close() failure was not handled and the test fails.

        Args:
            close_error_message: message of the RuntimeError raised by the
                mocked close(); also used to recognize an unhandled leak.
        """
        from aiservice.llm import LLMClient
        from aiservice.llm_models import OpenAI_GPT_4_1

        # Mock client whose close() always fails, mimicking a dead transport
        # or an already-closed event loop.
        mock_openai = AsyncMock()
        mock_openai.close = AsyncMock(side_effect=RuntimeError(close_error_message))

        # Patch the client creation so LLMClient caches our failing mock.
        with (
            patch("aiservice.llm.AsyncAzureOpenAI", return_value=mock_openai),
            patch("aiservice.llm.has_openai", True),
            patch("aiservice.llm.has_anthropic", False),
        ):
            client = LLMClient()

            # First call - creates and caches the clients.
            with patch.object(client, "call_openai", AsyncMock()):
                try:
                    await client.call(
                        llm=OpenAI_GPT_4_1(),
                        messages=[{"role": "user", "content": "test"}],
                        call_type="test",
                        trace_id="test-trace-1",
                    )
                except Exception:
                    pass  # Ignore call failures, we only care about close()

            # Force event loop change detection so the next call tries to
            # close the old (failing) client.
            client.client_loop = None

            # Second call - must swallow the close() error, not propagate it.
            with patch.object(client, "call_openai", AsyncMock()):
                try:
                    await client.call(
                        llm=OpenAI_GPT_4_1(),
                        messages=[{"role": "user", "content": "test"}],
                        call_type="test",
                        trace_id="test-trace-2",
                    )
                except RuntimeError as e:
                    if close_error_message in str(e):
                        pytest.fail(
                            f"close() error was not handled: {e}. "
                            "This causes 500 errors in production."
                        )
                    raise

    @pytest.mark.asyncio
    async def test_close_handles_transport_errors(self) -> None:
        """
        Test that client close() errors are caught and don't propagate.

        This reproduces the bug where httpx transport errors during close()
        cause 500 errors on /ai/optimization_review.
        """
        await self._assert_close_error_is_handled("Transport already closed")

    @pytest.mark.asyncio
    async def test_close_handles_event_loop_closed_error(self) -> None:
        """
        Test that event loop closed errors during close() are handled.

        This handles the case where the event loop is closed before we
        try to close the client.
        """
        await self._assert_close_error_is_handled("Event loop is closed")