mirror of
https://github.com/codeflash-ai/codeflash-internal.git
synced 2026-05-04 18:25:18 +00:00
fix: prevent context blowup by redacting thinking blocks between rounds
Thinking blocks from previous tool rounds (10-50KB each) were accumulating in conversation history, causing Azure AI Foundry to hang after 4+ rounds. Redact thinking content before each API call while preserving the required block structure. Also adds a per-round timeout safety net and status indicators between rounds.
This commit is contained in:
parent
fbcc283e97
commit
ae2bff113d
2 changed files with 42 additions and 5 deletions
|
|
@ -16,6 +16,7 @@ interface ChatMessage {
|
|||
|
||||
const MAX_TOOL_ROUNDS = 15
|
||||
const KEEPALIVE_INTERVAL_MS = 15_000
|
||||
const ROUND_TIMEOUT_MS = 3 * 60_000 // 3 minutes per API round
|
||||
|
||||
const TOOL_DISPLAY_NAMES: Record<string, string> = {
|
||||
get_original_code: "Reading original code",
|
||||
|
|
@ -183,15 +184,35 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
// Unified agent loop — each iteration either processes tool calls or
|
||||
// extracts the final text. Uses stream()+finalMessage() to avoid the
|
||||
// SDK's non-streaming timeout (max_tokens 32k estimates >10min).
|
||||
// No separate "final streaming call" — when the loop gets a non-tool
|
||||
// response, it extracts text directly, saving an extra API round-trip.
|
||||
// Each round has a timeout to catch silent connection drops from the
|
||||
// Azure AI Foundry proxy. Thinking blocks from older rounds are
|
||||
// redacted (emptied) before each call to keep context size manageable.
|
||||
let toolRounds = 0
|
||||
while (toolRounds <= MAX_TOOL_ROUNDS) {
|
||||
enqueue(`data: ${JSON.stringify({ type: "status", message: toolRounds === 0 ? "Thinking…" : "Analyzing…" })}\n\n`)
|
||||
|
||||
// Redact thinking content from previous assistant messages to prevent
|
||||
// context blowup. The API requires the block structure but allows
|
||||
// empty content. Each round's thinking can be 10-50KB.
|
||||
for (const msg of conversationMessages) {
|
||||
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
|
||||
for (const block of msg.content) {
|
||||
if ((block as { type: string }).type === "thinking") {
|
||||
(block as { thinking: string }).thinking = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const messageStream = client.messages.stream(baseParams(systemPrompt, conversationMessages))
|
||||
const response = await messageStream.finalMessage()
|
||||
const timeout = setTimeout(() => messageStream.abort(), ROUND_TIMEOUT_MS)
|
||||
let response: Anthropic.Message
|
||||
try {
|
||||
response = await messageStream.finalMessage()
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
|
||||
if (response.stop_reason !== "tool_use") {
|
||||
// Final response — extract text blocks and send to client
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
const [isStreaming, setIsStreaming] = useState(false)
|
||||
const [completedRounds, setCompletedRounds] = useState<ToolStep[][]>([])
|
||||
const [activeSteps, setActiveSteps] = useState<ToolStep[]>([])
|
||||
const [statusMessage, setStatusMessage] = useState<string | null>(null)
|
||||
const messagesEndRef = useRef<HTMLDivElement>(null)
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null)
|
||||
const abortRef = useRef<AbortController | null>(null)
|
||||
|
|
@ -44,7 +45,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
|
||||
useEffect(() => {
|
||||
scrollToBottom()
|
||||
}, [messages, completedRounds, activeSteps, scrollToBottom])
|
||||
}, [messages, completedRounds, activeSteps, statusMessage, scrollToBottom])
|
||||
|
||||
useEffect(() => {
|
||||
if (isOpen) {
|
||||
|
|
@ -104,8 +105,14 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
try {
|
||||
const parsed = JSON.parse(data)
|
||||
|
||||
if (parsed.type === "status") {
|
||||
setStatusMessage(parsed.message ?? null)
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle typed events (new protocol)
|
||||
if (parsed.type === "tool_start") {
|
||||
setStatusMessage(null)
|
||||
setActiveSteps((prev) => {
|
||||
// If all previous steps are done, this is a new round — commit previous steps
|
||||
if (prev.length > 0 && prev.every((s) => s.status === "done")) {
|
||||
|
|
@ -142,6 +149,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
// Handle text — both new {type: "text", text} and old {text} formats
|
||||
const textContent = parsed.type === "text" ? parsed.text : parsed.text
|
||||
if (textContent) {
|
||||
setStatusMessage(null)
|
||||
setMessages((prev) => {
|
||||
const updated = [...prev]
|
||||
const last = updated[updated.length - 1]
|
||||
|
|
@ -185,6 +193,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
}
|
||||
return []
|
||||
})
|
||||
setStatusMessage(null)
|
||||
setIsStreaming(false)
|
||||
abortRef.current = null
|
||||
}
|
||||
|
|
@ -271,6 +280,13 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
|
||||
{activeSteps.length > 0 && <ToolRoundBubble steps={activeSteps} isActive />}
|
||||
|
||||
{statusMessage && (
|
||||
<div className="flex items-center gap-2 text-xs text-zinc-400 dark:text-zinc-500 px-2">
|
||||
<Loader2 className="h-3 w-3 animate-spin" />
|
||||
<span>{statusMessage}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div ref={messagesEndRef} />
|
||||
</div>
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue