fix: guarantee text response when agent loop produces only thinking blocks

Remove the MAX_TOOL_ROUNDS cap so the model decides when to stop calling
tools. Add a safety net that makes a final tool-free API call if the
loop ends without emitting any visible text, fixing empty assistant
bubbles. Also clean up redundant comments.
This commit is contained in:
Kevin Turcios 2026-02-15 02:36:49 -05:00
parent 870968e7a7
commit a3f9c655f9

View file

@ -14,7 +14,6 @@ interface ChatMessage {
content: string
}
const MAX_TOOL_ROUNDS = 15
const KEEPALIVE_INTERVAL_MS = 15_000
const ROUND_TIMEOUT_MS = 3 * 60_000 // 3 minutes per API round
@ -76,7 +75,6 @@ async function processToolCalls(
)
if (toolUseBlocks.length === 0) return []
// Emit tool_start events for all tools
for (const block of toolUseBlocks) {
enqueue(
`data: ${JSON.stringify({
@ -87,7 +85,6 @@ async function processToolCalls(
)
}
// Execute all tool calls in parallel
const results = await Promise.all(
toolUseBlocks.map(async (block) => {
const result = await resolveToolCall(
@ -116,10 +113,6 @@ async function processToolCalls(
return results
}
// Shared base params for both streaming and non-streaming calls.
// Adaptive thinking lets Claude decide how much reasoning is needed per request.
// On Opus 4.6, this automatically enables interleaved thinking (thinking between
// tool calls) without needing a beta header.
function baseParams(
systemPrompt: string,
conversationMessages: Anthropic.MessageParam[],
@ -181,19 +174,13 @@ export async function POST(request: NextRequest): Promise<Response> {
const keepalive = setInterval(() => enqueue(": keepalive\n\n"), KEEPALIVE_INTERVAL_MS)
try {
// Unified agent loop — each iteration either processes tool calls or
// extracts the final text. Uses stream()+finalMessage() to avoid the
// SDK's non-streaming timeout (max_tokens 32k estimates >10min).
// Each round has a timeout to catch silent connection drops from the
// Azure AI Foundry proxy. Thinking blocks from older rounds are
// redacted (emptied) before each call to keep context size manageable.
let toolRounds = 0
while (toolRounds <= MAX_TOOL_ROUNDS) {
let emittedText = false
// eslint-disable-next-line no-constant-condition
while (true) {
enqueue(`data: ${JSON.stringify({ type: "status", message: toolRounds === 0 ? "Thinking…" : "Analyzing…" })}\n\n`)
// Redact thinking content from previous assistant messages to prevent
// context blowup. The API requires the block structure but allows
// empty content. Each round's thinking can be 10-50KB.
// Redact thinking blocks from prior rounds (each can be 10-50KB)
for (const msg of conversationMessages) {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
for (const block of msg.content) {
@ -214,8 +201,9 @@ export async function POST(request: NextRequest): Promise<Response> {
if (response.stop_reason !== "tool_use") {
for (const block of response.content) {
if (block.type === "text") {
if (block.type === "text" && block.text) {
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
emittedText = true
}
}
break
@ -227,6 +215,34 @@ export async function POST(request: NextRequest): Promise<Response> {
toolRounds++
}
// Force a text response if the model only produced thinking blocks
if (!emittedText) {
enqueue(`data: ${JSON.stringify({ type: "status", message: "Summarizing…" })}\n\n`)
for (const msg of conversationMessages) {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
for (const block of msg.content) {
if ((block as { type: string }).type === "thinking") {
(block as { thinking: string }).thinking = ""
}
}
}
const { tools: _, ...noToolsParams } = baseParams(systemPrompt, conversationMessages)
const finalStream = client.messages.stream(noToolsParams)
const timeout = setTimeout(() => finalStream.abort(), ROUND_TIMEOUT_MS)
try {
const finalResponse = await finalStream.finalMessage()
for (const block of finalResponse.content) {
if (block.type === "text") {
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
}
}
} finally {
clearTimeout(timeout)
}
}
enqueue("data: [DONE]\n\n")
} catch (err) {
const message = err instanceof Anthropic.APIError