fix: guarantee text response when agent loop produces only thinking blocks

Remove the MAX_TOOL_ROUNDS cap so the model decides when to stop calling
tools. Add a safety net that makes a final tool-free API call if the
loop ends without emitting any visible text, fixing empty assistant
bubbles. Also clean up redundant comments.
This commit is contained in:
Kevin Turcios 2026-02-15 02:36:49 -05:00
parent 870968e7a7
commit a3f9c655f9

View file

@ -14,7 +14,6 @@ interface ChatMessage {
content: string
}
const MAX_TOOL_ROUNDS = 15
const KEEPALIVE_INTERVAL_MS = 15_000
const ROUND_TIMEOUT_MS = 3 * 60_000 // 3 minutes per API round
@ -76,7 +75,6 @@ async function processToolCalls(
)
if (toolUseBlocks.length === 0) return []
// Emit tool_start events for all tools
for (const block of toolUseBlocks) {
enqueue(
`data: ${JSON.stringify({
@ -87,7 +85,6 @@ async function processToolCalls(
)
}
// Execute all tool calls in parallel
const results = await Promise.all(
toolUseBlocks.map(async (block) => {
const result = await resolveToolCall(
@ -116,10 +113,6 @@ async function processToolCalls(
return results
}
// Shared base params for both streaming and non-streaming calls.
// Adaptive thinking lets Claude decide how much reasoning is needed per request.
// On Opus 4.6, this automatically enables interleaved thinking (thinking between
// tool calls) without needing a beta header.
function baseParams(
systemPrompt: string,
conversationMessages: Anthropic.MessageParam[],
@ -181,19 +174,13 @@ export async function POST(request: NextRequest): Promise<Response> {
const keepalive = setInterval(() => enqueue(": keepalive\n\n"), KEEPALIVE_INTERVAL_MS)
try {
// Unified agent loop — each iteration either processes tool calls or
// extracts the final text. Uses stream()+finalMessage() to avoid the
// SDK's non-streaming timeout (max_tokens 32k estimates >10min).
// Each round has a timeout to catch silent connection drops from the
// Azure AI Foundry proxy. Thinking blocks from older rounds are
// redacted (emptied) before each call to keep context size manageable.
let toolRounds = 0
while (toolRounds <= MAX_TOOL_ROUNDS) {
let emittedText = false
// eslint-disable-next-line no-constant-condition
while (true) {
enqueue(`data: ${JSON.stringify({ type: "status", message: toolRounds === 0 ? "Thinking…" : "Analyzing…" })}\n\n`)
// Redact thinking content from previous assistant messages to prevent
// context blowup. The API requires the block structure but allows
// empty content. Each round's thinking can be 10-50KB.
// Redact thinking blocks from prior rounds (each can be 10-50KB)
for (const msg of conversationMessages) {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
for (const block of msg.content) {
@ -214,8 +201,9 @@ export async function POST(request: NextRequest): Promise<Response> {
if (response.stop_reason !== "tool_use") {
for (const block of response.content) {
if (block.type === "text") {
if (block.type === "text" && block.text) {
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
emittedText = true
}
}
break
@ -227,6 +215,34 @@ export async function POST(request: NextRequest): Promise<Response> {
toolRounds++
}
// Force a text response if the model only produced thinking blocks
if (!emittedText) {
enqueue(`data: ${JSON.stringify({ type: "status", message: "Summarizing…" })}\n\n`)
for (const msg of conversationMessages) {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
for (const block of msg.content) {
if ((block as { type: string }).type === "thinking") {
(block as { thinking: string }).thinking = ""
}
}
}
const { tools: _, ...noToolsParams } = baseParams(systemPrompt, conversationMessages)
const finalStream = client.messages.stream(noToolsParams)
const timeout = setTimeout(() => finalStream.abort(), ROUND_TIMEOUT_MS)
try {
const finalResponse = await finalStream.finalMessage()
for (const block of finalResponse.content) {
if (block.type === "text") {
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
}
}
} finally {
clearTimeout(timeout)
}
}
enqueue("data: [DONE]\n\n")
} catch (err) {
const message = err instanceof Anthropic.APIError