mirror of
https://github.com/codeflash-ai/codeflash-internal.git
synced 2026-05-04 18:25:18 +00:00
fix: guarantee text response when agent loop produces only thinking blocks
Remove MAX_TOOL_ROUNDS cap so the model decides when to stop calling tools. Add a safety net that makes a final tool-free API call if the loop ends without emitting any visible text, fixing empty assistant bubbles. Clean up redundant comments.
This commit is contained in:
parent
870968e7a7
commit
a3f9c655f9
1 changed file with 34 additions and 18 deletions
|
|
@ -14,7 +14,6 @@ interface ChatMessage {
|
|||
content: string
|
||||
}
|
||||
|
||||
const MAX_TOOL_ROUNDS = 15
|
||||
const KEEPALIVE_INTERVAL_MS = 15_000
|
||||
const ROUND_TIMEOUT_MS = 3 * 60_000 // 3 minutes per API round
|
||||
|
||||
|
|
@ -76,7 +75,6 @@ async function processToolCalls(
|
|||
)
|
||||
if (toolUseBlocks.length === 0) return []
|
||||
|
||||
// Emit tool_start events for all tools
|
||||
for (const block of toolUseBlocks) {
|
||||
enqueue(
|
||||
`data: ${JSON.stringify({
|
||||
|
|
@ -87,7 +85,6 @@ async function processToolCalls(
|
|||
)
|
||||
}
|
||||
|
||||
// Execute all tool calls in parallel
|
||||
const results = await Promise.all(
|
||||
toolUseBlocks.map(async (block) => {
|
||||
const result = await resolveToolCall(
|
||||
|
|
@ -116,10 +113,6 @@ async function processToolCalls(
|
|||
return results
|
||||
}
|
||||
|
||||
// Shared base params for both streaming and non-streaming calls.
|
||||
// Adaptive thinking lets Claude decide how much reasoning is needed per request.
|
||||
// On Opus 4.6, this automatically enables interleaved thinking (thinking between
|
||||
// tool calls) without needing a beta header.
|
||||
function baseParams(
|
||||
systemPrompt: string,
|
||||
conversationMessages: Anthropic.MessageParam[],
|
||||
|
|
@ -181,19 +174,13 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
const keepalive = setInterval(() => enqueue(": keepalive\n\n"), KEEPALIVE_INTERVAL_MS)
|
||||
|
||||
try {
|
||||
// Unified agent loop — each iteration either processes tool calls or
|
||||
// extracts the final text. Uses stream()+finalMessage() to avoid the
|
||||
// SDK's non-streaming timeout (max_tokens 32k estimates >10min).
|
||||
// Each round has a timeout to catch silent connection drops from the
|
||||
// Azure AI Foundry proxy. Thinking blocks from older rounds are
|
||||
// redacted (emptied) before each call to keep context size manageable.
|
||||
let toolRounds = 0
|
||||
while (toolRounds <= MAX_TOOL_ROUNDS) {
|
||||
let emittedText = false
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
enqueue(`data: ${JSON.stringify({ type: "status", message: toolRounds === 0 ? "Thinking…" : "Analyzing…" })}\n\n`)
|
||||
|
||||
// Redact thinking content from previous assistant messages to prevent
|
||||
// context blowup. The API requires the block structure but allows
|
||||
// empty content. Each round's thinking can be 10-50KB.
|
||||
// Redact thinking blocks from prior rounds (each can be 10-50KB)
|
||||
for (const msg of conversationMessages) {
|
||||
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
|
||||
for (const block of msg.content) {
|
||||
|
|
@ -214,8 +201,9 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
|
||||
if (response.stop_reason !== "tool_use") {
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
if (block.type === "text" && block.text) {
|
||||
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
|
||||
emittedText = true
|
||||
}
|
||||
}
|
||||
break
|
||||
|
|
@ -227,6 +215,34 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
toolRounds++
|
||||
}
|
||||
|
||||
// Force a text response if the model only produced thinking blocks
|
||||
if (!emittedText) {
|
||||
enqueue(`data: ${JSON.stringify({ type: "status", message: "Summarizing…" })}\n\n`)
|
||||
|
||||
for (const msg of conversationMessages) {
|
||||
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
|
||||
for (const block of msg.content) {
|
||||
if ((block as { type: string }).type === "thinking") {
|
||||
(block as { thinking: string }).thinking = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const { tools: _, ...noToolsParams } = baseParams(systemPrompt, conversationMessages)
|
||||
const finalStream = client.messages.stream(noToolsParams)
|
||||
const timeout = setTimeout(() => finalStream.abort(), ROUND_TIMEOUT_MS)
|
||||
try {
|
||||
const finalResponse = await finalStream.finalMessage()
|
||||
for (const block of finalResponse.content) {
|
||||
if (block.type === "text") {
|
||||
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
}
|
||||
|
||||
enqueue("data: [DONE]\n\n")
|
||||
} catch (err) {
|
||||
const message = err instanceof Anthropic.APIError
|
||||
|
|
|
|||
Loading…
Reference in a new issue