fix: prevent context blowup by redacting thinking blocks between rounds

Thinking blocks from previous tool rounds (10-50KB each) were
accumulating in conversation history, causing Azure AI Foundry to hang
after 4+ rounds. Redact thinking content before each API call while
preserving the required block structure. Also add a per-round timeout
safety net and status indicators between rounds.
This commit is contained in:
Kevin Turcios 2026-02-15 02:12:48 -05:00
parent fbcc283e97
commit ae2bff113d
2 changed files with 42 additions and 5 deletions

View file

@ -16,6 +16,7 @@ interface ChatMessage {
const MAX_TOOL_ROUNDS = 15
const KEEPALIVE_INTERVAL_MS = 15_000
const ROUND_TIMEOUT_MS = 3 * 60_000 // 3 minutes per API round
const TOOL_DISPLAY_NAMES: Record<string, string> = {
get_original_code: "Reading original code",
@ -183,15 +184,35 @@ export async function POST(request: NextRequest): Promise<Response> {
// Unified agent loop — each iteration either processes tool calls or
// extracts the final text. Uses stream()+finalMessage() to avoid the
// SDK's non-streaming timeout (max_tokens 32k estimates >10min).
// No separate "final streaming call" — when the loop gets a non-tool
// response, it extracts text directly, saving an extra API round-trip.
// Each round has a timeout to catch silent connection drops from the
// Azure AI Foundry proxy. Thinking blocks from older rounds are
// redacted (emptied) before each call to keep context size manageable.
let toolRounds = 0
while (toolRounds <= MAX_TOOL_ROUNDS) {
enqueue(`data: ${JSON.stringify({ type: "status", message: toolRounds === 0 ? "Thinking…" : "Analyzing…" })}\n\n`)
// Redact thinking content from previous assistant messages to prevent
// context blowup. The API requires the block structure but allows
// empty content. Each round's thinking can be 10-50KB.
for (const msg of conversationMessages) {
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
for (const block of msg.content) {
if ((block as { type: string }).type === "thinking") {
(block as { thinking: string }).thinking = ""
}
}
}
const messageStream = client.messages.stream(baseParams(systemPrompt, conversationMessages))
const response = await messageStream.finalMessage()
const timeout = setTimeout(() => messageStream.abort(), ROUND_TIMEOUT_MS)
let response: Anthropic.Message
try {
response = await messageStream.finalMessage()
} finally {
clearTimeout(timeout)
}
if (response.stop_reason !== "tool_use") {
// Final response — extract text blocks and send to client
for (const block of response.content) {
if (block.type === "text") {
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)

View file

@ -34,6 +34,7 @@ export const TimelineChat = memo(function TimelineChat({
const [isStreaming, setIsStreaming] = useState(false)
const [completedRounds, setCompletedRounds] = useState<ToolStep[][]>([])
const [activeSteps, setActiveSteps] = useState<ToolStep[]>([])
const [statusMessage, setStatusMessage] = useState<string | null>(null)
const messagesEndRef = useRef<HTMLDivElement>(null)
const inputRef = useRef<HTMLTextAreaElement>(null)
const abortRef = useRef<AbortController | null>(null)
@ -44,7 +45,7 @@ export const TimelineChat = memo(function TimelineChat({
useEffect(() => {
scrollToBottom()
}, [messages, completedRounds, activeSteps, scrollToBottom])
}, [messages, completedRounds, activeSteps, statusMessage, scrollToBottom])
useEffect(() => {
if (isOpen) {
@ -104,8 +105,14 @@ export const TimelineChat = memo(function TimelineChat({
try {
const parsed = JSON.parse(data)
if (parsed.type === "status") {
setStatusMessage(parsed.message ?? null)
continue
}
// Handle typed events (new protocol)
if (parsed.type === "tool_start") {
setStatusMessage(null)
setActiveSteps((prev) => {
// If all previous steps are done, this is a new round — commit previous steps
if (prev.length > 0 && prev.every((s) => s.status === "done")) {
@ -142,6 +149,7 @@ export const TimelineChat = memo(function TimelineChat({
// Handle text — both new {type: "text", text} and old {text} formats
const textContent = parsed.type === "text" ? parsed.text : parsed.text
if (textContent) {
setStatusMessage(null)
setMessages((prev) => {
const updated = [...prev]
const last = updated[updated.length - 1]
@ -185,6 +193,7 @@ export const TimelineChat = memo(function TimelineChat({
}
return []
})
setStatusMessage(null)
setIsStreaming(false)
abortRef.current = null
}
@ -271,6 +280,13 @@ export const TimelineChat = memo(function TimelineChat({
{activeSteps.length > 0 && <ToolRoundBubble steps={activeSteps} isActive />}
{statusMessage && (
<div className="flex items-center gap-2 text-xs text-zinc-400 dark:text-zinc-500 px-2">
<Loader2 className="h-3 w-3 animate-spin" />
<span>{statusMessage}</span>
</div>
)}
<div ref={messagesEndRef} />
</div>