mirror of
https://github.com/codeflash-ai/codeflash-internal.git
synced 2026-05-04 18:25:18 +00:00
fix: prevent context blowup by redacting thinking blocks between rounds
Thinking blocks from previous tool rounds (10-50KB each) were accumulating in conversation history, causing Azure AI Foundry to hang after 4+ rounds. Redact thinking content before each API call while preserving the required block structure. Also adds a per-round timeout safety net and status indicators between rounds.
This commit is contained in:
parent
fbcc283e97
commit
ae2bff113d
2 changed files with 42 additions and 5 deletions
|
|
@ -16,6 +16,7 @@ interface ChatMessage {
|
|||
|
||||
const MAX_TOOL_ROUNDS = 15
|
||||
const KEEPALIVE_INTERVAL_MS = 15_000
|
||||
const ROUND_TIMEOUT_MS = 3 * 60_000 // 3 minutes per API round
|
||||
|
||||
const TOOL_DISPLAY_NAMES: Record<string, string> = {
|
||||
get_original_code: "Reading original code",
|
||||
|
|
@ -183,15 +184,35 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
// Unified agent loop — each iteration either processes tool calls or
|
||||
// extracts the final text. Uses stream()+finalMessage() to avoid the
|
||||
// SDK's non-streaming timeout (max_tokens 32k estimates >10min).
|
||||
// No separate "final streaming call" — when the loop gets a non-tool
|
||||
// response, it extracts text directly, saving an extra API round-trip.
|
||||
// Each round has a timeout to catch silent connection drops from the
|
||||
// Azure AI Foundry proxy. Thinking blocks from older rounds are
|
||||
// redacted (emptied) before each call to keep context size manageable.
|
||||
let toolRounds = 0
|
||||
while (toolRounds <= MAX_TOOL_ROUNDS) {
|
||||
enqueue(`data: ${JSON.stringify({ type: "status", message: toolRounds === 0 ? "Thinking…" : "Analyzing…" })}\n\n`)
|
||||
|
||||
// Redact thinking content from previous assistant messages to prevent
|
||||
// context blowup. The API requires the block structure but allows
|
||||
// empty content. Each round's thinking can be 10-50KB.
|
||||
for (const msg of conversationMessages) {
|
||||
if (msg.role !== "assistant" || !Array.isArray(msg.content)) continue
|
||||
for (const block of msg.content) {
|
||||
if ((block as { type: string }).type === "thinking") {
|
||||
(block as { thinking: string }).thinking = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const messageStream = client.messages.stream(baseParams(systemPrompt, conversationMessages))
|
||||
const response = await messageStream.finalMessage()
|
||||
const timeout = setTimeout(() => messageStream.abort(), ROUND_TIMEOUT_MS)
|
||||
let response: Anthropic.Message
|
||||
try {
|
||||
response = await messageStream.finalMessage()
|
||||
} finally {
|
||||
clearTimeout(timeout)
|
||||
}
|
||||
|
||||
if (response.stop_reason !== "tool_use") {
|
||||
// Final response — extract text blocks and send to client
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
enqueue(`data: ${JSON.stringify({ type: "text", text: block.text })}\n\n`)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
const [isStreaming, setIsStreaming] = useState(false)
|
||||
const [completedRounds, setCompletedRounds] = useState<ToolStep[][]>([])
|
||||
const [activeSteps, setActiveSteps] = useState<ToolStep[]>([])
|
||||
const [statusMessage, setStatusMessage] = useState<string | null>(null)
|
||||
const messagesEndRef = useRef<HTMLDivElement>(null)
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null)
|
||||
const abortRef = useRef<AbortController | null>(null)
|
||||
|
|
@ -44,7 +45,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
|
||||
useEffect(() => {
|
||||
scrollToBottom()
|
||||
}, [messages, completedRounds, activeSteps, scrollToBottom])
|
||||
}, [messages, completedRounds, activeSteps, statusMessage, scrollToBottom])
|
||||
|
||||
useEffect(() => {
|
||||
if (isOpen) {
|
||||
|
|
@ -104,8 +105,14 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
try {
|
||||
const parsed = JSON.parse(data)
|
||||
|
||||
if (parsed.type === "status") {
|
||||
setStatusMessage(parsed.message ?? null)
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle typed events (new protocol)
|
||||
if (parsed.type === "tool_start") {
|
||||
setStatusMessage(null)
|
||||
setActiveSteps((prev) => {
|
||||
// If all previous steps are done, this is a new round — commit previous steps
|
||||
if (prev.length > 0 && prev.every((s) => s.status === "done")) {
|
||||
|
|
@ -142,6 +149,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
// Handle text — both new {type: "text", text} and old {text} formats
|
||||
const textContent = parsed.type === "text" ? parsed.text : parsed.text
|
||||
if (textContent) {
|
||||
setStatusMessage(null)
|
||||
setMessages((prev) => {
|
||||
const updated = [...prev]
|
||||
const last = updated[updated.length - 1]
|
||||
|
|
@ -185,6 +193,7 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
}
|
||||
return []
|
||||
})
|
||||
setStatusMessage(null)
|
||||
setIsStreaming(false)
|
||||
abortRef.current = null
|
||||
}
|
||||
|
|
@ -271,6 +280,13 @@ export const TimelineChat = memo(function TimelineChat({
|
|||
|
||||
{activeSteps.length > 0 && <ToolRoundBubble steps={activeSteps} isActive />}
|
||||
|
||||
{statusMessage && (
|
||||
<div className="flex items-center gap-2 text-xs text-zinc-400 dark:text-zinc-500 px-2">
|
||||
<Loader2 className="h-3 w-3 animate-spin" />
|
||||
<span>{statusMessage}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div ref={messagesEndRef} />
|
||||
</div>
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue