feat: add codebase browsing and LLM call inspection to observability chat

Give the observability chat agent four new tools: get_llm_call_detail
(full prompt/response for any LLM call), read_file, search_code, and
list_directory for navigating the codeflash-internal and codeflash CLI
repos. This lets the agent trace problems end-to-end from trace data
through actual prompts to pipeline source code.

- Add id to IndexedTraceData.llmCalls so the agent can reference calls
- Make resolveToolCall async (Prisma + fs + child_process)
- Make processToolUseResponse async to match
- Bump MAX_TOOL_ROUNDS from 5 to 15 for multi-step code browsing
- Add CODEFLASH_INTERNAL_REPO_PATH / CODEFLASH_CLI_REPO_PATH env vars
- Add path traversal protection, file size caps, and search result limits
This commit is contained in:
Kevin Turcios 2026-02-15 00:03:25 -05:00
parent eecd3ba4ce
commit 782ee508de
3 changed files with 334 additions and 11 deletions

View file

@ -8,3 +8,5 @@ NPM_TOKEN
SCM_DO_BUILD_DURING_DEPLOYMENT
WEBSITE_HEALTHCHECK_MAXPINGFAILURES
WEBSITE_HTTPLOGGING_RETENTION_DAYS
CODEFLASH_INTERNAL_REPO_PATH=
CODEFLASH_CLI_REPO_PATH=

View file

@ -14,7 +14,7 @@ interface ChatMessage {
content: string
}
const MAX_TOOL_ROUNDS = 5
const MAX_TOOL_ROUNDS = 15
const KEEPALIVE_INTERVAL_MS = 15_000
function getClient(): Anthropic {
@ -26,14 +26,14 @@ function getClient(): Anthropic {
return new Anthropic({ baseURL, apiKey })
}
function processToolUseResponse(
async function processToolUseResponse(
response: Anthropic.Message,
indexed: IndexedTraceData
): Anthropic.ToolResultBlockParam[] {
): Promise<Anthropic.ToolResultBlockParam[]> {
const toolResults: Anthropic.ToolResultBlockParam[] = []
for (const block of response.content) {
if (block.type === "tool_use") {
const result = resolveToolCall(
const result = await resolveToolCall(
block.name,
(block.input as Record<string, unknown>) ?? {},
indexed
@ -112,7 +112,7 @@ export async function POST(request: NextRequest): Promise<Response> {
}
conversationMessages.push({ role: "assistant", content: response.content })
const toolResults = processToolUseResponse(response, indexed)
const toolResults = await processToolUseResponse(response, indexed)
conversationMessages.push({ role: "user", content: toolResults })
toolRounds++
}
@ -134,7 +134,7 @@ export async function POST(request: NextRequest): Promise<Response> {
if (finalMessage.stop_reason === "tool_use") {
conversationMessages.push({ role: "assistant", content: finalMessage.content })
const toolResults = processToolUseResponse(finalMessage, indexed)
const toolResults = await processToolUseResponse(finalMessage, indexed)
conversationMessages.push({ role: "user", content: toolResults })
const followUpStream = client.messages.stream({

View file

@ -1,3 +1,7 @@
import path from "node:path"
import { readdir, readFile, stat } from "node:fs/promises"
import { execFile } from "node:child_process"
import { prisma } from "@/lib/prisma"
import type { TraceData } from "./get-trace-data"
export interface IndexedTraceData {
@ -27,6 +31,7 @@ export interface IndexedTraceData {
context: Record<string, unknown> | null
}>
llmCalls: Array<{
id: string
call_type: string | null
model_name: string | null
status: string
@ -107,6 +112,7 @@ export function indexTraceData(traceData: TraceData): IndexedTraceData {
context: e.context as Record<string, unknown> | null,
})),
llmCalls: rawLlmCalls.map((c) => ({
id: c.id,
call_type: c.call_type,
model_name: c.model_name,
status: c.status,
@ -129,6 +135,36 @@ function findModelForTestGroup(testIndex: number, data: IndexedTraceData): strin
return match?.model_name ?? null
}
// --- Codebase browsing helpers ---
/**
 * Map a repository identifier to the directory configured for it.
 *
 * "codeflash-internal" reads CODEFLASH_INTERNAL_REPO_PATH and "codeflash"
 * reads CODEFLASH_CLI_REPO_PATH from the process environment.
 *
 * @param repo - Repository identifier supplied by the tool call.
 * @returns The configured path, or null when the identifier is unknown or
 *   the matching environment variable is unset or empty.
 */
function getRepoRoot(repo: string): string | null {
  let configured: string | undefined
  switch (repo) {
    case "codeflash-internal":
      configured = process.env.CODEFLASH_INTERNAL_REPO_PATH
      break
    case "codeflash":
      configured = process.env.CODEFLASH_CLI_REPO_PATH
      break
    default:
      return null
  }
  // An empty string counts as "not configured", same as an unset variable.
  return configured ? configured : null
}
/**
 * Resolve a caller-supplied relative path against a repository root and
 * reject anything that would land outside that root.
 *
 * The containment check is done with `path.relative` on the resolved root
 * rather than a raw string-prefix comparison, so that:
 *  - a root configured with a trailing slash (or as a relative path) still
 *    accepts "." and other in-repo paths,
 *  - a sibling directory sharing the root's name as a prefix is not
 *    mistaken for being inside it,
 *  - entries whose names merely begin with ".." (e.g. "..hidden") are
 *    not misclassified as traversal.
 *
 * The check is purely lexical; symbolic links inside the repository are not
 * resolved here.
 *
 * @param repoRoot - Directory that acts as the sandbox root.
 * @param relativePath - Path relative to `repoRoot`, as supplied by the tool call.
 * @returns `{ resolved }` with the absolute path when it stays inside the
 *   root, otherwise `{ error }` with a human-readable message.
 */
function resolveAndValidatePath(
  repoRoot: string,
  relativePath: string,
): { resolved: string } | { error: string } {
  // Tool arguments originate from model output and are only cast to string by
  // callers; guard so a missing argument yields an error instead of a throw.
  if (typeof relativePath !== "string") {
    return { error: "Path must be a string." }
  }
  if (path.isAbsolute(relativePath)) {
    return { error: "Path traversal is not allowed." }
  }

  const root = path.resolve(repoRoot)
  const resolved = path.resolve(root, relativePath)

  // A path is outside the root exactly when walking from the root to it needs
  // to go up ("..", "../x") or, on Windows, switch drives (absolute result).
  const relativeToRoot = path.relative(root, resolved)
  const escapesRoot =
    relativeToRoot === ".." ||
    relativeToRoot.startsWith(".." + path.sep) ||
    path.isAbsolute(relativeToRoot)
  if (escapesRoot) {
    return { error: "Path traversal is not allowed." }
  }

  return { resolved }
}
/**
 * Cap a string at `max` UTF-16 code units.
 *
 * @param s - Text to cap.
 * @param max - Maximum number of characters to keep.
 * @returns `s` unchanged when it already fits; otherwise its first `max`
 *   characters followed by a newline and a marker noting the cut-off point.
 */
function truncate(s: string, max: number): string {
  const fits = s.length <= max
  return fits ? s : `${s.slice(0, max)}\n... [truncated at ${max} chars]`
}
// --- System prompt ---
export function buildSummaryPrompt(data: IndexedTraceData): string {
const lines: string[] = []
@ -141,7 +177,12 @@ export function buildSummaryPrompt(data: IndexedTraceData): string {
"record everything that happens during an optimization run. When the user shares information, trust " +
"it as ground truth even if the trace data doesn't confirm it. Never say 'the trace shows no errors' " +
"to contradict what the user is reporting. Instead, use the trace data you DO have to help explain " +
"and investigate what they're seeing."
"and investigate what they're seeing.\n\n" +
"You also have codebase browsing tools (read_file, search_code, list_directory) that let you " +
"navigate the codeflash-internal and codeflash CLI source code, and a get_llm_call_detail tool " +
"to inspect the full prompts and responses of any LLM call in this trace. Use these to trace " +
"problems end-to-end: see what went wrong in the trace, read the actual prompts sent, then " +
"navigate to the pipeline code to suggest a concrete fix.",
)
lines.push("")
@ -157,7 +198,7 @@ export function buildSummaryPrompt(data: IndexedTraceData): string {
"Import the real classes and call their real constructors with real arguments. This ensures " +
"attribute access, method calls, and object protocols behave identically to production.\n" +
"- If many candidates fail or are unranked, check whether mock/fake objects in tests could be " +
"the cause before blaming the optimizations."
"the cause before blaming the optimizations.",
)
lines.push("")
@ -212,6 +253,15 @@ export function buildSummaryPrompt(data: IndexedTraceData): string {
}
}
if (data.llmCalls.length > 0) {
lines.push("")
lines.push("=== LLM CALL IDS (for get_llm_call_detail) ===")
for (const c of data.llmCalls) {
const cost = c.llm_cost != null ? ` $${c.llm_cost.toFixed(4)}` : ""
lines.push(` - ${c.id}: ${c.call_type ?? "unknown"} (${c.model_name ?? "unknown"}, ${c.status}${cost})`)
}
}
return lines.join("\n")
}
@ -271,13 +321,106 @@ export const anthropicToolDefinitions = [
"Returns all errors encountered during the optimization, including test failures and their context.",
input_schema: { type: "object" as const, properties: {} },
},
{
name: "get_llm_call_detail",
description:
"Returns the full prompt, response, and parsing results for a specific LLM call. " +
"Use the call_id from the LLM call IDs listed in the trace overview.",
input_schema: {
type: "object" as const,
properties: {
call_id: {
type: "string",
description: "UUID of the LLM call (from the trace overview).",
},
},
required: ["call_id"],
},
},
{
name: "read_file",
description:
"Read a file from the codeflash-internal or codeflash CLI repository. " +
"Returns file content with line numbers. Use start_line/end_line for large files.",
input_schema: {
type: "object" as const,
properties: {
repo: {
type: "string",
enum: ["codeflash-internal", "codeflash"],
description: "Which repository to read from.",
},
path: {
type: "string",
description: "Relative path within the repo (e.g., 'django/aiservice/core/shared/optimizer_router.py').",
},
start_line: {
type: "number",
description: "1-based start line (default: 1).",
},
end_line: {
type: "number",
description: "1-based end line (default: start_line + 499).",
},
},
required: ["repo", "path"],
},
},
{
name: "search_code",
description:
"Search for a pattern across a repository using ripgrep. Returns matching lines with file paths and line numbers.",
input_schema: {
type: "object" as const,
properties: {
repo: {
type: "string",
enum: ["codeflash-internal", "codeflash"],
description: "Which repository to search.",
},
pattern: {
type: "string",
description: "Regex pattern to search for (ripgrep syntax).",
},
glob: {
type: "string",
description: "File glob filter (e.g., '*.py', '*.ts').",
},
max_results: {
type: "number",
description: "Maximum number of matching lines to return (default: 30, max: 100).",
},
},
required: ["repo", "pattern"],
},
},
{
name: "list_directory",
description:
"List files and directories in a repository path. Directories are listed first, sorted alphabetically.",
input_schema: {
type: "object" as const,
properties: {
repo: {
type: "string",
enum: ["codeflash-internal", "codeflash"],
description: "Which repository to browse.",
},
path: {
type: "string",
description: "Relative path within the repo (default: root).",
},
},
required: ["repo"],
},
},
]
export function resolveToolCall(
export async function resolveToolCall(
name: string,
args: Record<string, unknown>,
data: IndexedTraceData
): string {
data: IndexedTraceData,
): Promise<string> {
switch (name) {
case "get_original_code":
return data.originalCode ?? "No original code available."
@ -344,6 +487,184 @@ export function resolveToolCall(
return parts.join("\n\n")
}
case "get_llm_call_detail": {
const callId = args.call_id as string
if (!data.llmCalls.some((c) => c.id === callId)) {
return `LLM call ${callId} not found in this trace. Available IDs: ${data.llmCalls.map((c) => c.id).join(", ")}`
}
const row = await prisma.llm_calls.findUnique({
where: { id: callId },
select: {
call_type: true,
model_name: true,
status: true,
system_prompt: true,
user_prompt: true,
messages: true,
raw_response: true,
parsed_response: true,
temperature: true,
n_candidates: true,
max_tokens: true,
prompt_tokens: true,
completion_tokens: true,
total_tokens: true,
llm_cost: true,
latency_ms: true,
parsing_status: true,
candidates_generated: true,
candidates_valid: true,
parsing_errors: true,
error_type: true,
error_message: true,
},
})
if (!row) return `LLM call ${callId} not found in database.`
const MAX_FIELD = 15_000
const parts: string[] = []
parts.push(`Call type: ${row.call_type}`)
parts.push(`Model: ${row.model_name}`)
parts.push(`Status: ${row.status}`)
if (row.temperature != null) parts.push(`Temperature: ${row.temperature}`)
if (row.n_candidates != null) parts.push(`N candidates requested: ${row.n_candidates}`)
if (row.max_tokens != null) parts.push(`Max tokens: ${row.max_tokens}`)
parts.push(`Tokens: ${row.prompt_tokens ?? "?"}p / ${row.completion_tokens ?? "?"}c / ${row.total_tokens ?? "?"}t`)
if (row.llm_cost != null) parts.push(`Cost: $${row.llm_cost.toFixed(4)}`)
if (row.latency_ms != null) parts.push(`Latency: ${row.latency_ms}ms`)
if (row.system_prompt) {
parts.push(`\n--- System Prompt ---\n${truncate(row.system_prompt, MAX_FIELD)}`)
}
if (row.user_prompt) {
parts.push(`\n--- User Prompt ---\n${truncate(row.user_prompt, MAX_FIELD)}`)
}
if (row.messages) {
const messagesStr = JSON.stringify(row.messages, null, 2)
parts.push(`\n--- Messages ---\n${truncate(messagesStr, MAX_FIELD)}`)
}
if (row.raw_response) {
parts.push(`\n--- Raw Response ---\n${truncate(row.raw_response, MAX_FIELD)}`)
}
if (row.parsed_response) {
const parsedStr = JSON.stringify(row.parsed_response, null, 2)
parts.push(`\n--- Parsed Response ---\n${truncate(parsedStr, MAX_FIELD)}`)
}
if (row.parsing_status) {
parts.push(`\nParsing status: ${row.parsing_status}`)
if (row.candidates_generated != null) parts.push(`Candidates generated: ${row.candidates_generated}`)
if (row.candidates_valid != null) parts.push(`Candidates valid: ${row.candidates_valid}`)
if (row.parsing_errors) {
parts.push(`Parsing errors: ${truncate(JSON.stringify(row.parsing_errors, null, 2), 5000)}`)
}
}
if (row.error_type) parts.push(`\nError: [${row.error_type}] ${row.error_message ?? ""}`)
return parts.join("\n")
}
case "read_file": {
const repoRoot = getRepoRoot(args.repo as string)
if (!repoRoot) return `Repository path not configured. Set CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH env var.`
const pathResult = resolveAndValidatePath(repoRoot, args.path as string)
if ("error" in pathResult) return pathResult.error
try {
const fileStat = await stat(pathResult.resolved)
if (!fileStat.isFile()) return `Not a file: ${args.path}`
if (fileStat.size > 1_000_000) return `File too large (${fileStat.size} bytes). Max 1MB.`
const content = await readFile(pathResult.resolved, "utf-8")
const allLines = content.split("\n")
const startLine = Math.max(1, (args.start_line as number) || 1)
const endLine = Math.min(allLines.length, (args.end_line as number) || startLine + 499)
const slice = allLines.slice(startLine - 1, endLine)
const numbered = slice.map((line, i) => `${startLine + i}: ${line}`).join("\n")
const header = `File: ${args.path} (lines ${startLine}-${endLine} of ${allLines.length})`
return `${header}\n\n${numbered}`
} catch (err) {
if ((err as NodeJS.ErrnoException).code === "ENOENT") return `File not found: ${args.path}`
return `Error reading file: ${(err as Error).message}`
}
}
case "search_code": {
const repoRoot = getRepoRoot(args.repo as string)
if (!repoRoot) return `Repository path not configured. Set CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH env var.`
const maxResults = Math.min(Math.max(1, (args.max_results as number) || 30), 100)
const rgArgs = [
"--no-heading",
"--line-number",
"--color=never",
"--max-count", String(maxResults),
"--glob", "!.git",
"--glob", "!node_modules",
"--glob", "!__pycache__",
"--glob", "!.next",
"--glob", "!dist",
]
if (args.glob) {
rgArgs.push("--glob", args.glob as string)
}
rgArgs.push(args.pattern as string, repoRoot)
return new Promise<string>((resolve) => {
execFile("rg", rgArgs, { timeout: 10_000, maxBuffer: 1_000_000 }, (err, stdout, stderr) => {
if (err && !stdout) {
if ((err as NodeJS.ErrnoException).code === "ENOENT") {
resolve("ripgrep (rg) not found. Install it to use search_code.")
return
}
if (stderr) {
resolve(`Search error: ${stderr.slice(0, 500)}`)
return
}
resolve("No matches found.")
return
}
// Strip the repo root prefix from output paths for readability
const cleaned = stdout.replace(new RegExp(repoRoot.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "/", "g"), "")
resolve(cleaned.trim() || "No matches found.")
})
})
}
case "list_directory": {
const repoRoot = getRepoRoot(args.repo as string)
if (!repoRoot) return `Repository path not configured. Set CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH env var.`
const relativePath = (args.path as string) || "."
const pathResult = resolveAndValidatePath(repoRoot, relativePath)
if ("error" in pathResult) return pathResult.error
try {
const entries = await readdir(pathResult.resolved, { withFileTypes: true })
const dirs: string[] = []
const files: string[] = []
for (const entry of entries) {
if (entry.name === ".git") continue
if (entry.isDirectory()) dirs.push(entry.name + "/")
else files.push(entry.name)
}
dirs.sort()
files.sort()
const all = [...dirs, ...files]
if (all.length === 0) return `Empty directory: ${relativePath}`
const capped = all.slice(0, 200)
const suffix = all.length > 200 ? `\n... and ${all.length - 200} more entries` : ""
return `Directory: ${relativePath}\n\n${capped.join("\n")}${suffix}`
} catch (err) {
if ((err as NodeJS.ErrnoException).code === "ENOENT") return `Directory not found: ${relativePath}`
if ((err as NodeJS.ErrnoException).code === "ENOTDIR") return `Not a directory: ${relativePath}`
return `Error listing directory: ${(err as Error).message}`
}
}
default:
return `Unknown tool: ${name}`
}