mirror of
https://github.com/codeflash-ai/codeflash-internal.git
synced 2026-05-04 18:25:18 +00:00
feat: add codebase browsing and LLM call inspection to observability chat
Give the observability chat agent four new tools: get_llm_call_detail (full prompt/response for any LLM call), read_file, search_code, and list_directory for navigating the codeflash-internal and codeflash CLI repos. This lets the agent trace problems end-to-end from trace data through actual prompts to pipeline source code.

- Add id to IndexedTraceData.llmCalls so the agent can reference calls
- Make resolveToolCall async (Prisma + fs + child_process)
- Make processToolUseResponse async to match
- Bump MAX_TOOL_ROUNDS from 5 to 15 for multi-step code browsing
- Add CODEFLASH_INTERNAL_REPO_PATH / CODEFLASH_CLI_REPO_PATH env vars
- Path traversal protection, file size caps, search result limits
This commit is contained in:
parent
eecd3ba4ce
commit
782ee508de
3 changed files with 334 additions and 11 deletions
|
|
@ -8,3 +8,5 @@ NPM_TOKEN
|
|||
SCM_DO_BUILD_DURING_DEPLOYMENT
|
||||
WEBSITE_HEALTHCHECK_MAXPINGFAILURES
|
||||
WEBSITE_HTTPLOGGING_RETENTION_DAYS
|
||||
CODEFLASH_INTERNAL_REPO_PATH=
|
||||
CODEFLASH_CLI_REPO_PATH=
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ interface ChatMessage {
|
|||
content: string
|
||||
}
|
||||
|
||||
const MAX_TOOL_ROUNDS = 5
|
||||
const MAX_TOOL_ROUNDS = 15
|
||||
const KEEPALIVE_INTERVAL_MS = 15_000
|
||||
|
||||
function getClient(): Anthropic {
|
||||
|
|
@ -26,14 +26,14 @@ function getClient(): Anthropic {
|
|||
return new Anthropic({ baseURL, apiKey })
|
||||
}
|
||||
|
||||
function processToolUseResponse(
|
||||
async function processToolUseResponse(
|
||||
response: Anthropic.Message,
|
||||
indexed: IndexedTraceData
|
||||
): Anthropic.ToolResultBlockParam[] {
|
||||
): Promise<Anthropic.ToolResultBlockParam[]> {
|
||||
const toolResults: Anthropic.ToolResultBlockParam[] = []
|
||||
for (const block of response.content) {
|
||||
if (block.type === "tool_use") {
|
||||
const result = resolveToolCall(
|
||||
const result = await resolveToolCall(
|
||||
block.name,
|
||||
(block.input as Record<string, unknown>) ?? {},
|
||||
indexed
|
||||
|
|
@ -112,7 +112,7 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
}
|
||||
|
||||
conversationMessages.push({ role: "assistant", content: response.content })
|
||||
const toolResults = processToolUseResponse(response, indexed)
|
||||
const toolResults = await processToolUseResponse(response, indexed)
|
||||
conversationMessages.push({ role: "user", content: toolResults })
|
||||
toolRounds++
|
||||
}
|
||||
|
|
@ -134,7 +134,7 @@ export async function POST(request: NextRequest): Promise<Response> {
|
|||
|
||||
if (finalMessage.stop_reason === "tool_use") {
|
||||
conversationMessages.push({ role: "assistant", content: finalMessage.content })
|
||||
const toolResults = processToolUseResponse(finalMessage, indexed)
|
||||
const toolResults = await processToolUseResponse(finalMessage, indexed)
|
||||
conversationMessages.push({ role: "user", content: toolResults })
|
||||
|
||||
const followUpStream = client.messages.stream({
|
||||
|
|
|
|||
|
|
@ -1,3 +1,7 @@
|
|||
import path from "node:path"
|
||||
import { readdir, readFile, stat } from "node:fs/promises"
|
||||
import { execFile } from "node:child_process"
|
||||
import { prisma } from "@/lib/prisma"
|
||||
import type { TraceData } from "./get-trace-data"
|
||||
|
||||
export interface IndexedTraceData {
|
||||
|
|
@ -27,6 +31,7 @@ export interface IndexedTraceData {
|
|||
context: Record<string, unknown> | null
|
||||
}>
|
||||
llmCalls: Array<{
|
||||
id: string
|
||||
call_type: string | null
|
||||
model_name: string | null
|
||||
status: string
|
||||
|
|
@ -107,6 +112,7 @@ export function indexTraceData(traceData: TraceData): IndexedTraceData {
|
|||
context: e.context as Record<string, unknown> | null,
|
||||
})),
|
||||
llmCalls: rawLlmCalls.map((c) => ({
|
||||
id: c.id,
|
||||
call_type: c.call_type,
|
||||
model_name: c.model_name,
|
||||
status: c.status,
|
||||
|
|
@ -129,6 +135,36 @@ function findModelForTestGroup(testIndex: number, data: IndexedTraceData): strin
|
|||
return match?.model_name ?? null
|
||||
}
|
||||
|
||||
// --- Codebase browsing helpers ---
|
||||
|
||||
/**
 * Maps a repository identifier to the checkout directory configured for it.
 *
 * @param repo - Repository key; "codeflash-internal" and "codeflash" are the
 *   only recognized values.
 * @returns The value of CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH
 *   respectively, or null when the key is unrecognized or the variable is
 *   unset or empty.
 */
function getRepoRoot(repo: string): string | null {
  let configured: string | undefined
  switch (repo) {
    case "codeflash-internal":
      configured = process.env.CODEFLASH_INTERNAL_REPO_PATH
      break
    case "codeflash":
      configured = process.env.CODEFLASH_CLI_REPO_PATH
      break
    default:
      return null
  }
  // An empty string counts as "not configured", same as an unset variable.
  return configured ? configured : null
}
|
||||
|
||||
/**
 * Resolves a caller-supplied path against a repository root and rejects
 * anything that would land outside that root.
 *
 * The check is purely lexical (path.resolve / path.relative); symbolic links
 * inside the repository are not followed or inspected here.
 *
 * @param repoRoot - Repository root directory. May be relative or carry a
 *   trailing separator; it is normalized to an absolute path before comparing.
 * @param relativePath - Path inside the repository, as supplied by the tool caller.
 * @returns `{ resolved }` with the absolute path when it stays inside the
 *   root, otherwise `{ error }` with a human-readable message.
 */
function resolveAndValidatePath(
  repoRoot: string,
  relativePath: string,
): { resolved: string } | { error: string } {
  // Tool arguments arrive as untyped JSON; a missing path would otherwise make
  // the path functions throw instead of producing a tool-level error message.
  if (typeof relativePath !== "string") {
    return { error: "Path must be a string." }
  }
  if (path.isAbsolute(relativePath)) {
    return { error: "Path traversal is not allowed." }
  }

  // Normalize the root first so relative roots and trailing separators compare
  // correctly against the resolved target.
  const root = path.resolve(repoRoot)
  const resolved = path.resolve(root, relativePath)

  // path.relative yields "" for the root itself, a ".."-led path when the
  // target is outside the root, and an absolute path across Windows drives.
  // Comparing whole segments avoids both the sibling-directory prefix match
  // ("/repo" vs "/repo-other") and false rejections of names like "..hidden".
  const fromRoot = path.relative(root, resolved)
  const escapesRoot =
    fromRoot === ".." || fromRoot.startsWith(`..${path.sep}`) || path.isAbsolute(fromRoot)
  if (escapesRoot) {
    return { error: "Path traversal is not allowed." }
  }
  return { resolved }
}
|
||||
|
||||
/**
 * Caps a string at a maximum length.
 *
 * @param s - Text to cap.
 * @param max - Maximum number of characters to keep.
 * @returns `s` unchanged when it fits; otherwise its first `max` characters
 *   followed by a newline and a marker stating where it was cut.
 */
function truncate(s: string, max: number): string {
  return s.length <= max ? s : `${s.slice(0, max)}\n... [truncated at ${max} chars]`
}
|
||||
|
||||
// --- System prompt ---
|
||||
|
||||
export function buildSummaryPrompt(data: IndexedTraceData): string {
|
||||
const lines: string[] = []
|
||||
|
||||
|
|
@ -141,7 +177,12 @@ export function buildSummaryPrompt(data: IndexedTraceData): string {
|
|||
"record everything that happens during an optimization run. When the user shares information, trust " +
|
||||
"it as ground truth even if the trace data doesn't confirm it. Never say 'the trace shows no errors' " +
|
||||
"to contradict what the user is reporting. Instead, use the trace data you DO have to help explain " +
|
||||
"and investigate what they're seeing."
|
||||
"and investigate what they're seeing.\n\n" +
|
||||
"You also have codebase browsing tools (read_file, search_code, list_directory) that let you " +
|
||||
"navigate the codeflash-internal and codeflash CLI source code, and a get_llm_call_detail tool " +
|
||||
"to inspect the full prompts and responses of any LLM call in this trace. Use these to trace " +
|
||||
"problems end-to-end: see what went wrong in the trace, read the actual prompts sent, then " +
|
||||
"navigate to the pipeline code to suggest a concrete fix.",
|
||||
)
|
||||
|
||||
lines.push("")
|
||||
|
|
@ -157,7 +198,7 @@ export function buildSummaryPrompt(data: IndexedTraceData): string {
|
|||
"Import the real classes and call their real constructors with real arguments. This ensures " +
|
||||
"attribute access, method calls, and object protocols behave identically to production.\n" +
|
||||
"- If many candidates fail or are unranked, check whether mock/fake objects in tests could be " +
|
||||
"the cause before blaming the optimizations."
|
||||
"the cause before blaming the optimizations.",
|
||||
)
|
||||
|
||||
lines.push("")
|
||||
|
|
@ -212,6 +253,15 @@ export function buildSummaryPrompt(data: IndexedTraceData): string {
|
|||
}
|
||||
}
|
||||
|
||||
if (data.llmCalls.length > 0) {
|
||||
lines.push("")
|
||||
lines.push("=== LLM CALL IDS (for get_llm_call_detail) ===")
|
||||
for (const c of data.llmCalls) {
|
||||
const cost = c.llm_cost != null ? ` $${c.llm_cost.toFixed(4)}` : ""
|
||||
lines.push(` - ${c.id}: ${c.call_type ?? "unknown"} (${c.model_name ?? "unknown"}, ${c.status}${cost})`)
|
||||
}
|
||||
}
|
||||
|
||||
return lines.join("\n")
|
||||
}
|
||||
|
||||
|
|
@ -271,13 +321,106 @@ export const anthropicToolDefinitions = [
|
|||
"Returns all errors encountered during the optimization, including test failures and their context.",
|
||||
input_schema: { type: "object" as const, properties: {} },
|
||||
},
|
||||
{
|
||||
name: "get_llm_call_detail",
|
||||
description:
|
||||
"Returns the full prompt, response, and parsing results for a specific LLM call. " +
|
||||
"Use the call_id from the LLM call IDs listed in the trace overview.",
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: {
|
||||
call_id: {
|
||||
type: "string",
|
||||
description: "UUID of the LLM call (from the trace overview).",
|
||||
},
|
||||
},
|
||||
required: ["call_id"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "read_file",
|
||||
description:
|
||||
"Read a file from the codeflash-internal or codeflash CLI repository. " +
|
||||
"Returns file content with line numbers. Use start_line/end_line for large files.",
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: {
|
||||
repo: {
|
||||
type: "string",
|
||||
enum: ["codeflash-internal", "codeflash"],
|
||||
description: "Which repository to read from.",
|
||||
},
|
||||
path: {
|
||||
type: "string",
|
||||
description: "Relative path within the repo (e.g., 'django/aiservice/core/shared/optimizer_router.py').",
|
||||
},
|
||||
start_line: {
|
||||
type: "number",
|
||||
description: "1-based start line (default: 1).",
|
||||
},
|
||||
end_line: {
|
||||
type: "number",
|
||||
description: "1-based end line (default: start_line + 499).",
|
||||
},
|
||||
},
|
||||
required: ["repo", "path"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "search_code",
|
||||
description:
|
||||
"Search for a pattern across a repository using ripgrep. Returns matching lines with file paths and line numbers.",
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: {
|
||||
repo: {
|
||||
type: "string",
|
||||
enum: ["codeflash-internal", "codeflash"],
|
||||
description: "Which repository to search.",
|
||||
},
|
||||
pattern: {
|
||||
type: "string",
|
||||
description: "Regex pattern to search for (ripgrep syntax).",
|
||||
},
|
||||
glob: {
|
||||
type: "string",
|
||||
description: "File glob filter (e.g., '*.py', '*.ts').",
|
||||
},
|
||||
max_results: {
|
||||
type: "number",
|
||||
description: "Maximum number of matching lines to return (default: 30, max: 100).",
|
||||
},
|
||||
},
|
||||
required: ["repo", "pattern"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "list_directory",
|
||||
description:
|
||||
"List files and directories in a repository path. Directories are listed first, sorted alphabetically.",
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: {
|
||||
repo: {
|
||||
type: "string",
|
||||
enum: ["codeflash-internal", "codeflash"],
|
||||
description: "Which repository to browse.",
|
||||
},
|
||||
path: {
|
||||
type: "string",
|
||||
description: "Relative path within the repo (default: root).",
|
||||
},
|
||||
},
|
||||
required: ["repo"],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
export function resolveToolCall(
|
||||
export async function resolveToolCall(
|
||||
name: string,
|
||||
args: Record<string, unknown>,
|
||||
data: IndexedTraceData
|
||||
): string {
|
||||
data: IndexedTraceData,
|
||||
): Promise<string> {
|
||||
switch (name) {
|
||||
case "get_original_code":
|
||||
return data.originalCode ?? "No original code available."
|
||||
|
|
@ -344,6 +487,184 @@ export function resolveToolCall(
|
|||
return parts.join("\n\n")
|
||||
}
|
||||
|
||||
case "get_llm_call_detail": {
|
||||
const callId = args.call_id as string
|
||||
if (!data.llmCalls.some((c) => c.id === callId)) {
|
||||
return `LLM call ${callId} not found in this trace. Available IDs: ${data.llmCalls.map((c) => c.id).join(", ")}`
|
||||
}
|
||||
const row = await prisma.llm_calls.findUnique({
|
||||
where: { id: callId },
|
||||
select: {
|
||||
call_type: true,
|
||||
model_name: true,
|
||||
status: true,
|
||||
system_prompt: true,
|
||||
user_prompt: true,
|
||||
messages: true,
|
||||
raw_response: true,
|
||||
parsed_response: true,
|
||||
temperature: true,
|
||||
n_candidates: true,
|
||||
max_tokens: true,
|
||||
prompt_tokens: true,
|
||||
completion_tokens: true,
|
||||
total_tokens: true,
|
||||
llm_cost: true,
|
||||
latency_ms: true,
|
||||
parsing_status: true,
|
||||
candidates_generated: true,
|
||||
candidates_valid: true,
|
||||
parsing_errors: true,
|
||||
error_type: true,
|
||||
error_message: true,
|
||||
},
|
||||
})
|
||||
if (!row) return `LLM call ${callId} not found in database.`
|
||||
|
||||
const MAX_FIELD = 15_000
|
||||
const parts: string[] = []
|
||||
parts.push(`Call type: ${row.call_type}`)
|
||||
parts.push(`Model: ${row.model_name}`)
|
||||
parts.push(`Status: ${row.status}`)
|
||||
if (row.temperature != null) parts.push(`Temperature: ${row.temperature}`)
|
||||
if (row.n_candidates != null) parts.push(`N candidates requested: ${row.n_candidates}`)
|
||||
if (row.max_tokens != null) parts.push(`Max tokens: ${row.max_tokens}`)
|
||||
parts.push(`Tokens: ${row.prompt_tokens ?? "?"}p / ${row.completion_tokens ?? "?"}c / ${row.total_tokens ?? "?"}t`)
|
||||
if (row.llm_cost != null) parts.push(`Cost: $${row.llm_cost.toFixed(4)}`)
|
||||
if (row.latency_ms != null) parts.push(`Latency: ${row.latency_ms}ms`)
|
||||
|
||||
if (row.system_prompt) {
|
||||
parts.push(`\n--- System Prompt ---\n${truncate(row.system_prompt, MAX_FIELD)}`)
|
||||
}
|
||||
if (row.user_prompt) {
|
||||
parts.push(`\n--- User Prompt ---\n${truncate(row.user_prompt, MAX_FIELD)}`)
|
||||
}
|
||||
if (row.messages) {
|
||||
const messagesStr = JSON.stringify(row.messages, null, 2)
|
||||
parts.push(`\n--- Messages ---\n${truncate(messagesStr, MAX_FIELD)}`)
|
||||
}
|
||||
if (row.raw_response) {
|
||||
parts.push(`\n--- Raw Response ---\n${truncate(row.raw_response, MAX_FIELD)}`)
|
||||
}
|
||||
if (row.parsed_response) {
|
||||
const parsedStr = JSON.stringify(row.parsed_response, null, 2)
|
||||
parts.push(`\n--- Parsed Response ---\n${truncate(parsedStr, MAX_FIELD)}`)
|
||||
}
|
||||
|
||||
if (row.parsing_status) {
|
||||
parts.push(`\nParsing status: ${row.parsing_status}`)
|
||||
if (row.candidates_generated != null) parts.push(`Candidates generated: ${row.candidates_generated}`)
|
||||
if (row.candidates_valid != null) parts.push(`Candidates valid: ${row.candidates_valid}`)
|
||||
if (row.parsing_errors) {
|
||||
parts.push(`Parsing errors: ${truncate(JSON.stringify(row.parsing_errors, null, 2), 5000)}`)
|
||||
}
|
||||
}
|
||||
|
||||
if (row.error_type) parts.push(`\nError: [${row.error_type}] ${row.error_message ?? ""}`)
|
||||
|
||||
return parts.join("\n")
|
||||
}
|
||||
|
||||
case "read_file": {
  // Reads a slice of a repository file and returns it with 1-based line
  // numbers. Every failure is reported as a plain string so the model can
  // recover, rather than rejecting the tool call.
  const repoRoot = getRepoRoot(args.repo as string)
  if (!repoRoot) return `Repository path not configured. Set CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH env var.`

  // Tool input is untyped JSON: validate before handing it to path helpers,
  // which throw on non-string input.
  const filePath = args.path
  if (typeof filePath !== "string" || filePath.length === 0) {
    return "read_file requires a non-empty string path."
  }

  const pathResult = resolveAndValidatePath(repoRoot, filePath)
  if ("error" in pathResult) return pathResult.error

  try {
    const fileStat = await stat(pathResult.resolved)
    if (!fileStat.isFile()) return `Not a file: ${filePath}`
    if (fileStat.size > 1_000_000) return `File too large (${fileStat.size} bytes). Max 1MB.`

    const content = await readFile(pathResult.resolved, "utf-8")
    const allLines = content.split(/\r?\n/)
    // A trailing newline produces a phantom empty last element; drop it so the
    // reported line count matches what an editor shows.
    if (allLines.length > 1 && allLines[allLines.length - 1] === "") allLines.pop()

    // Coerce to whole numbers; anything missing, non-numeric or < 1 falls back
    // to the documented defaults.
    const requestedStart = Math.floor(Number(args.start_line))
    const startLine = Number.isFinite(requestedStart) && requestedStart >= 1 ? requestedStart : 1
    if (startLine > allLines.length) {
      return `start_line ${startLine} is past the end of ${filePath} (${allLines.length} lines).`
    }

    const requestedEnd = Math.floor(Number(args.end_line))
    const hasExplicitEnd = Number.isFinite(requestedEnd) && requestedEnd >= 1
    if (hasExplicitEnd && requestedEnd < startLine) {
      return `end_line (${requestedEnd}) must be greater than or equal to start_line (${startLine}).`
    }
    const endLine = Math.min(allLines.length, hasExplicitEnd ? requestedEnd : startLine + 499)

    const numbered = allLines
      .slice(startLine - 1, endLine)
      .map((line, i) => `${startLine + i}: ${line}`)
      .join("\n")
    const header = `File: ${filePath} (lines ${startLine}-${endLine} of ${allLines.length})`
    return `${header}\n\n${numbered}`
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code === "ENOENT") return `File not found: ${filePath}`
    return `Error reading file: ${(err as Error).message}`
  }
}
|
||||
|
||||
case "search_code": {
  // Runs ripgrep over a configured repository and returns matching lines as
  // "relative/path:line:text". Every failure is reported as a plain string.
  const repoRoot = getRepoRoot(args.repo as string)
  if (!repoRoot) return `Repository path not configured. Set CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH env var.`

  // Tool input is untyped, model-controlled JSON: validate before use.
  const pattern = args.pattern
  if (typeof pattern !== "string" || pattern.length === 0) {
    return "search_code requires a non-empty string pattern."
  }
  const glob = args.glob

  // Clamp to [1, 100]; missing, zero or non-numeric values use the default of 30.
  const requestedMax = Math.floor(Number(args.max_results))
  const maxResults = Math.min(Math.max(1, requestedMax || 30), 100)

  const rgArgs = [
    "--no-heading",
    "--line-number",
    "--color=never",
    // --max-count limits matches per file; the overall cap is applied below.
    "--max-count", String(maxResults),
    "--glob", "!.git",
    "--glob", "!node_modules",
    "--glob", "!__pycache__",
    "--glob", "!.next",
    "--glob", "!dist",
  ]
  // SECURITY: pattern and glob are passed in "--flag=value" form and the path
  // follows "--", so a value starting with "-" can never be parsed as a
  // ripgrep option (for example --pre, which executes a command).
  if (typeof glob === "string" && glob.length > 0) {
    rgArgs.push(`--glob=${glob}`)
  }
  rgArgs.push(`--regexp=${pattern}`, "--", repoRoot)

  return new Promise<string>((resolve) => {
    execFile("rg", rgArgs, { timeout: 10_000, maxBuffer: 1_000_000 }, (err, stdout, stderr) => {
      if (err && !stdout) {
        if ((err as NodeJS.ErrnoException).code === "ENOENT") {
          resolve("ripgrep (rg) not found. Install it to use search_code.")
          return
        }
        // Killed with no output means the timeout fired; do not report that
        // as an empty result.
        if (err.killed) {
          resolve("Search timed out after 10 seconds. Try a narrower pattern or glob.")
          return
        }
        if (stderr) {
          resolve(`Search error: ${stderr.slice(0, 500)}`)
          return
        }
        // ripgrep exits non-zero with empty output when nothing matched.
        resolve("No matches found.")
        return
      }

      // Strip the repo root only where it leads a line, so matched text that
      // happens to contain the same path is left intact.
      const rootPrefix = repoRoot.endsWith(path.sep) ? repoRoot : repoRoot + path.sep
      const matchLines = stdout
        .split("\n")
        .filter((line) => line.length > 0)
        .map((line) => (line.startsWith(rootPrefix) ? line.slice(rootPrefix.length) : line))

      if (matchLines.length === 0) {
        resolve("No matches found.")
        return
      }

      // Enforce the documented overall limit on returned lines. The omitted
      // count is a lower bound if output was cut short by maxBuffer or timeout.
      const shown = matchLines.slice(0, maxResults)
      const omitted = matchLines.length - shown.length
      const suffix = omitted > 0 ? `\n... [${omitted} more matching lines omitted]` : ""
      resolve(shown.join("\n") + suffix)
    })
  })
}
|
||||
|
||||
case "list_directory": {
|
||||
const repoRoot = getRepoRoot(args.repo as string)
|
||||
if (!repoRoot) return `Repository path not configured. Set CODEFLASH_INTERNAL_REPO_PATH or CODEFLASH_CLI_REPO_PATH env var.`
|
||||
|
||||
const relativePath = (args.path as string) || "."
|
||||
const pathResult = resolveAndValidatePath(repoRoot, relativePath)
|
||||
if ("error" in pathResult) return pathResult.error
|
||||
|
||||
try {
|
||||
const entries = await readdir(pathResult.resolved, { withFileTypes: true })
|
||||
const dirs: string[] = []
|
||||
const files: string[] = []
|
||||
for (const entry of entries) {
|
||||
if (entry.name === ".git") continue
|
||||
if (entry.isDirectory()) dirs.push(entry.name + "/")
|
||||
else files.push(entry.name)
|
||||
}
|
||||
dirs.sort()
|
||||
files.sort()
|
||||
const all = [...dirs, ...files]
|
||||
if (all.length === 0) return `Empty directory: ${relativePath}`
|
||||
const capped = all.slice(0, 200)
|
||||
const suffix = all.length > 200 ? `\n... and ${all.length - 200} more entries` : ""
|
||||
return `Directory: ${relativePath}\n\n${capped.join("\n")}${suffix}`
|
||||
} catch (err) {
|
||||
if ((err as NodeJS.ErrnoException).code === "ENOENT") return `Directory not found: ${relativePath}`
|
||||
if ((err as NodeJS.ErrnoException).code === "ENOTDIR") return `Not a directory: ${relativePath}`
|
||||
return `Error listing directory: ${(err as Error).message}`
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return `Unknown tool: ${name}`
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue