{
  "context": "Tests whether the agent understands the LLM provider abstraction (dataclass, client setup, provider-specific handling), observability patterns, and test generation framework detection from the llm-provider-abstraction and test-generation-pipeline docs.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "LLM dataclass and model instances",
      "description": "LLM is a pydantic_dataclass with all 6 fields (name, max_tokens, model_type as Literal, input_cost, cached_input_cost, output_cost). Concrete instances use correct pricing from the docs.",
      "max_score": 20
    },
    {
      "name": "Provider-specific call handling",
      "description": "call_llm dispatches by model_type. OpenAI uses client.chat.completions.create() with max_completion_tokens for GPT-5-mini and max_tokens for older models. Anthropic extracts system prompt from messages and passes via system= kwarg, and concatenates text blocks from the response.",
      "max_score": 30
    },
    {
      "name": "Observability in finally block",
      "description": "record_llm_call() is called in a finally block (not just after success), ensuring every LLM call is recorded to the database regardless of success or failure. Includes trace_id, call_type, model, cost, and latency.",
      "max_score": 25
    },
    {
      "name": "Framework detection",
      "description": "detect_frameworks_from_code parses import statements to identify PyTorch, TensorFlow, and JAX. Handles both 'import torch' and aliased imports like 'import tensorflow as tf'.",
      "max_score": 25
    }
  ]
}