perf: use Haiku model for testgen repair (#2471)
## Summary

- Switch testgen repair endpoint from `EXECUTE_MODEL` (GPT-5-Mini) to `HAIKU_MODEL` (Haiku 4.5)
- Matches the review endpoint, which already uses Haiku
- Repair is a structured task (splice functions, fix assertions) that doesn't need a frontier model
- Should reduce latency (was timing out at 90s in CI) and cost
parent 8d1dfd9bdb
commit 4edd183d82

1 changed file with 3 additions and 3 deletions
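For context, a minimal sketch of the repaired call path, consolidated from the diff below. The `call_llm` keyword arguments and the `response.raw_response` / `response.content` attributes are taken directly from the existing code; the wrapper function name `repair_with_haiku` is hypothetical, used only to make the sketch self-contained.

```python
# Sketch of the call path after this change, consolidated from the diff
# below. The wrapper name `repair_with_haiku` is hypothetical; the call_llm
# keyword arguments mirror the existing endpoint code.
from aiservice.llm import HAIKU_MODEL, calculate_llm_cost, call_llm


async def repair_with_haiku(messages, data, obs_context):
    response = await call_llm(
        llm=HAIKU_MODEL,  # previously EXECUTE_MODEL (GPT-5-Mini)
        messages=messages,
        call_type="testgen_repair",
        trace_id=data.trace_id,
        context=obs_context,
    )
    # The same model constant must be passed to the cost calculation,
    # which is why the diff swaps it in both places.
    cost = calculate_llm_cost(response.raw_response, HAIKU_MODEL)
    return response.content.strip(), cost
```

Note that the model constant changes in two places: the `call_llm` invocation and `calculate_llm_cost`. Leaving either on `EXECUTE_MODEL` would route to the wrong model or misreport cost.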
@@ -25,7 +25,7 @@ from openai.types.chat import (
 from aiservice.analytics.posthog import ph
 from aiservice.common.markdown_utils import extract_code_block_with_context
-from aiservice.llm import EXECUTE_MODEL, calculate_llm_cost, call_llm
+from aiservice.llm import HAIKU_MODEL, calculate_llm_cost, call_llm
 from authapp.auth import AuthenticatedRequest
 from core.shared.testgen_review.models import TestRepairErrorSchema, TestRepairResponseSchema, TestRepairSchema
 
 
@@ -65,7 +65,7 @@ async def testgen_repair(
     obs_context["call_sequence"] = data.call_sequence
 
     response = await call_llm(
-        llm=EXECUTE_MODEL,
+        llm=HAIKU_MODEL,
         messages=messages,
         call_type="testgen_repair",
         trace_id=data.trace_id,
@@ -73,7 +73,7 @@ async def testgen_repair(
         context=obs_context,
     )
 
-    cost = calculate_llm_cost(response.raw_response, EXECUTE_MODEL)
+    cost = calculate_llm_cost(response.raw_response, HAIKU_MODEL)
     logging.debug(f"testgen_repair LLM cost: {cost}")
 
     repair_text = response.content.strip()