perf: use Haiku model for testgen repair (#2471)

## Summary
- Switch the testgen repair endpoint from `EXECUTE_MODEL` (GPT-5-Mini) to `HAIKU_MODEL` (Haiku 4.5)
- Matches the review endpoint, which already uses Haiku
- Repair is a structured task (splice functions, fix assertions) that doesn't need a frontier model
- Should reduce latency (repair was timing out at 90s in CI) and cost
Kevin Turcios, 2026-03-06 07:10:44 +00:00 (committed by GitHub)
parent 8d1dfd9bdb
commit 4edd183d82

```diff
@@ -25,7 +25,7 @@ from openai.types.chat import (
 from aiservice.analytics.posthog import ph
 from aiservice.common.markdown_utils import extract_code_block_with_context
-from aiservice.llm import EXECUTE_MODEL, calculate_llm_cost, call_llm
+from aiservice.llm import HAIKU_MODEL, calculate_llm_cost, call_llm
 from authapp.auth import AuthenticatedRequest
 from core.shared.testgen_review.models import TestRepairErrorSchema, TestRepairResponseSchema, TestRepairSchema
@@ -65,7 +65,7 @@ async def testgen_repair(
     obs_context["call_sequence"] = data.call_sequence
     response = await call_llm(
-        llm=EXECUTE_MODEL,
+        llm=HAIKU_MODEL,
         messages=messages,
         call_type="testgen_repair",
         trace_id=data.trace_id,
@@ -73,7 +73,7 @@ async def testgen_repair(
         context=obs_context,
     )
-    cost = calculate_llm_cost(response.raw_response, EXECUTE_MODEL)
+    cost = calculate_llm_cost(response.raw_response, HAIKU_MODEL)
     logging.debug(f"testgen_repair LLM cost: {cost}")
     repair_text = response.content.strip()
```
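
For reviewers reading the hunks together, here is a minimal sketch of the repair call path after this change. The identifiers (`call_llm`, `calculate_llm_cost`, `HAIKU_MODEL`) and keyword arguments are taken from the diff; the wrapper function `repair_sketch`, its parameters, and the elided message construction are illustrative assumptions, not the actual endpoint code.

```python
# Minimal sketch of the post-change call path, assuming the signatures
# visible in the diff above. `repair_sketch` and its parameters are
# illustrative; message building and schema parsing are elided.
import logging

from aiservice.llm import HAIKU_MODEL, calculate_llm_cost, call_llm


async def repair_sketch(messages, trace_id, obs_context):
    # Structured repair work (splicing functions, fixing assertions) now
    # runs on the cheaper Haiku model instead of EXECUTE_MODEL.
    response = await call_llm(
        llm=HAIKU_MODEL,
        messages=messages,
        call_type="testgen_repair",
        trace_id=trace_id,
        context=obs_context,
    )
    # Cost is computed against the same model that served the request,
    # which is why both call sites change together in this commit.
    cost = calculate_llm_cost(response.raw_response, HAIKU_MODEL)
    logging.debug(f"testgen_repair LLM cost: {cost}")
    return response.content.strip()
```

The invariant the commit preserves is that the model passed to `call_llm` and the model passed to `calculate_llm_cost` stay in sync; updating one without the other would misreport cost.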