codeflash-agent/codeflash-evals/templates/memory-balanced/tests/test_orders.py
Kevin Turcios 37efa524d7 feat: improve skill, eval system, and tessl config
- Optimize codeflash-optimize SKILL.md (review score 17% → 98%, eval 87% → 100%)
  - Fix frontmatter (allowed-tools format, argument-hint under metadata)
  - Lead description with concrete actions, explicit agent launch parameters
- Add multi-run variance detection to eval system (--runs N flag)
  - score.py aggregate command: min/max/avg/stddev per criterion, flaky detection
  - check-regression.sh defaults to 3 runs for reliable regression detection
- Add per-criterion regression tracking to baseline-scores.json (v3)
  - Reports exactly which criteria regressed, not just total score drops
- Rename evals/ → codeflash-evals/ to avoid tessl directory conflicts
- Switch tessl to managed mode, gitignore vendored tiles and symlinks
2026-03-27 11:30:17 -05:00

60 lines
2.6 KiB
Python

import pytest
from orders.core import process_orders
def test_basic():
    """Check process_orders on a three-order batch spanning two categories.

    Two orders belong to "tools" and one to "electronics", so the summary
    must hold one entry per category with matching counts, and the plan
    size must equal the number of input orders.
    """
    orders = [
        {
            "id": 1,
            "customer": "Alice",
            "product": "Widget",
            "category": "tools",
            "quantity": 2,
            "price": 25.0,
            "shipping_address": "123 Main St",
            "metadata": {"payment_method": "credit_card"},
        },
        {
            "id": 2,
            "customer": "Bob",
            "product": "Gadget",
            "category": "tools",
            "quantity": 1,
            "price": 50.0,
            "shipping_address": "456 Oak Ave",
            "metadata": {"payment_method": "paypal"},
        },
        {
            "id": 3,
            "customer": "Carol",
            "product": "Gizmo",
            "category": "electronics",
            "quantity": 5,
            "price": 10.0,
            "shipping_address": "789 Elm Dr",
            "metadata": {"payment_method": "debit"},
        },
    ]

    outcome = process_orders(orders)
    summary = outcome["summary"]

    # One summary entry per distinct category in the input.
    assert len(summary) == 2
    assert summary["tools"]["count"] == 2
    assert summary["electronics"]["count"] == 1
    assert outcome["plan_size"] == 3
def test_large_batch():
    """Run process_orders on a production-scale batch of 50,000 orders.

    Known problem: process_orders uses too much memory. With a batch of
    this size, peak memory is far higher than the size of the input data.
    The goal is to reduce memory usage while preserving correctness, which
    the assertions below pin down.
    """
    order_count = 50_000
    categories = [f"cat-{n}" for n in range(25)]
    products = [f"product-{n}" for n in range(40)]
    customers = [f"customer-{n}" for n in range(1000)]

    # Fixed option tables, cycled through by order index.
    payment_methods = ("credit_card", "debit", "paypal", "bank_transfer")
    order_sources = ("web", "mobile", "api", "in_store")
    loyalty_tiers = ("bronze", "silver", "gold")

    # Every field is derived from the order index, so the batch is fully
    # deterministic from run to run.
    orders = [
        {
            "id": i,
            "customer": customers[i % len(customers)],
            "product": products[i % len(products)],
            "category": categories[i % len(categories)],
            "quantity": 1 + (i % 100),
            "price": round(10.0 + (i % 500) * 0.5, 2),
            "shipping_address": f"{100 + i % 999} Main St, City-{i % 50}, ST {10000 + i % 90000}",
            "metadata": {
                "payment_method": payment_methods[i % 4],
                "order_source": order_sources[i % 4],
                "loyalty_tier": loyalty_tiers[i % 3],
                "promo_code": f"PROMO-{i % 20:03d}" if i % 5 == 0 else None,
                "gift_wrap": i % 7 == 0,
                "notes": f"Order note #{i}" if i % 10 == 0 else "",
                "tracking": {"email": True, "sms": i % 3 == 0},
            },
        }
        for i in range(order_count)
    ]

    outcome = process_orders(orders)
    summary = outcome["summary"]

    # 50,000 orders cycled evenly over 25 categories gives 2,000 per category.
    assert len(summary) == 25
    assert all(entry["count"] == 2000 for entry in summary.values())
    assert all(entry["total_revenue"] > 0 for entry in summary.values())
    assert outcome["plan_size"] == order_count