- Optimize codeflash-optimize SKILL.md (review score 17% → 98%, eval 87% → 100%)
- Fix frontmatter (allowed-tools format, argument-hint under metadata)
- Lead description with concrete actions, explicit agent launch parameters
- Add multi-run variance detection to eval system (--runs N flag)
- score.py aggregate command: min/max/avg/stddev per criterion, flaky detection
- check-regression.sh defaults to 3 runs for reliable regression detection
- Add per-criterion regression tracking to baseline-scores.json (v3)
- Reports exactly which criteria regressed, not just total score drops
- Rename evals/ → codeflash-evals/ to avoid tessl directory conflicts
- Switch tessl to managed mode, gitignore vendored tiles and symlinks
60 lines
2.6 KiB
Python
60 lines
2.6 KiB
Python
import pytest
|
|
from orders.core import process_orders
|
|
|
|
|
|
def test_basic():
    """Process three small orders and check the per-category summary.

    Two of the orders are in the "tools" category and one is in
    "electronics", so the summary is expected to hold two categories with
    counts 2 and 1, and ``plan_size`` is expected to equal the order count.
    """

    def make_order(order_id, customer, product, category, quantity, price, address, payment):
        # Key order mirrors the shape of the raw order records fed to process_orders.
        return {
            "id": order_id,
            "customer": customer,
            "product": product,
            "category": category,
            "quantity": quantity,
            "price": price,
            "shipping_address": address,
            "metadata": {"payment_method": payment},
        }

    orders = [
        make_order(1, "Alice", "Widget", "tools", 2, 25.0, "123 Main St", "credit_card"),
        make_order(2, "Bob", "Gadget", "tools", 1, 50.0, "456 Oak Ave", "paypal"),
        make_order(3, "Carol", "Gizmo", "electronics", 5, 10.0, "789 Elm Dr", "debit"),
    ]

    outcome = process_orders(orders)
    summary = outcome["summary"]

    assert len(summary) == 2
    assert summary["tools"]["count"] == 2
    assert summary["electronics"]["count"] == 1
    assert outcome["plan_size"] == 3
|
|
|
|
|
|
def test_large_batch():
    """Run process_orders on a production-scale batch of 50k orders.

    process_orders uses too much memory: with 50k orders, peak memory is far
    higher than the input data size. The goal is to reduce memory usage while
    preserving correctness; the assertions below pin the expected results.
    """
    order_count = 50_000

    # Value pools that the synthetic orders cycle through by index.
    category_pool = [f"cat-{n}" for n in range(25)]
    product_pool = [f"product-{n}" for n in range(40)]
    customer_pool = [f"customer-{n}" for n in range(1000)]
    payment_methods = ("credit_card", "debit", "paypal", "bank_transfer")
    order_sources = ("web", "mobile", "api", "in_store")
    loyalty_tiers = ("bronze", "silver", "gold")

    def build_order(idx):
        # Every order gets its own metadata and tracking dicts (no sharing
        # between orders), exactly as if each were written out literally.
        metadata = {
            "payment_method": payment_methods[idx % 4],
            "order_source": order_sources[idx % 4],
            "loyalty_tier": loyalty_tiers[idx % 3],
            "promo_code": f"PROMO-{idx % 20:03d}" if idx % 5 == 0 else None,
            "gift_wrap": idx % 7 == 0,
            "notes": f"Order note #{idx}" if idx % 10 == 0 else "",
            "tracking": {"email": True, "sms": idx % 3 == 0},
        }
        return {
            "id": idx,
            "customer": customer_pool[idx % len(customer_pool)],
            "product": product_pool[idx % len(product_pool)],
            "category": category_pool[idx % len(category_pool)],
            "quantity": 1 + (idx % 100),
            "price": round(10.0 + (idx % 500) * 0.5, 2),
            "shipping_address": f"{100 + idx % 999} Main St, City-{idx % 50}, ST {10000 + idx % 90000}",
            "metadata": metadata,
        }

    orders = [build_order(idx) for idx in range(order_count)]
    outcome = process_orders(orders)
    per_category = outcome["summary"]

    # 50k orders spread round-robin over 25 categories -> 2000 per category.
    assert len(per_category) == 25
    assert all(entry["count"] == 2000 for entry in per_category.values())
    assert all(entry["total_revenue"] > 0 for entry in per_category.values())
    assert outcome["plan_size"] == order_count
|