# codeflash-agent/evals/templates/crossdomain-hard/tests/test_formatter.py

from pipeline.formatter import format_results


def test_format_basic():
    """Format two small records and check that the result has length 2."""
    rows = [
        {"id": 1, "category": "web", "value": 42},
        {"id": 2, "category": "api", "value": 10},
    ]
    formatted = format_results(rows, {"version": "1.0", "env": "prod"})
    assert len(formatted) == 2


def test_format_large_batch():
    """Format 50,000 generated records against a nested schema.

    Noted as too slow at this batch size. Only the length of the returned
    result is checked here.
    """
    batch_size = 50_000

    # Nested pieces of the schema, named separately to keep the literal flat.
    pipeline_config = {
        "retry": {"count": 3, "backoff": [1, 2, 4, 8, 16]},
        "timeouts": {"connect": 5, "read": 30, "write": 10},
        "features": ["dedup", "enrich", "validate", "compress"],
    }
    audit_info = {
        "created_by": "pipeline-worker",
        "approved": True,
        "reviewers": ["system", "auto-qa"],
    }
    schema = {
        "version": "2.0",
        "env": "production",
        "metadata": {"pipeline": "v3", "config": pipeline_config},
        "tags": ["processed", "validated", "v2"],
        "audit": audit_info,
    }

    # Field values cycle with the index: 100 categories, 1000 values,
    # 28 days x 24 hours for the timestamp, and 50 sources.
    batch = [
        {
            "id": n,
            "category": f"cat-{n % 100}",
            "value": float(n % 1000),
            "timestamp": f"2024-01-{(n % 28) + 1:02d}T{n % 24:02d}:00:00Z",
            "source": f"source-{n % 50}",
        }
        for n in range(batch_size)
    ]

    formatted = format_results(batch, schema)
    assert len(formatted) == batch_size