47 lines
1.4 KiB
Python
47 lines
1.4 KiB
Python
import pytest
|
|
from pipeline.formatter import format_results
|
|
|
|
|
|
def test_format_basic():
    """Format two small records and check that two results come back."""
    rows = [
        {"id": 1, "category": "web", "value": 42},
        {"id": 2, "category": "api", "value": 10},
    ]
    meta = {"version": "1.0", "env": "prod"}

    formatted = format_results(rows, meta)

    # Only the output length is checked here, not the formatted content.
    assert len(formatted) == 2
|
|
|
|
|
|
def test_format_large_batch():
    """Format a 50,000-record batch with a nested schema and check the count.

    NOTE(review): the original docstring described this job as "too slow for
    50k records". Whether that refers to ``format_results`` itself or to this
    test's runtime is not visible from this file -- confirm with the owner.
    """
    schema = {
        "version": "2.0",
        "env": "production",
        "metadata": {
            "pipeline": "v3",
            "config": {
                "retry": {"count": 3, "backoff": [1, 2, 4, 8, 16]},
                "timeouts": {"connect": 5, "read": 30, "write": 10},
                "features": ["dedup", "enrich", "validate", "compress"],
            },
        },
        "tags": ["processed", "validated", "v2"],
        "audit": {
            "created_by": "pipeline-worker",
            "approved": True,
            "reviewers": ["system", "auto-qa"],
        },
    }

    # Build the fixture with a comprehension instead of a manual append loop;
    # each record is derived from its index, so values cycle deterministically.
    records = [
        {
            "id": i,
            "category": f"cat-{i % 100}",
            "value": float(i % 1000),
            "timestamp": f"2024-01-{(i % 28) + 1:02d}T{i % 24:02d}:00:00Z",
            "source": f"source-{i % 50}",
        }
        for i in range(50_000)
    ]

    result = format_results(records, schema)

    # Only the output length is checked here, not the formatted content.
    assert len(result) == 50_000
|