33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
import pytest
|
|
from pipeline.aggregator import aggregate_by_category
|
|
|
|
|
|
def test_aggregate_basic():
    """Check per-category counts for a small, hand-written event list.

    Four events are fed in: two tagged "web" and two tagged "api". The
    result is expected to hold exactly two entries, each exposing a
    "count" of 2 when indexed by category name.
    """
    # (category, source, value) triples, expanded into event dicts below.
    rows = (
        ("web", "s1", 10.0),
        ("web", "s2", 20.0),
        ("api", "s1", 5.0),
        ("api", "s1", 15.0),
    )
    sample = [
        {"category": category, "source": source, "value": value}
        for category, source, value in rows
    ]

    summary = aggregate_by_category(sample)

    assert len(summary) == 2
    for name in ("web", "api"):
        assert summary[name]["count"] == 2
|
|
|
|
|
|
def test_aggregate_large_batch():
    """Aggregate a large synthetic batch and check the per-category counts.

    Builds 200,000 events that cycle through 500 category names and 50
    source names, so every category receives exactly 400 events.

    NOTE(review): the previous docstring described this large batch as
    using "too much memory for production volumes"; whether that refers to
    this fixture or to the aggregator itself is not visible from this
    file -- confirm before relying on it.
    """
    category_names = [f"cat-{n}" for n in range(500)]
    source_names = [f"source-{n}" for n in range(50)]
    n_categories = len(category_names)
    n_sources = len(source_names)

    # Index into the precomputed name lists so events share string objects
    # instead of formatting a fresh string per event.
    batch = [
        {
            "category": category_names[idx % n_categories],
            "source": source_names[idx % n_sources],
            "value": float(idx % 1000),
        }
        for idx in range(200_000)
    ]

    totals = aggregate_by_category(batch)

    assert len(totals) == 500
    for stats in totals.values():
        assert stats["count"] == 400
|