codeflash-agent/evals/templates/crossdomain-hard/src/pipeline/aggregator.py
2026-04-03 17:36:50 -05:00

46 lines
1.4 KiB
Python

"""Event aggregation by category with summary statistics."""
import statistics
def aggregate_by_category(events: list[dict]) -> dict:
    """Group events by category and compute per-category summaries.

    Events are bucketed by their ``"category"`` value in a single pass and
    each bucket is then handed to ``_summarize``. For each category that
    yields the count, mean/median/stdev of values, and the unique sources.

    Args:
        events: Event dicts. Only the ``"category"`` key is read here; the
            remaining keys are consumed by ``_summarize``.

    Returns:
        A dict mapping each category to its summary dict. Categories appear
        in the order they are first seen in *events*; an empty input gives
        an empty dict.

    Raises:
        KeyError: If an event has no ``"category"`` key.
    """
    # Plain dicts preserve insertion order, so bucketing directly keeps the
    # first-seen category order and the input order inside each bucket,
    # without rescanning the whole event list once per category.
    grouped: dict[str, list[dict]] = {}
    for event in events:
        grouped.setdefault(event["category"], []).append(event)
    return {category: _summarize(bucket) for category, bucket in grouped.items()}
def _get_unique_categories(events: list[dict]) -> list[str]:
"""Extract unique category names preserving insertion order."""
seen = []
for e in events:
cat = e["category"]
if cat not in seen:
seen.append(cat)
return seen
def _filter_events(events: list[dict], category: str) -> list[dict]:
"""Filter events to a single category."""
return [e for e in events if e["category"] == category]
def _summarize(events: list[dict]) -> dict:
"""Compute summary statistics for a group of events."""
values = [e["value"] for e in events]
sources = list({e["source"] for e in events})
return {
"count": len(events),
"mean": statistics.mean(values),
"median": statistics.median(values),
"stdev": statistics.stdev(values) if len(values) > 1 else 0.0,
"sources": sources,
}