codeflash-agent/evals/templates/crossdomain-hard/src/pipeline/formatter.py
2026-04-03 17:36:50 -05:00

44 lines
1.3 KiB
Python

"""Output formatting for aggregated pipeline results."""
import copy
import json
import hashlib
def format_results(
    records: list[dict], schema: dict
) -> list[str]:
    """Format records according to schema, adding computed fields.

    Each record is overlaid on the schema defaults (record values win on
    key collisions), then enriched with two computed fields:

    * ``_hash``: content hash of the merged record, computed before
      ``_sort_key`` is added.
    * ``_sort_key``: compound string key used to order the output.

    Neither ``records`` nor ``schema`` is modified.

    Args:
        records: Input records to format.
        schema: Default field values applied to every record.

    Returns:
        One JSON string per record, ordered by ``_sort_key``.
    """
    enriched = []
    for record in records:
        # A shallow merge is enough to protect the inputs: only top-level
        # keys are added to the new dict below, and nested values are
        # merely read during hashing/serialization. This avoids deep
        # copying the whole schema and record on every iteration.
        merged = {**schema, **record}
        # Hash first so that the hash does not cover the sort key.
        merged["_hash"] = _content_hash(merged)
        merged["_sort_key"] = _build_sort_key(merged)
        enriched.append(merged)
    # Sort keys are strings, so ordering is lexicographic.
    enriched.sort(key=lambda r: r["_sort_key"])
    return [json.dumps(r) for r in enriched]
def _content_hash(record: dict) -> str:
"""Compute a hash of the record content for integrity checks."""
raw = json.dumps(record, sort_keys=True)
return hashlib.sha256(raw.encode()).hexdigest()[:16]
def _build_sort_key(record: dict) -> str:
"""Build a compound sort key from category + timestamp + id."""
cat = str(record.get("category", ""))
ts = str(record.get("timestamp", ""))
rid = str(record.get("id", ""))
return f"{cat}|{ts}|{rid}"