# Tests for pipeline.core.process_readings (grouping sensor readings by type).
import pytest
from pipeline.core import process_readings


def test_basic():
    """A small batch spanning two sensor types is grouped with per-type counts."""
    def reading(rid, kind, ts, value):
        # Minimal fixture matching the reading schema process_readings expects.
        return {
            "id": rid,
            "sensor_type": kind,
            "timestamp": ts,
            "value": value,
            "metadata": {"location": {"lat": 0, "lng": 0}},
        }

    raw = [
        reading(1, "temp", "2024-01-01T00:00:00", 22.5),
        reading(2, "temp", "2024-01-01T01:00:00", 23.0),
        reading(3, "humidity", "2024-01-01T00:00:00", 45.0),
    ]

    result = process_readings(raw)

    # Exactly the two sensor types present in the input, with their counts.
    assert len(result) == 2
    assert result["temp"]["count"] == 2
    assert result["humidity"]["count"] == 1


def test_large_dataset():
    """Production-scale dataset — process_readings uses too much memory.

    With 50k readings, peak memory is far higher than the input data size.
    The goal is to reduce memory usage while preserving correctness.
    """
    type_names = [f"sensor-{i}" for i in range(50)]

    def make_reading(i):
        # One synthetic reading; every field is derived deterministically from i
        # so the fixture is reproducible across runs.
        return {
            "id": i,
            "sensor_type": type_names[i % len(type_names)],
            "timestamp": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}T{(i % 24):02d}:00:00",
            "value": round(20.0 + (i % 100) * 0.1, 2),
            "unit": "celsius",
            "metadata": {
                "location": {
                    "lat": round(37.0 + (i % 10) * 0.1, 4),
                    "lng": round(-122.0 + (i % 10) * 0.1, 4),
                    "altitude": float(i % 50),
                },
                "firmware": f"v{1 + i % 3}.{i % 10}.0",
                "calibration_date": "2024-01-15",
                "sensor_config": {
                    "sample_rate": 100,
                    "precision": "high",
                    "filter": "kalman",
                },
                "deployment": {
                    "site": f"site-{i % 10}",
                    "rack": f"rack-{chr(65 + i % 8)}",
                    "position": i % 20,
                },
                "tags": [f"tag-{i % 5}", f"tag-{(i + 1) % 5}"],
                "history": [f"event-{j}" for j in range(3)],
            },
        }

    raw = [make_reading(i) for i in range(50_000)]

    result = process_readings(raw)

    # 50 sensor types, 50_000 readings => exactly 1000 readings per type.
    assert len(result) == 50
    assert all(stats["count"] == 1000 for stats in result.values())
    assert all(stats["mean"] > 0 for stats in result.values())