codeflash-agent/evals/templates/memory-hard/tests/test_pipeline.py
Kevin Turcios 33faedf427
Add Unstructured report, rewrite statusline, format evals/scripts (#20)
* Add Unstructured engagement report as uv workspace member

Three-tier Plotly Dash app (Executive Brief, Engineering Team, Full
Detail) with data in JSON, theme constants in theme.py, and Dash
production improvements (Google Fonts, clientside callbacks, meta tags).

Also: add .playwright-mcp/ to .gitignore, add reports/* ruff overrides,
remove tracked .codeflash/observability/read-tracker.

* Rewrite statusline to derive context from git state

Detects active area from changed files (reports, packages, plugin,
.codeflash, case-studies, evals), falls back to branch name convention
(perf/*, feat/*, fix/*), shows dirty indicator. Uses whoami for
cross-platform user detection.

* Add pre-push lint rule to commit guidelines

* Exclude .codeflash/ from ruff linting

Benchmark and profiling scripts in .codeflash/ are scratch work, not
package source. Excluding them prevents CI failures from ad-hoc scripts.

* Run ruff format across packages, scripts, evals, and plugin refs

* Fix github-app async test failures in CI

Add asyncio_mode = "auto" to root pytest config so async tests
are detected when running from the repo root via uv run pytest packages/.
2026-04-15 03:06:16 -05:00

76 lines
2.6 KiB
Python

from pipeline.core import process_readings
def test_basic():
    """Smoke test: readings are grouped by sensor_type with per-group counts."""

    def reading(rid, kind, ts, value):
        # Build one raw reading dict; location is a fixed stub for this test.
        return {
            "id": rid,
            "sensor_type": kind,
            "timestamp": ts,
            "value": value,
            "metadata": {"location": {"lat": 0, "lng": 0}},
        }

    raw = [
        reading(1, "temp", "2024-01-01T00:00:00", 22.5),
        reading(2, "temp", "2024-01-01T01:00:00", 23.0),
        reading(3, "humidity", "2024-01-01T00:00:00", 45.0),
    ]
    result = process_readings(raw)
    assert len(result) == 2
    assert result["temp"]["count"] == 2
    assert result["humidity"]["count"] == 1
def test_large_dataset():
    """Production-scale dataset — process_readings uses too much memory.

    With 50k readings, peak memory is far higher than the input data size.
    The goal is to reduce memory usage while preserving correctness.
    """
    sensor_types = [f"sensor-{i}" for i in range(50)]

    def make_reading(i):
        # One synthetic reading; every field is a deterministic function of i,
        # so the dataset is reproducible run to run.
        return {
            "id": i,
            "sensor_type": sensor_types[i % len(sensor_types)],
            "timestamp": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}T{(i % 24):02d}:00:00",
            "value": round(20.0 + (i % 100) * 0.1, 2),
            "unit": "celsius",
            "metadata": {
                "location": {
                    "lat": round(37.0 + (i % 10) * 0.1, 4),
                    "lng": round(-122.0 + (i % 10) * 0.1, 4),
                    "altitude": float(i % 50),
                },
                "firmware": f"v{1 + i % 3}.{i % 10}.0",
                "calibration_date": "2024-01-15",
                "sensor_config": {
                    "sample_rate": 100,
                    "precision": "high",
                    "filter": "kalman",
                },
                "deployment": {
                    "site": f"site-{i % 10}",
                    "rack": f"rack-{chr(65 + i % 8)}",
                    "position": i % 20,
                },
                "tags": [f"tag-{i % 5}", f"tag-{(i + 1) % 5}"],
                "history": [f"event-{j}" for j in range(3)],
            },
        }

    raw = [make_reading(i) for i in range(50_000)]
    result = process_readings(raw)
    # 50 sensor types, 50k readings round-robin over them => 1000 each.
    assert len(result) == 50
    assert all(stats["count"] == 1000 for stats in result.values())
    assert all(stats["mean"] > 0 for stats in result.values())