codeflash-agent/packages/blackbox/tests/test_models.py
Kevin Turcios 0ad5e60523
Add blackbox package: session flight recorder with HTMX dashboard (#39)
* feat(blackbox): add package with models, CLI, and HTMX dashboard

* test(blackbox): add comprehensive test coverage for dashboard

* feat(blackbox): cache session scanning via watcher invalidation

* docs(blackbox): add README and use fastapi[standard] for dev server

* refactor(blackbox): extract presentation logic into formatter classes

* refactor(blackbox): extract classify_error helpers

* feat(blackbox): wire analytics into session detail view

Show token usage, tool breakdowns, and session stats in a
collapsible panel when viewing a session.

* feat(blackbox): add codeflash plugin detection

Detect codeflash agent names, skills, and commands in transcripts.
Surface language, optimization domain, and capability badges in
the analytics panel.

* refactor(blackbox): remove underscore prefixes from internal functions

* chore: add ty python-version to root pyproject.toml

* chore(blackbox): fix lint errors in test files

* style(blackbox): apply ruff formatting to analytics

* feat(blackbox): add Playwright E2E tests for dashboard

Refactor app.py to expose create_app() factory accepting a projects_dir
override, enabling tests to run against fixture data instead of the real
~/.claude/projects/ directory. Routes now read projects_dir from
app.state instead of the module-level constant.

Add 26 Playwright tests across 5 files covering dashboard loading,
session list, session detail with filters and analytics, sidebar
collapse/localStorage persistence, and SSE log streaming. All tests
pass on chromium, firefox, and webkit (78 total).

CI gets a new e2e-blackbox job with a browser matrix strategy running
all three engines in parallel, conditional on blackbox path changes,
with trace upload on failure.

* fix(ci): sync only blackbox package in e2e job

* fix(ci): exclude e2e tests from unit test job

The test job doesn't install Playwright browsers, so e2e tests error
when pytest collects them. Ignore tests/e2e/ directories in the test
job — those are handled by the dedicated e2e-blackbox job.
2026-04-28 19:58:43 -05:00

305 lines
10 KiB
Python

from __future__ import annotations
import json
from typing import Any
import attrs
import pytest
from blackbox.models import (
ProjectStats,
Recommendation,
SessionAudit,
SessionDigest,
SessionEvent,
WeekStats,
arrow,
sparkline,
)
from tests.conftest import make_audit, make_meta
# ---------------------------------------------------------------------------
# sparkline and arrow
# ---------------------------------------------------------------------------
class TestSparkline:
    """Checks on the string returned by ``sparkline`` for small inputs."""

    def test_empty_or_single_returns_empty(self) -> None:
        """An empty list and a one-element list both yield an empty string."""
        for values in ([], [1.0]):
            assert sparkline(values) == ""

    def test_ascending_produces_increasing_chars(self) -> None:
        """Rising input: one char per value, first char not above the last."""
        rendered = sparkline([0.0, 0.5, 1.0])
        assert len(rendered) == 3
        first, last = rendered[0], rendered[-1]
        assert first <= last

    def test_descending_produces_decreasing_chars(self) -> None:
        """Falling input: first char is not below the last."""
        rendered = sparkline([1.0, 0.5, 0.0])
        first, last = rendered[0], rendered[-1]
        assert first >= last

    def test_constant_values_produce_middle_char(self) -> None:
        """Constant input renders one repeated character.

        Only uniformity is asserted here; which character is used is not
        pinned by this test.
        """
        rendered = sparkline([5.0, 5.0, 5.0])
        assert len(rendered) == 3
        distinct_chars = set(rendered)
        assert len(distinct_chars) == 1

    def test_two_values_uses_full_range(self) -> None:
        """Two different values render as two different characters."""
        rendered = sparkline([0.0, 1.0])
        assert len(rendered) == 2
        assert rendered[0] != rendered[-1]
class TestArrow:
    """Checks on the single-character trend marker returned by ``arrow``."""

    def test_near_zero_delta_returns_equals(self) -> None:
        """Deltas of 0.0, 0.04 and -0.04 all map to ``=``."""
        for delta in (0.0, 0.04, -0.04):
            assert arrow(delta) == "="

    def test_positive_delta_returns_up(self) -> None:
        """A delta of 0.1 maps to ``^``."""
        marker = arrow(0.1)
        assert marker == "^"

    def test_negative_delta_returns_down(self) -> None:
        """A delta of -0.1 maps to ``v``."""
        marker = arrow(-0.1)
        assert marker == "v"

    def test_invert_flips_positive(self) -> None:
        """With ``invert=True`` a delta of 0.1 maps to ``v``."""
        marker = arrow(0.1, invert=True)
        assert marker == "v"

    def test_invert_flips_negative(self) -> None:
        """With ``invert=True`` a delta of -0.1 maps to ``^``."""
        marker = arrow(-0.1, invert=True)
        assert marker == "^"

    def test_invert_near_zero_still_equals(self) -> None:
        """``invert=True`` does not change the ``=`` result for a zero delta."""
        marker = arrow(0.0, invert=True)
        assert marker == "="
# ---------------------------------------------------------------------------
# SessionEvent
# ---------------------------------------------------------------------------
class TestSessionEvent:
    """Construction, immutability, equality and dict export of SessionEvent."""

    @staticmethod
    def _minimal_event() -> SessionEvent:
        """Build the small positional-argument event shared by several tests."""
        return SessionEvent("ts", "user", "hi", None, None, None, False, None, None)

    def test_construction(self) -> None:
        """Keyword construction stores the given field values."""
        event = SessionEvent(
            timestamp="2024-01-01T00:00:00Z",
            speaker="user",
            text="hello",
            tool_name=None,
            file_path=None,
            command=None,
            is_error=False,
            error_category=None,
            attachment_type=None,
        )
        assert event.speaker == "user"
        assert event.text == "hello"
        assert not event.is_error

    def test_frozen(self) -> None:
        """Assigning to a field raises ``FrozenInstanceError``."""
        event = self._minimal_event()
        with pytest.raises(attrs.exceptions.FrozenInstanceError):
            event.speaker = "assistant"  # type: ignore[misc]

    def test_equality(self) -> None:
        """Two events built from the same values compare equal."""
        left = self._minimal_event()
        right = self._minimal_event()
        assert left == right

    def test_attrs_asdict(self) -> None:
        """``attrs.asdict`` exposes the fields and the result dumps to JSON."""
        as_dict = attrs.asdict(self._minimal_event())
        assert as_dict["speaker"] == "user"
        assert as_dict["text"] == "hi"
        # json.dumps raises on non-serializable values; the string is truthy.
        serialized = json.dumps(as_dict)
        assert serialized
# ---------------------------------------------------------------------------
# SessionMeta — properties
# ---------------------------------------------------------------------------
class TestSessionMetaProperties:
    """Checks on the derived properties of the object built by ``make_meta``.

    Float-valued properties are compared with ``pytest.approx`` so the tests
    do not depend on the exact floating-point rounding of the computation.
    Integer token totals are still compared exactly.
    """

    def test_duration_minutes(self) -> None:
        """3600 seconds is reported as 60 minutes."""
        meta = make_meta(duration_s=3600.0)
        assert meta.duration_minutes == pytest.approx(60.0)

    def test_duration_minutes_zero(self) -> None:
        """A zero-second duration is reported as zero minutes."""
        meta = make_meta(duration_s=0.0)
        assert meta.duration_minutes == pytest.approx(0.0)

    def test_total_tokens(self) -> None:
        """1000 input tokens plus 500 output tokens total 1500."""
        meta = make_meta(input_tokens=1000, output_tokens=500)
        assert meta.total_tokens == 1500

    def test_total_tokens_default(self) -> None:
        """With no token arguments the total is zero."""
        assert make_meta().total_tokens == 0

    def test_cache_hit_rate(self) -> None:
        """300 cache-read tokens with 500 input and 200 cache-creation give 0.3."""
        meta = make_meta(input_tokens=500, cache_read_tokens=300, cache_creation_tokens=200)
        # 0.3 has no exact binary representation, so compare approximately.
        assert meta.cache_hit_rate == pytest.approx(0.3)

    def test_cache_hit_rate_zero_tokens(self) -> None:
        """With no token arguments the hit rate is 0.0."""
        assert make_meta().cache_hit_rate == pytest.approx(0.0)

    def test_cache_hit_rate_full(self) -> None:
        """When only cache-read tokens are present the hit rate is 1.0."""
        meta = make_meta(input_tokens=0, cache_read_tokens=1000, cache_creation_tokens=0)
        assert meta.cache_hit_rate == pytest.approx(1.0)
class TestSessionMetaFrozen:
    """Immutability check for the object returned by ``make_meta``."""

    def test_frozen(self) -> None:
        """Assigning to ``session_id`` raises ``FrozenInstanceError``."""
        frozen_meta = make_meta()
        expected_error = attrs.exceptions.FrozenInstanceError
        with pytest.raises(expected_error):
            frozen_meta.session_id = "new"  # type: ignore[misc]
class TestSessionMetaAsDict:
    """``attrs.asdict`` export of the object returned by ``make_meta``."""

    def test_returns_dict(self) -> None:
        """The export is a dict carrying the fixture's id and duration."""
        exported = attrs.asdict(make_meta())
        assert isinstance(exported, dict)
        assert exported["session_id"] == "abcd1234-5678-9012-3456-789012345678"
        assert exported["duration_s"] == 3600.0

    def test_includes_optional_fields(self) -> None:
        """Explicitly supplied git fields appear in the export."""
        meta = make_meta(git_branch="feature", git_commits=3)
        exported = attrs.asdict(meta)
        assert exported["git_branch"] == "feature"
        assert exported["git_commits"] == 3
# ---------------------------------------------------------------------------
# SessionAudit
# ---------------------------------------------------------------------------
class TestSessionAuditDefaults:
    """Default field values and immutability of ``SessionAudit``."""

    def test_defaults(self) -> None:
        """An audit built from only ``session_id`` carries the expected defaults."""
        audit = SessionAudit(session_id="x")
        expected_defaults: dict[str, Any] = {
            "outcome": "unclear",
            "satisfaction": "neutral",
            "session_type": "single_task",
            "goal_categories": {},
            "friction_counts": {},
            "user_instructions": (),
            "summary": "",
        }
        for field_name, expected in expected_defaults.items():
            assert getattr(audit, field_name) == expected, field_name

    def test_frozen(self) -> None:
        """Assigning to ``outcome`` raises ``FrozenInstanceError``."""
        audit = SessionAudit(session_id="x")
        expected_error = attrs.exceptions.FrozenInstanceError
        with pytest.raises(expected_error):
            audit.outcome = "success"  # type: ignore[misc]
class TestSessionAuditAsDict:
    """``attrs.asdict`` export of the object returned by ``make_audit``."""

    def test_returns_dict(self) -> None:
        """The export is a dict whose outcome is the fixture's ``mostly_achieved``."""
        exported = attrs.asdict(make_audit())
        assert isinstance(exported, dict)
        assert exported["outcome"] == "mostly_achieved"

    def test_reflects_values(self) -> None:
        """Overridden outcome and session type show up in the export."""
        audit = make_audit(outcome="success", satisfaction="positive", session_type="multi_task")
        exported = attrs.asdict(audit)
        assert exported["outcome"] == "success"
        assert exported["session_type"] == "multi_task"
# ---------------------------------------------------------------------------
# ProjectStats — mutable is_outlier
# ---------------------------------------------------------------------------
class TestProjectStats:
    """Checks that ``ProjectStats.is_outlier`` defaults to false and is assignable."""

    def make(self, **kw: Any) -> ProjectStats:
        """Build a ``ProjectStats`` from baseline values, overridden by ``kw``."""
        baseline: dict[str, Any] = {
            "project_path": "/proj/myapp",
            "project_name": "myapp",
            "session_count": 10,
            "success_rate": 0.9,
            "avg_tool_errors": 2.5,
            "avg_duration_s": 600.0,
            "top_error_categories": (),
            "top_friction": (),
        }
        # Later keys win, so caller overrides replace the baseline values.
        merged = {**baseline, **kw}
        return ProjectStats(**merged)

    def test_is_outlier_default_false(self) -> None:
        """Without an explicit value ``is_outlier`` is falsy."""
        stats = self.make()
        assert not stats.is_outlier

    def test_is_outlier_mutable(self) -> None:
        """``is_outlier`` can be assigned after construction."""
        stats = self.make()
        stats.is_outlier = True
        assert stats.is_outlier
# ---------------------------------------------------------------------------
# WeekStats + Recommendation
# ---------------------------------------------------------------------------
class TestWeekStats:
    """Immutability and default-field checks for ``WeekStats``."""

    @staticmethod
    def _sample_week() -> WeekStats:
        """Build the ``WeekStats`` instance used by both tests."""
        return WeekStats(
            week="2026-W17",
            session_count=3,
            success_rate=0.7,
            avg_errors_per_session=1.0,
            avg_duration_s=600.0,
        )

    def test_frozen(self) -> None:
        """Assigning to ``session_count`` raises ``FrozenInstanceError``."""
        week = self._sample_week()
        with pytest.raises(attrs.exceptions.FrozenInstanceError):
            week.session_count = 5  # type: ignore[misc]

    def test_default_error_counts(self) -> None:
        """``error_category_counts`` is an empty dict when not supplied."""
        week = self._sample_week()
        assert week.error_category_counts == {}
class TestRecommendation:
    """Immutability check for ``Recommendation``."""

    def test_frozen(self) -> None:
        """Assigning to ``suggestion`` raises ``FrozenInstanceError``."""
        recommendation = Recommendation(
            suggestion="do X",
            evidence="50%",
            frequency=0.5,
            source_sessions=5,
        )
        expected_error = attrs.exceptions.FrozenInstanceError
        with pytest.raises(expected_error):
            recommendation.suggestion = "do Y"  # type: ignore[misc]
# ---------------------------------------------------------------------------
# SessionDigest
# ---------------------------------------------------------------------------
class TestSessionDigest:
    """Dict export, immutability and JSON round-tripping of ``SessionDigest``."""

    def make(self, **kw: Any) -> SessionDigest:
        """Build a ``SessionDigest`` from baseline values, overridden by ``kw``."""
        baseline: dict[str, Any] = {
            "session_count": 10,
            "date_range": (100.0, 500.0),
            "success_rate": 0.8,
        }
        # Later keys win, so caller overrides replace the baseline values.
        merged = {**baseline, **kw}
        return SessionDigest(**merged)

    def test_attrs_asdict(self) -> None:
        """``attrs.asdict`` exposes the baseline count and success rate."""
        exported = attrs.asdict(self.make())
        assert exported["session_count"] == 10
        assert exported["success_rate"] == 0.8

    def test_frozen(self) -> None:
        """Assigning to ``session_count`` raises ``FrozenInstanceError``."""
        digest = self.make()
        with pytest.raises(attrs.exceptions.FrozenInstanceError):
            digest.session_count = 99  # type: ignore[misc]

    def test_json_serializable(self) -> None:
        """The dict export survives a JSON dump/load round trip."""
        exported = attrs.asdict(self.make(session_count=5))
        payload = json.dumps(exported, indent=2, default=str)
        restored = json.loads(payload)
        assert restored["session_count"] == 5

    def test_json_with_nested_weeks(self) -> None:
        """A nested ``WeekStats`` is exported and round-trips through JSON."""
        week = WeekStats(
            week="2026-W17",
            session_count=3,
            success_rate=0.7,
            avg_errors_per_session=1.0,
            avg_duration_s=600.0,
        )
        digest = self.make(weeks=(week,))
        payload = json.dumps(attrs.asdict(digest), indent=2, default=str)
        restored = json.loads(payload)
        assert len(restored["weeks"]) == 1
        assert restored["weeks"][0]["week"] == "2026-W17"

    def test_json_with_nested_projects(self) -> None:
        """A nested ``ProjectStats`` is exported and round-trips through JSON."""
        project = ProjectStats(
            project_path="/p",
            project_name="p",
            session_count=5,
            success_rate=0.8,
            avg_tool_errors=1.0,
            avg_duration_s=300.0,
            top_error_categories=(),
            top_friction=(),
            is_outlier=False,
        )
        digest = self.make(projects=(project,))
        payload = json.dumps(attrs.asdict(digest), indent=2, default=str)
        restored = json.loads(payload)
        assert len(restored["projects"]) == 1
        assert restored["projects"][0]["project_name"] == "p"