codeflash-agent/packages/blackbox/tests/test_rendering.py
Kevin Turcios 0ad5e60523
Add blackbox package: session flight recorder with HTMX dashboard (#39)
* feat(blackbox): add package with models, CLI, and HTMX dashboard

* test(blackbox): add comprehensive test coverage for dashboard

* feat(blackbox): cache session scanning via watcher invalidation

* docs(blackbox): add README and use fastapi[standard] for dev server

* refactor(blackbox): extract presentation logic into formatter classes

* refactor(blackbox): extract classify_error helpers

* feat(blackbox): wire analytics into session detail view

Show token usage, tool breakdowns, and session stats in a
collapsible panel when viewing a session.

* feat(blackbox): add codeflash plugin detection

Detect codeflash agent names, skills, and commands in transcripts.
Surface language, optimization domain, and capability badges in
the analytics panel.

* refactor(blackbox): remove underscore prefixes from internal functions

* chore: add ty python-version to root pyproject.toml

* chore(blackbox): fix lint errors in test files

* style(blackbox): apply ruff formatting to analytics

* feat(blackbox): add Playwright E2E tests for dashboard

Refactor app.py to expose create_app() factory accepting a projects_dir
override, enabling tests to run against fixture data instead of the real
~/.claude/projects/ directory. Routes now read projects_dir from
app.state instead of the module-level constant.

Add 26 Playwright tests across 5 files covering dashboard loading,
session list, session detail with filters and analytics, sidebar
collapse/localStorage persistence, and SSE log streaming. All tests
pass on chromium, firefox, and webkit (78 total).

CI gets a new e2e-blackbox job with a browser matrix strategy running
all three engines in parallel, conditional on blackbox path changes,
with trace upload on failure.

* fix(ci): sync only blackbox package in e2e job

* fix(ci): exclude e2e tests from unit test job

The test job doesn't install Playwright browsers, so e2e tests error
when pytest collects them. Ignore tests/e2e/ directories in the test
job — those are handled by the dedicated e2e-blackbox job.
2026-04-28 19:58:43 -05:00

268 lines
8.9 KiB
Python

from __future__ import annotations
import time
from blackbox.dashboard.rendering import (
esc,
esc_md,
fmt_duration,
fmt_relative,
fmt_time,
passes_filter,
render_log_html,
shorten_paths,
tool_call_html,
)
from blackbox.models import LogEntry
# ---------------------------------------------------------------------------
# fmt_time
# ---------------------------------------------------------------------------
class TestFmtTime:
    """Checks the clock string that ``fmt_time`` produces for epoch timestamps."""

    def test_epoch_zero(self) -> None:
        """Timestamp 0.0 renders as midnight."""
        rendered = fmt_time(0.0)
        assert rendered == "00:00:00"

    def test_known_timestamp(self) -> None:
        """A fixed, well-known timestamp renders to a fixed clock string."""
        rendered = fmt_time(1_000_000_000.0)
        assert rendered == "01:46:40"

    def test_fractional_seconds_truncated(self) -> None:
        """A sub-second value does not round up to the next second."""
        rendered = fmt_time(0.999)
        assert rendered == "00:00:00"
# ---------------------------------------------------------------------------
# fmt_duration
# ---------------------------------------------------------------------------
class TestFmtDuration:
    """Checks the compact duration strings returned by ``fmt_duration``."""

    def test_zero_seconds(self) -> None:
        """Identical start and end yield a zero-second duration."""
        rendered = fmt_duration(100.0, 100.0)
        assert rendered == "0s"

    def test_seconds_only(self) -> None:
        """Spans under a minute show seconds only."""
        rendered = fmt_duration(0.0, 45.0)
        assert rendered == "45s"

    def test_minutes_and_seconds(self) -> None:
        """Spans under an hour show minutes and seconds."""
        rendered = fmt_duration(0.0, 150.0)
        assert rendered == "2m30s"

    def test_hours_and_minutes(self) -> None:
        """Spans of an hour or more show hours and minutes."""
        rendered = fmt_duration(0.0, 5400.0)
        assert rendered == "1h30m"

    def test_negative_clamps_to_zero(self) -> None:
        """An end before the start is reported as zero seconds."""
        rendered = fmt_duration(100.0, 50.0)
        assert rendered == "0s"

    def test_none_finished_uses_current_time(self) -> None:
        """Passing ``None`` as the end still produces a seconds-suffixed string."""
        ten_seconds_ago = time.time() - 10
        rendered = fmt_duration(ten_seconds_ago, None)
        assert rendered.endswith("s")

    def test_exactly_60_seconds(self) -> None:
        """The one-minute boundary zero-pads the seconds part."""
        rendered = fmt_duration(0.0, 60.0)
        assert rendered == "1m00s"

    def test_exactly_one_hour(self) -> None:
        """The one-hour boundary zero-pads the minutes part."""
        rendered = fmt_duration(0.0, 3600.0)
        assert rendered == "1h00m"
# ---------------------------------------------------------------------------
# fmt_relative
# ---------------------------------------------------------------------------
class TestFmtRelative:
    """Checks the "N ago" labels that ``fmt_relative`` gives for past timestamps."""

    def test_just_now(self) -> None:
        """The current instant is labelled "just now"."""
        moment = time.time()
        assert fmt_relative(moment) == "just now"

    def test_minutes_ago(self) -> None:
        """Five minutes back is labelled in minutes."""
        moment = time.time() - 5 * 60
        assert fmt_relative(moment) == "5m ago"

    def test_hours_ago(self) -> None:
        """Two hours back is labelled in hours."""
        moment = time.time() - 2 * 60 * 60
        assert fmt_relative(moment) == "2h ago"

    def test_days_ago(self) -> None:
        """Three days back is labelled in days."""
        moment = time.time() - 3 * 24 * 60 * 60
        assert fmt_relative(moment) == "3d ago"
# ---------------------------------------------------------------------------
# esc / esc_md
# ---------------------------------------------------------------------------
class TestEsc:
    """Checks the HTML escaping performed by ``esc``."""

    def test_ampersand(self) -> None:
        """A bare ampersand is entity-encoded."""
        # The expectation must be the escaped form: test_combined below
        # already requires "&" to come back as "&amp;", so expecting the
        # input unchanged here would contradict it.
        assert esc("a & b") == "a &amp; b"

    def test_angle_brackets(self) -> None:
        """Angle brackets are entity-encoded."""
        assert esc("<div>") == "&lt;div&gt;"

    def test_newlines_become_br(self) -> None:
        """A newline is turned into a ``<br>`` tag."""
        assert esc("a\nb") == "a<br>b"

    def test_combined(self) -> None:
        """Tags, newline and ampersand are all handled within one string."""
        assert esc("<b>hi</b>\n&") == "&lt;b&gt;hi&lt;/b&gt;<br>&amp;"
class TestEscMd:
    """Checks ``esc_md``: escaping plus ``**bold**`` conversion."""

    def test_bold_converted(self) -> None:
        """Double-asterisk spans become a styled ``<strong>`` element."""
        rendered = esc_md("hello **world**")
        assert '<strong class="text-white">world</strong>' in rendered

    def test_html_still_escaped(self) -> None:
        """Raw HTML is escaped even when bold markup is also present."""
        rendered = esc_md("<script>**bold**")
        expected_fragments = (
            "&lt;script&gt;",
            '<strong class="text-white">bold</strong>',
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_no_bold(self) -> None:
        """Text without any markup comes back unchanged."""
        rendered = esc_md("plain text")
        assert rendered == "plain text"
# ---------------------------------------------------------------------------
# shorten_paths
# ---------------------------------------------------------------------------
class TestShortenPaths:
    """Checks how ``shorten_paths`` treats temp-directory paths."""

    def test_removes_tmp_paths(self) -> None:
        """A ``/tmp/...`` path is stripped from the text."""
        shortened = shorten_paths("file: /tmp/abc123/foo.py")
        assert shortened == "file: "

    def test_removes_private_tmp_paths(self) -> None:
        """A ``/private/tmp/...`` path is stripped from the text."""
        shortened = shorten_paths("file: /private/tmp/abc123/bar.py")
        assert shortened == "file: "

    def test_no_match_unchanged(self) -> None:
        """Paths outside the temp directories are left alone."""
        shortened = shorten_paths("/home/user/code")
        assert shortened == "/home/user/code"
# ---------------------------------------------------------------------------
# passes_filter
# ---------------------------------------------------------------------------
class TestPassesFilter:
    """Checks which log entries ``passes_filter`` lets through in each mode."""

    def make(self, level: str = "info", message: str = "hi", source: str = "user") -> LogEntry:
        """Build a ``LogEntry`` at timestamp 0.0 with the given fields."""
        return LogEntry(
            level=level,
            message=message,
            source=source,
            timestamp=0.0,
        )

    def test_empty_message_rejected(self) -> None:
        """An entry with an empty message is dropped even in "all" mode."""
        blank = self.make(message="")
        assert not passes_filter(blank, "all", None)

    def test_whitespace_only_rejected(self) -> None:
        """An entry whose message is only whitespace is dropped."""
        blank = self.make(message=" ")
        assert not passes_filter(blank, "all", None)

    def test_all_filter_accepts_everything(self) -> None:
        """A default entry passes in "all" mode."""
        ordinary = self.make()
        assert passes_filter(ordinary, "all", None)

    def test_skip_levels_rejected_in_compact(self) -> None:
        """Each of the listed levels is hidden in "compact" mode."""
        skipped_levels = (
            "delta",
            "stream",
            "block_stop",
            "block_start",
            "thinking_delta",
            "tool_start",
        )
        for skipped in skipped_levels:
            candidate = self.make(level=skipped)
            assert not passes_filter(candidate, "compact", None)

    def test_allowed_set_filters(self) -> None:
        """With an allowed set, only entries at a listed level pass."""
        only_errors = {"error"}
        assert passes_filter(self.make(level="error"), "errors", only_errors)
        assert not passes_filter(self.make(level="info"), "errors", only_errors)

    def test_thinking_rejected_in_compact(self) -> None:
        """The "(thinking)" placeholder from the assistant is hidden in compact mode."""
        placeholder = self.make(level="assistant", message="(thinking)")
        assert not passes_filter(placeholder, "compact", None)

    def test_assistant_non_thinking_accepted(self) -> None:
        """Ordinary assistant text is kept in compact mode."""
        spoken = self.make(level="assistant", message="Hello there")
        assert passes_filter(spoken, "compact", None)

    def test_skip_levels_pass_in_all_mode(self) -> None:
        """A level hidden in compact mode is still shown in "all" mode."""
        delta_entry = self.make(level="delta", message="x")
        assert passes_filter(delta_entry, "all", None)
# ---------------------------------------------------------------------------
# tool_call_html
# ---------------------------------------------------------------------------
class TestToolCallHtml:
    """Checks the markup that ``tool_call_html`` builds for tool-call previews."""

    def test_short_preview_no_details(self) -> None:
        """A one-line preview is shown inline, without a ``<details>`` wrapper."""
        markup = tool_call_html("ls -la")
        assert "<details" not in markup
        assert "ls -la" in markup

    def test_three_lines_no_details(self) -> None:
        """Three lines are still short enough to avoid a ``<details>`` wrapper."""
        preview = "\n".join(["line1", "line2", "line3"])
        markup = tool_call_html(preview)
        assert "<details" not in markup

    def test_long_preview_has_details(self) -> None:
        """Ten lines collapse into ``<details>`` with a "+9 lines" label."""
        preview_lines = [f"line {n}" for n in range(10)]
        markup = tool_call_html("\n".join(preview_lines))
        assert "<details" in markup
        assert "+9 lines" in markup

    def test_tmp_paths_shortened(self) -> None:
        """Temp-directory paths do not appear in the rendered markup."""
        markup = tool_call_html("/tmp/abc123/foo.py")
        assert "/tmp/" not in markup
# ---------------------------------------------------------------------------
# render_log_html
# ---------------------------------------------------------------------------
class TestRenderLogHtml:
    """Checks CSS classes, badges and content emitted by ``render_log_html``."""

    def make(self, **kw: object) -> LogEntry:
        """Build a ``LogEntry`` from baseline fields, with ``kw`` overriding them."""
        fields = {
            "timestamp": 1_000_000_000.0,
            "source": "claude",
            "level": "assistant",
            "message": "hello",
            "data": {},
            **kw,
        }
        return LogEntry(**fields)  # type: ignore[arg-type]

    def test_user_message_green(self) -> None:
        """User entries use the green text class."""
        entry = self.make(source="user", level="info")
        markup = render_log_html(entry)
        assert "text-green-300" in markup

    def test_assistant_message(self) -> None:
        """Assistant entries use the light-gray text class."""
        entry = self.make(level="assistant", message="hi")
        markup = render_log_html(entry)
        assert "text-gray-100" in markup

    def test_thinking_italic(self) -> None:
        """The "(thinking)" placeholder is rendered in italics."""
        entry = self.make(level="assistant", message="(thinking)")
        markup = render_log_html(entry)
        assert "italic" in markup

    def test_error_red(self) -> None:
        """Error entries use the red text class."""
        entry = self.make(level="error", message="fail")
        markup = render_log_html(entry)
        assert "text-red-400" in markup

    def test_tool_call_amber_badge(self) -> None:
        """Tool calls get an amber badge and show the tool name."""
        entry = self.make(
            level="tool_call",
            message="Bash: ls",
            data={"tool": "Bash", "input_preview": "ls"},
        )
        markup = render_log_html(entry)
        assert "bg-amber-500" in markup
        assert "Bash" in markup

    def test_tool_result_has_res_badge(self) -> None:
        """Tool results carry the "RES" badge."""
        entry = self.make(level="tool_result", message="output")
        markup = render_log_html(entry)
        assert "RES" in markup

    def test_tool_result_truncates_long_messages(self) -> None:
        """A 600-character tool result is rendered with an ellipsis."""
        entry = self.make(level="tool_result", message="x" * 600)
        markup = render_log_html(entry)
        assert "..." in markup

    def test_contains_timestamp(self) -> None:
        """The formatted time of the entry appears in the output."""
        markup = render_log_html(self.make())
        assert "01:46:40" in markup

    def test_source_badges(self) -> None:
        """Each source is shown with its three-letter badge."""
        badge_by_source = {"claude": "CLU", "user": "USR", "system": "SYS"}
        for source, label in badge_by_source.items():
            markup = render_log_html(self.make(source=source, level="info"))
            assert label in markup

    def test_tool_levels_have_indent_and_opacity(self) -> None:
        """Tool-call rows are dimmed and indented."""
        entry = self.make(level="tool_call", data={"tool": "Read", "input_preview": "x"})
        markup = render_log_html(entry)
        assert "opacity-60" in markup
        assert "pl-4" in markup

    def test_non_tool_no_indent(self) -> None:
        """Non-tool rows are not indented."""
        entry = self.make(level="info")
        markup = render_log_html(entry)
        assert "pl-4" not in markup