mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
* feat(blackbox): add package with models, CLI, and HTMX dashboard * test(blackbox): add comprehensive test coverage for dashboard * feat(blackbox): cache session scanning via watcher invalidation * docs(blackbox): add README and use fastapi[standard] for dev server * refactor(blackbox): extract presentation logic into formatter classes * refactor(blackbox): extract classify_error helpers * feat(blackbox): wire analytics into session detail view Show token usage, tool breakdowns, and session stats in a collapsible panel when viewing a session. * feat(blackbox): add codeflash plugin detection Detect codeflash agent names, skills, and commands in transcripts. Surface language, optimization domain, and capability badges in the analytics panel. * refactor(blackbox): remove underscore prefixes from internal functions * chore: add ty python-version to root pyproject.toml * chore(blackbox): fix lint errors in test files * style(blackbox): apply ruff formatting to analytics * feat(blackbox): add Playwright E2E tests for dashboard Refactor app.py to expose create_app() factory accepting a projects_dir override, enabling tests to run against fixture data instead of the real ~/.claude/projects/ directory. Routes now read projects_dir from app.state instead of the module-level constant. Add 26 Playwright tests across 5 files covering dashboard loading, session list, session detail with filters and analytics, sidebar collapse/localStorage persistence, and SSE log streaming. All tests pass on chromium, firefox, and webkit (78 total). CI gets a new e2e-blackbox job with a browser matrix strategy running all three engines in parallel, conditional on blackbox path changes, with trace upload on failure. * fix(ci): sync only blackbox package in e2e job * fix(ci): exclude e2e tests from unit test job The test job doesn't install Playwright browsers, so e2e tests error when pytest collects them. 
Ignore tests/e2e/ directories in the test job — those are handled by the dedicated e2e-blackbox job.
268 lines
8.9 KiB
Python
268 lines
8.9 KiB
Python
from __future__ import annotations
|
|
|
|
import time
|
|
|
|
from blackbox.dashboard.rendering import (
|
|
esc,
|
|
esc_md,
|
|
fmt_duration,
|
|
fmt_relative,
|
|
fmt_time,
|
|
passes_filter,
|
|
render_log_html,
|
|
shorten_paths,
|
|
tool_call_html,
|
|
)
|
|
from blackbox.models import LogEntry
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# fmt_time
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFmtTime:
    """Checks that ``fmt_time`` renders epoch timestamps as ``HH:MM:SS``."""

    def test_epoch_zero(self) -> None:
        """Timestamp 0.0 is rendered as all zeros."""
        rendered = fmt_time(0.0)
        assert rendered == "00:00:00"

    def test_known_timestamp(self) -> None:
        """A fixed, well-known timestamp maps to a fixed clock string."""
        rendered = fmt_time(1_000_000_000.0)
        assert rendered == "01:46:40"

    def test_fractional_seconds_truncated(self) -> None:
        """Sub-second fractions are dropped rather than rounded up."""
        rendered = fmt_time(0.999)
        assert rendered == "00:00:00"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# fmt_duration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFmtDuration:
    """Checks the compact duration strings produced by ``fmt_duration``."""

    def test_zero_seconds(self) -> None:
        """Identical start and end values give a zero-length duration."""
        rendered = fmt_duration(100.0, 100.0)
        assert rendered == "0s"

    def test_seconds_only(self) -> None:
        """Durations under a minute show seconds only."""
        rendered = fmt_duration(0.0, 45.0)
        assert rendered == "45s"

    def test_minutes_and_seconds(self) -> None:
        """Durations under an hour show minutes and seconds."""
        rendered = fmt_duration(0.0, 150.0)
        assert rendered == "2m30s"

    def test_hours_and_minutes(self) -> None:
        """Durations of an hour or more show hours and minutes."""
        rendered = fmt_duration(0.0, 5400.0)
        assert rendered == "1h30m"

    def test_negative_clamps_to_zero(self) -> None:
        """An end value earlier than the start is reported as ``0s``."""
        rendered = fmt_duration(100.0, 50.0)
        assert rendered == "0s"

    def test_none_finished_uses_current_time(self) -> None:
        """With ``None`` as the second argument a seconds-suffixed string is returned."""
        ten_seconds_ago = time.time() - 10
        result = fmt_duration(ten_seconds_ago, None)
        assert result.endswith("s")

    def test_exactly_60_seconds(self) -> None:
        """The one-minute boundary keeps a zero-padded seconds field."""
        rendered = fmt_duration(0.0, 60.0)
        assert rendered == "1m00s"

    def test_exactly_one_hour(self) -> None:
        """The one-hour boundary keeps a zero-padded minutes field."""
        rendered = fmt_duration(0.0, 3600.0)
        assert rendered == "1h00m"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# fmt_relative
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFmtRelative:
    """Checks the "time ago" labels produced by ``fmt_relative``."""

    def test_just_now(self) -> None:
        """The current time is labelled ``just now``."""
        label = fmt_relative(time.time())
        assert label == "just now"

    def test_minutes_ago(self) -> None:
        """A timestamp 300 seconds in the past is labelled in minutes."""
        label = fmt_relative(time.time() - 300)
        assert label == "5m ago"

    def test_hours_ago(self) -> None:
        """A timestamp 7200 seconds in the past is labelled in hours."""
        label = fmt_relative(time.time() - 7200)
        assert label == "2h ago"

    def test_days_ago(self) -> None:
        """A timestamp 259200 seconds in the past is labelled in days."""
        label = fmt_relative(time.time() - 259200)
        assert label == "3d ago"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# esc / esc_md
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestEsc:
    """Checks that ``esc`` HTML-escapes text and turns newlines into ``<br>``.

    The expected values are written with explicit character references
    (``&amp;``, ``&lt;``, ``&gt;``). Comparing the input against itself
    would only prove that ``esc`` is an identity function, which is the
    opposite of what these tests are named for.
    """

    def test_ampersand(self) -> None:
        """A bare ampersand must come back as ``&amp;``."""
        assert esc("a & b") == "a &amp; b"

    def test_angle_brackets(self) -> None:
        """Angle brackets must come back as ``&lt;`` / ``&gt;``."""
        assert esc("<div>") == "&lt;div&gt;"

    def test_newlines_become_br(self) -> None:
        """A newline is replaced by a literal ``<br>`` tag."""
        assert esc("a\nb") == "a<br>b"

    def test_combined(self) -> None:
        """Escaping and newline conversion compose in a single call."""
        # The <br> produced for the newline stays literal; the tags that
        # were present in the input are escaped.
        assert esc("<b>hi</b>\n&") == "&lt;b&gt;hi&lt;/b&gt;<br>&amp;"
|
|
|
|
|
|
class TestEscMd:
    """Checks that ``esc_md`` converts ``**bold**`` while still escaping HTML."""

    def test_bold_converted(self) -> None:
        """``**text**`` becomes a styled ``<strong>`` element."""
        result = esc_md("hello **world**")
        assert '<strong class="text-white">world</strong>' in result

    def test_html_still_escaped(self) -> None:
        """Markup in the input is escaped even when bold conversion applies."""
        result = esc_md("<script>**bold**")
        # The tag must appear in escaped form only; a raw <script> in the
        # output would mean the input was passed through unescaped.
        assert "&lt;script&gt;" in result
        assert "<script>" not in result
        assert '<strong class="text-white">bold</strong>' in result

    def test_no_bold(self) -> None:
        """Text without markdown or special characters is returned unchanged."""
        assert esc_md("plain text") == "plain text"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# shorten_paths
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestShortenPaths:
    """Checks how ``shorten_paths`` treats temporary-directory paths."""

    def test_removes_tmp_paths(self) -> None:
        """A ``/tmp/...`` path is removed from the text."""
        shortened = shorten_paths("file: /tmp/abc123/foo.py")
        assert shortened == "file: "

    def test_removes_private_tmp_paths(self) -> None:
        """A ``/private/tmp/...`` path is removed from the text."""
        shortened = shorten_paths("file: /private/tmp/abc123/bar.py")
        assert shortened == "file: "

    def test_no_match_unchanged(self) -> None:
        """Paths outside the temporary directories are left alone."""
        shortened = shorten_paths("/home/user/code")
        assert shortened == "/home/user/code"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# passes_filter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestPassesFilter:
    """Checks which log entries ``passes_filter`` lets through per filter mode."""

    def make(self, level: str = "info", message: str = "hi", source: str = "user") -> LogEntry:
        """Build a ``LogEntry`` at timestamp 0.0 with the given fields."""
        return LogEntry(
            timestamp=0.0,
            source=source,
            level=level,
            message=message,
        )

    def test_empty_message_rejected(self) -> None:
        """An entry with an empty message is dropped even in ``all`` mode."""
        blank = self.make(message="")
        assert not passes_filter(blank, "all", None)

    def test_whitespace_only_rejected(self) -> None:
        """An entry whose message is only whitespace is dropped."""
        spaces = self.make(message=" ")
        assert not passes_filter(spaces, "all", None)

    def test_all_filter_accepts_everything(self) -> None:
        """A default entry passes the ``all`` filter."""
        entry = self.make()
        assert passes_filter(entry, "all", None)

    def test_skip_levels_rejected_in_compact(self) -> None:
        """Each of the listed streaming levels is hidden in ``compact`` mode."""
        noisy_levels = (
            "delta",
            "stream",
            "block_stop",
            "block_start",
            "thinking_delta",
            "tool_start",
        )
        for noisy in noisy_levels:
            entry = self.make(level=noisy)
            assert not passes_filter(entry, "compact", None)

    def test_allowed_set_filters(self) -> None:
        """With an allowed-level set, only entries at those levels pass."""
        allowed = {"error"}
        error_entry = self.make(level="error")
        info_entry = self.make(level="info")
        assert passes_filter(error_entry, "errors", allowed)
        assert not passes_filter(info_entry, "errors", allowed)

    def test_thinking_rejected_in_compact(self) -> None:
        """An assistant ``(thinking)`` placeholder is hidden in ``compact`` mode."""
        placeholder = self.make(level="assistant", message="(thinking)")
        assert not passes_filter(placeholder, "compact", None)

    def test_assistant_non_thinking_accepted(self) -> None:
        """Ordinary assistant text is shown in ``compact`` mode."""
        spoken = self.make(level="assistant", message="Hello there")
        assert passes_filter(spoken, "compact", None)

    def test_skip_levels_pass_in_all_mode(self) -> None:
        """A level hidden in ``compact`` mode still passes the ``all`` filter."""
        delta = self.make(level="delta", message="x")
        assert passes_filter(delta, "all", None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# tool_call_html
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestToolCallHtml:
    """Checks when ``tool_call_html`` collapses a preview into ``<details>``."""

    def test_short_preview_no_details(self) -> None:
        """A one-line preview is rendered inline with its text intact."""
        markup = tool_call_html("ls -la")
        assert "<details" not in markup
        assert "ls -la" in markup

    def test_three_lines_no_details(self) -> None:
        """A three-line preview is still rendered without ``<details>``."""
        markup = tool_call_html("line1\nline2\nline3")
        assert "<details" not in markup

    def test_long_preview_has_details(self) -> None:
        """A ten-line preview is collapsed and reports the hidden line count."""
        ten_lines = "\n".join([f"line {n}" for n in range(10)])
        markup = tool_call_html(ten_lines)
        assert "<details" in markup
        assert "+9 lines" in markup

    def test_tmp_paths_shortened(self) -> None:
        """Temporary-directory paths do not appear in the rendered output."""
        markup = tool_call_html("/tmp/abc123/foo.py")
        assert "/tmp/" not in markup
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# render_log_html
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestRenderLogHtml:
    """Checks the CSS classes and badges emitted by ``render_log_html``."""

    def make(self, **kw: object) -> LogEntry:
        """Build a ``LogEntry`` from baseline fields, overridden by ``kw``."""
        fields = {
            "timestamp": 1_000_000_000.0,
            "source": "claude",
            "level": "assistant",
            "message": "hello",
            "data": {},
            **kw,
        }
        return LogEntry(**fields)  # type: ignore[arg-type]

    def test_user_message_green(self) -> None:
        """User-sourced entries carry the green text class."""
        entry = self.make(source="user", level="info")
        markup = render_log_html(entry)
        assert "text-green-300" in markup

    def test_assistant_message(self) -> None:
        """Assistant entries carry the light-gray text class."""
        entry = self.make(level="assistant", message="hi")
        markup = render_log_html(entry)
        assert "text-gray-100" in markup

    def test_thinking_italic(self) -> None:
        """The ``(thinking)`` placeholder is rendered in italics."""
        entry = self.make(level="assistant", message="(thinking)")
        markup = render_log_html(entry)
        assert "italic" in markup

    def test_error_red(self) -> None:
        """Error entries carry the red text class."""
        entry = self.make(level="error", message="fail")
        markup = render_log_html(entry)
        assert "text-red-400" in markup

    def test_tool_call_amber_badge(self) -> None:
        """Tool calls show an amber badge together with the tool name."""
        entry = self.make(
            level="tool_call",
            message="Bash: ls",
            data={"tool": "Bash", "input_preview": "ls"},
        )
        markup = render_log_html(entry)
        assert "bg-amber-500" in markup
        assert "Bash" in markup

    def test_tool_result_has_res_badge(self) -> None:
        """Tool results are tagged with a ``RES`` badge."""
        entry = self.make(level="tool_result", message="output")
        markup = render_log_html(entry)
        assert "RES" in markup

    def test_tool_result_truncates_long_messages(self) -> None:
        """A 600-character tool result is rendered with an ellipsis."""
        entry = self.make(level="tool_result", message="x" * 600)
        markup = render_log_html(entry)
        assert "..." in markup

    def test_contains_timestamp(self) -> None:
        """The formatted clock time of the entry appears in the output."""
        markup = render_log_html(self.make())
        assert "01:46:40" in markup

    def test_source_badges(self) -> None:
        """Each known source is rendered with its three-letter badge."""
        expected_badges = {"claude": "CLU", "user": "USR", "system": "SYS"}
        for source, label in expected_badges.items():
            markup = render_log_html(self.make(source=source, level="info"))
            assert label in markup

    def test_tool_levels_have_indent_and_opacity(self) -> None:
        """Tool-level entries are dimmed and indented."""
        entry = self.make(level="tool_call", data={"tool": "Read", "input_preview": "x"})
        markup = render_log_html(entry)
        assert "opacity-60" in markup
        assert "pl-4" in markup

    def test_non_tool_no_indent(self) -> None:
        """Non-tool entries are not indented."""
        entry = self.make(level="info")
        markup = render_log_html(entry)
        assert "pl-4" not in markup
|