codeflash-agent/packages/blackbox/tests/test_transcript.py
Kevin Turcios 0ad5e60523
Add blackbox package: session flight recorder with HTMX dashboard (#39)
* feat(blackbox): add package with models, CLI, and HTMX dashboard

* test(blackbox): add comprehensive test coverage for dashboard

* feat(blackbox): cache session scanning via watcher invalidation

* docs(blackbox): add README and use fastapi[standard] for dev server

* refactor(blackbox): extract presentation logic into formatter classes

* refactor(blackbox): extract classify_error helpers

* feat(blackbox): wire analytics into session detail view

Show token usage, tool breakdowns, and session stats in a
collapsible panel when viewing a session.

* feat(blackbox): add codeflash plugin detection

Detect codeflash agent names, skills, and commands in transcripts.
Surface language, optimization domain, and capability badges in
the analytics panel.

* refactor(blackbox): remove underscore prefixes from internal functions

* chore: add ty python-version to root pyproject.toml

* chore(blackbox): fix lint errors in test files

* style(blackbox): apply ruff formatting to analytics

* feat(blackbox): add Playwright E2E tests for dashboard

Refactor app.py to expose create_app() factory accepting a projects_dir
override, enabling tests to run against fixture data instead of the real
~/.claude/projects/ directory. Routes now read projects_dir from
app.state instead of the module-level constant.

Add 26 Playwright tests across 5 files covering dashboard loading,
session list, session detail with filters and analytics, sidebar
collapse/localStorage persistence, and SSE log streaming. All tests
pass on chromium, firefox, and webkit (78 total).

CI gets a new e2e-blackbox job with a browser matrix strategy running
all three engines in parallel, conditional on blackbox path changes,
with trace upload on failure.

* fix(ci): sync only blackbox package in e2e job

* fix(ci): exclude e2e tests from unit test job

The test job doesn't install Playwright browsers, so e2e tests error
when pytest collects them. Ignore tests/e2e/ directories in the test
job — those are handled by the dedicated e2e-blackbox job.
2026-04-28 19:58:43 -05:00

552 lines
21 KiB
Python

from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from blackbox.dashboard.transcript import (
decode_project_name,
extract_text_content,
extract_tool_results,
extract_tool_uses,
parse_assistant_entry,
parse_entry,
parse_transcript,
parse_transcript_tail,
parse_user_entry,
quick_session_info,
scan_sessions,
tool_input_preview,
ts_to_epoch,
)
# ---------------------------------------------------------------------------
# ts_to_epoch
# ---------------------------------------------------------------------------
class TestTsToEpoch:
    """Tests for ``ts_to_epoch`` timestamp parsing."""

    def test_none_returns_zero(self) -> None:
        """``None`` maps to ``0.0``."""
        assert 0.0 == ts_to_epoch(None)

    def test_empty_string_returns_zero(self) -> None:
        """An empty string maps to ``0.0``."""
        assert 0.0 == ts_to_epoch("")

    def test_valid_iso_timestamp(self) -> None:
        """A well-formed ISO timestamp with a ``Z`` suffix yields a positive value."""
        result = ts_to_epoch("2024-01-01T00:00:00Z")
        assert result > 0

    def test_naive_datetime_treated_as_utc(self) -> None:
        """A timestamp without an offset resolves to the same instant as its ``Z`` form.

        Asserting only ``> 0`` would also pass if the naive value were read as
        local time, so the result is compared against the explicit-UTC
        spelling of the same wall-clock time.
        """
        naive = ts_to_epoch("2024-01-01T00:00:00")
        explicit_utc = ts_to_epoch("2024-01-01T00:00:00Z")
        assert naive > 0
        assert explicit_utc == naive

    def test_invalid_format_returns_zero(self) -> None:
        """Unparseable input maps to ``0.0`` instead of raising."""
        assert 0.0 == ts_to_epoch("not-a-date")
# ---------------------------------------------------------------------------
# extract_text_content
# ---------------------------------------------------------------------------
class TestExtractTextContent:
    """Checks ``extract_text_content`` against each content shape exercised below."""

    def test_string_passthrough(self) -> None:
        """A plain string comes back unchanged."""
        extracted = extract_text_content("hello")
        assert "hello" == extracted

    def test_list_of_text_blocks(self) -> None:
        """Text blocks are joined with a newline, in order."""
        blocks = [
            {"type": "text", "text": "hello"},
            {"type": "text", "text": "world"},
        ]
        extracted = extract_text_content(blocks)
        assert "hello\nworld" == extracted

    def test_non_text_blocks_skipped(self) -> None:
        """Blocks whose type is not ``text`` contribute nothing."""
        blocks = [
            {"type": "tool_use", "name": "Bash"},
            {"type": "text", "text": "only this"},
        ]
        extracted = extract_text_content(blocks)
        assert "only this" == extracted

    def test_empty_list(self) -> None:
        """An empty list yields an empty string."""
        extracted = extract_text_content([])
        assert "" == extracted

    def test_non_string_non_list(self) -> None:
        """A value that is neither ``str`` nor ``list`` yields an empty string."""
        extracted = extract_text_content(42)
        assert "" == extracted

    def test_non_dict_items_skipped(self) -> None:
        """List items that are not dicts are ignored."""
        extracted = extract_text_content(["not a dict"])
        assert "" == extracted
# ---------------------------------------------------------------------------
# extract_tool_uses / extract_tool_results
# ---------------------------------------------------------------------------
class TestExtractToolUses:
    """Checks that ``extract_tool_uses`` keeps only ``tool_use`` blocks."""

    def test_extracts_tool_use_blocks(self) -> None:
        """Only the ``tool_use`` block survives from a mixed list."""
        tool_block = {"type": "tool_use", "name": "Read"}
        text_block = {"type": "text", "text": "x"}
        found = extract_tool_uses([tool_block, text_block])
        assert [{"type": "tool_use", "name": "Read"}] == found

    def test_non_list_returns_empty(self) -> None:
        """A string input yields an empty list."""
        found = extract_tool_uses("string")
        assert [] == found

    def test_empty_list(self) -> None:
        """An empty list yields an empty list."""
        found = extract_tool_uses([])
        assert [] == found
class TestExtractToolResults:
    """Checks that ``extract_tool_results`` keeps only ``tool_result`` blocks."""

    def test_extracts_tool_result_blocks(self) -> None:
        """Only the ``tool_result`` block survives from a mixed list."""
        result_block = {"type": "tool_result", "content": "ok"}
        text_block = {"type": "text", "text": "x"}
        found = extract_tool_results([result_block, text_block])
        assert [{"type": "tool_result", "content": "ok"}] == found

    def test_non_list_returns_empty(self) -> None:
        """A non-list input (here an int) yields an empty list."""
        found = extract_tool_results(42)
        assert [] == found
# ---------------------------------------------------------------------------
# tool_input_preview
# ---------------------------------------------------------------------------
class TestToolInputPreview:
    """Checks the preview string ``tool_input_preview`` produces per tool name."""

    def test_bash_shows_command(self) -> None:
        """``Bash`` previews the ``command`` value."""
        preview = tool_input_preview("Bash", {"command": "ls -la"})
        assert "ls -la" == preview

    def test_read_shows_path(self) -> None:
        """``Read`` previews the ``file_path`` value."""
        preview = tool_input_preview("Read", {"file_path": "/foo.py"})
        assert "/foo.py" == preview

    def test_write_shows_path(self) -> None:
        """``Write`` previews the ``file_path`` value."""
        preview = tool_input_preview("Write", {"file_path": "/bar.py"})
        assert "/bar.py" == preview

    def test_edit_shows_path_and_old_string(self) -> None:
        """``Edit`` includes the path and ends with an ellipsis for a long ``old_string``."""
        tool_input = {"file_path": "/f.py", "old_string": "x" * 100}
        preview = tool_input_preview("Edit", tool_input)
        assert "/f.py" in preview
        assert preview.endswith("...")

    def test_agent_shows_description(self) -> None:
        """``Agent`` previews ``description`` when both it and ``prompt`` are present."""
        tool_input = {"description": "find bugs", "prompt": "long prompt"}
        preview = tool_input_preview("Agent", tool_input)
        assert "find bugs" == preview

    def test_agent_falls_back_to_prompt(self) -> None:
        """``Agent`` previews ``prompt`` when no ``description`` is given."""
        preview = tool_input_preview("Agent", {"prompt": "do stuff"})
        assert "do stuff" == preview

    def test_skill_shows_skill_name(self) -> None:
        """``Skill`` previews the ``skill`` value."""
        preview = tool_input_preview("Skill", {"skill": "commit"})
        assert "commit" == preview

    def test_unknown_tool_json_preview(self) -> None:
        """An unrecognised tool's preview contains both the key and the value."""
        preview = tool_input_preview("CustomTool", {"key": "value"})
        assert "key" in preview
        assert "value" in preview

    def test_unknown_tool_truncated_at_200(self) -> None:
        """An unrecognised tool's preview is at most 200 characters long."""
        oversized_input = {"key": "x" * 300}
        preview = tool_input_preview("CustomTool", oversized_input)
        assert len(preview) <= 200
# ---------------------------------------------------------------------------
# parse_entry / parse_user_entry / parse_assistant_entry
# ---------------------------------------------------------------------------
class TestParseEntry:
    """Checks how ``parse_entry`` dispatches on the raw record's ``type`` field."""

    def test_user_entry(self) -> None:
        """A ``user`` record yields one entry carrying the message text."""
        record = {
            "type": "user",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": "hello"},
        }
        parsed = parse_entry(record)
        assert 1 == len(parsed)
        only = parsed[0]
        assert "user" == only.source
        assert "hello" == only.message

    def test_assistant_text_entry(self) -> None:
        """An ``assistant`` text block yields a ``claude`` entry at ``assistant`` level."""
        text_block = {"type": "text", "text": "hi"}
        record = {
            "type": "assistant",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": [text_block]},
        }
        parsed = parse_entry(record)
        assert 1 == len(parsed)
        only = parsed[0]
        assert "claude" == only.source
        assert "assistant" == only.level

    def test_assistant_tool_use(self) -> None:
        """A ``tool_use`` block yields a ``tool_call`` entry naming the tool."""
        tool_block = {"type": "tool_use", "name": "Read", "input": {"file_path": "/x.py"}}
        record = {
            "type": "assistant",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": [tool_block]},
        }
        parsed = parse_entry(record)
        assert 1 == len(parsed)
        only = parsed[0]
        assert "tool_call" == only.level
        assert "Read" in only.message

    def test_assistant_thinking_block(self) -> None:
        """A ``thinking`` block yields an entry whose message is ``(thinking)``."""
        record = {
            "type": "assistant",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": [{"type": "thinking"}]},
        }
        parsed = parse_entry(record)
        assert 1 == len(parsed)
        assert "(thinking)" == parsed[0].message

    def test_system_entry(self) -> None:
        """A ``system`` record yields one entry with ``system`` as its source."""
        record = {
            "type": "system",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": "init"},
        }
        parsed = parse_entry(record)
        assert 1 == len(parsed)
        assert "system" == parsed[0].source

    def test_unknown_type_returns_empty(self) -> None:
        """A record with an unrecognised ``type`` yields no entries."""
        parsed = parse_entry({"type": "unknown"})
        assert [] == parsed
class TestParseUserEntry:
    """Checks ``parse_user_entry`` on ``tool_result`` content and malformed input."""

    def test_tool_result_with_error(self) -> None:
        """An errored tool result reports the ``stderr`` text at ``error`` level."""
        message = {"content": [{"type": "tool_result", "content": "fail", "is_error": True}]}
        raw: dict[str, Any] = {"toolUseResult": {"stderr": "bad command"}}
        parsed = parse_user_entry(0.0, message, raw)
        assert 1 == len(parsed)
        only = parsed[0]
        assert "error" == only.level
        assert "bad command" == only.message

    def test_tool_result_success(self) -> None:
        """A plain tool result yields one entry at ``tool_result`` level."""
        message = {"content": [{"type": "tool_result", "content": "output text"}]}
        parsed = parse_user_entry(0.0, message, {})
        assert 1 == len(parsed)
        assert "tool_result" == parsed[0].level

    def test_tool_result_with_stdout(self) -> None:
        """``toolUseResult.stdout`` is used as the message instead of the block content."""
        message = {"content": [{"type": "tool_result", "content": "ignored"}]}
        raw: dict[str, Any] = {"toolUseResult": {"stdout": "real output"}}
        parsed = parse_user_entry(0.0, message, raw)
        assert "real output" == parsed[0].message

    def test_tool_result_content_as_list(self) -> None:
        """List-shaped content has its ``text`` parts joined with a space."""
        parts = [{"text": "a"}, {"text": "b"}]
        message = {"content": [{"type": "tool_result", "content": parts}]}
        parsed = parse_user_entry(0.0, message, {})
        assert "a b" == parsed[0].message

    def test_non_dict_message_returns_empty(self) -> None:
        """A message that is not a dict yields no entries."""
        parsed = parse_user_entry(0.0, "not a dict", {})
        assert [] == parsed

    def test_tool_result_message_truncated_at_2000(self) -> None:
        """A 3000-character result is cut down to 2000 characters."""
        message = {"content": [{"type": "tool_result", "content": "x" * 3000}]}
        parsed = parse_user_entry(0.0, message, {})
        assert 2000 == len(parsed[0].message)

    def test_tool_use_result_non_dict_ignored(self) -> None:
        """A non-dict ``toolUseResult`` still yields exactly one entry."""
        message = {"content": [{"type": "tool_result", "content": "ok", "is_error": True}]}
        raw: dict[str, Any] = {"toolUseResult": "not a dict"}
        parsed = parse_user_entry(0.0, message, raw)
        assert 1 == len(parsed)
class TestParseAssistantEntry:
    """Checks ``parse_assistant_entry`` on string, list, empty and malformed content."""

    def test_non_dict_message_returns_empty(self) -> None:
        """A message that is not a dict yields no entries."""
        parsed = parse_assistant_entry(0.0, "not a dict")
        assert [] == parsed

    def test_string_content(self) -> None:
        """String content yields a single entry carrying that text."""
        parsed = parse_assistant_entry(0.0, {"content": "hello"})
        assert 1 == len(parsed)
        assert "hello" == parsed[0].message

    def test_empty_content(self) -> None:
        """Both an empty string and an empty list yield no entries."""
        from_empty_string = parse_assistant_entry(0.0, {"content": ""})
        assert [] == from_empty_string
        from_empty_list = parse_assistant_entry(0.0, {"content": []})
        assert [] == from_empty_list

    def test_non_dict_blocks_skipped(self) -> None:
        """List items that are not dicts produce no entries."""
        parsed = parse_assistant_entry(0.0, {"content": ["not a dict"]})
        assert [] == parsed

    def test_mixed_content(self) -> None:
        """Text, tool-use and thinking blocks each yield one entry, in order."""
        blocks = [
            {"type": "text", "text": "thinking about it"},
            {"type": "tool_use", "name": "Bash", "input": {"command": "ls"}},
            {"type": "thinking"},
        ]
        parsed = parse_assistant_entry(0.0, {"content": blocks})
        assert 3 == len(parsed)
        assert "assistant" == parsed[0].level
        assert "tool_call" == parsed[1].level
        assert "(thinking)" == parsed[2].message
# ---------------------------------------------------------------------------
# parse_transcript
# ---------------------------------------------------------------------------
class TestParseTranscript:
    """Checks ``parse_transcript`` against JSONL files written under ``tmp_path``."""

    def test_parses_jsonl(self, tmp_path: Path) -> None:
        """A user line followed by an assistant line yields two entries in order."""
        user_record = {"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "hi"}}
        assistant_record = {
            "type": "assistant",
            "timestamp": "2024-01-01T00:00:01Z",
            "message": {"content": [{"type": "text", "text": "hello"}]},
        }
        transcript = tmp_path / "session.jsonl"
        transcript.write_text("\n".join([json.dumps(user_record), json.dumps(assistant_record)]))

        parsed = parse_transcript(transcript)

        assert 2 == len(parsed)
        assert "user" == parsed[0].source
        assert "claude" == parsed[1].source

    def test_skips_blank_lines(self, tmp_path: Path) -> None:
        """Empty and whitespace-only lines between records are ignored."""
        user_record = {"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "hi"}}
        assistant_record = {
            "type": "assistant",
            "timestamp": "2024-01-01T00:00:01Z",
            "message": {"content": [{"type": "text", "text": "ok"}]},
        }
        file_lines = [json.dumps(user_record), "", "  ", json.dumps(assistant_record)]
        transcript = tmp_path / "session.jsonl"
        transcript.write_text("\n".join(file_lines))

        parsed = parse_transcript(transcript)

        assert 2 == len(parsed)

    def test_skips_invalid_json(self, tmp_path: Path) -> None:
        """Lines that are not valid JSON produce no entries."""
        transcript = tmp_path / "session.jsonl"
        transcript.write_text("not json\n{bad json}\n")
        parsed = parse_transcript(transcript)
        assert [] == parsed

    def test_empty_file(self, tmp_path: Path) -> None:
        """An empty file yields no entries."""
        transcript = tmp_path / "session.jsonl"
        transcript.write_text("")
        parsed = parse_transcript(transcript)
        assert [] == parsed
# ---------------------------------------------------------------------------
# parse_transcript_tail
# ---------------------------------------------------------------------------
class TestParseTranscriptTail:
    """Checks incremental reads via ``parse_transcript_tail(path, offset)``."""

    def test_reads_from_offset(self, tmp_path: Path) -> None:
        """Only the line appended after the recorded offset is returned."""
        first = json.dumps({"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "first"}})
        second = json.dumps({"type": "user", "timestamp": "2024-01-01T00:01:00Z", "message": {"content": "second"}})
        transcript = tmp_path / "session.jsonl"
        transcript.write_text(first + "\n")
        # Remember the file size before appending so the read starts at the new data.
        offset = transcript.stat().st_size
        with transcript.open("a") as handle:
            handle.write(second + "\n")

        entries, new_offset = parse_transcript_tail(transcript, offset)

        assert 1 == len(entries)
        assert "second" == entries[0].message
        assert new_offset > offset

    def test_offset_zero_reads_full_file(self, tmp_path: Path) -> None:
        """Offset ``0`` reads everything and returns the file size as the new offset."""
        record = json.dumps({"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "hi"}})
        transcript = tmp_path / "session.jsonl"
        transcript.write_text(record + "\n")

        entries, offset = parse_transcript_tail(transcript, 0)

        assert 1 == len(entries)
        assert offset == transcript.stat().st_size

    def test_no_new_data_returns_empty(self, tmp_path: Path) -> None:
        """Reading from the end of the file yields nothing and leaves the offset unchanged."""
        record = json.dumps({"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "hi"}})
        transcript = tmp_path / "session.jsonl"
        transcript.write_text(record + "\n")
        offset = transcript.stat().st_size

        entries, new_offset = parse_transcript_tail(transcript, offset)

        assert [] == entries
        assert new_offset == offset

    def test_skips_invalid_json_in_tail(self, tmp_path: Path) -> None:
        """A malformed line in the tail is skipped; the valid line after it is parsed."""
        transcript = tmp_path / "session.jsonl"
        transcript.write_text("")
        offset = 0
        valid_record = {"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "ok"}}
        with transcript.open("a") as handle:
            handle.write("bad json\n")
            handle.write(json.dumps(valid_record) + "\n")

        entries, _ = parse_transcript_tail(transcript, offset)

        assert 1 == len(entries)
        assert "ok" == entries[0].message

    def test_multiple_appends(self, tmp_path: Path) -> None:
        """Each append followed by a tail read returns exactly the newly written entry."""
        transcript = tmp_path / "session.jsonl"
        transcript.write_text("")
        offset = 0
        for index in range(3):
            record = {"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": f"msg-{index}"}}
            with transcript.open("a") as handle:
                handle.write(json.dumps(record) + "\n")
            # Feed the returned offset back in so the next read starts after this line.
            entries, offset = parse_transcript_tail(transcript, offset)
            assert 1 == len(entries)
            assert f"msg-{index}" == entries[0].message
# ---------------------------------------------------------------------------
# decode_project_name
# ---------------------------------------------------------------------------
class TestDecodeProjectName:
    """Checks the display name ``decode_project_name`` derives from an encoded directory name."""

    def test_encoded_path(self) -> None:
        """A long dash-encoded path is reduced to ``work/myproject``."""
        decoded = decode_project_name("-Users-kevin-Desktop-work-myproject")
        assert "work/myproject" == decoded

    def test_filters_common_parts(self) -> None:
        """For ``-Users-private-tmp-`` the input string is returned unchanged."""
        assert "-Users-private-tmp-" == decode_project_name("-Users-private-tmp-")

    def test_simple_name_passthrough(self) -> None:
        """A name without dashes is returned as-is."""
        decoded = decode_project_name("myproject")
        assert "myproject" == decoded

    def test_short_meaningful_parts(self) -> None:
        """``-Users-kevin-myproject`` decodes to ``kevin/myproject``."""
        decoded = decode_project_name("-Users-kevin-myproject")
        assert "kevin/myproject" == decoded
# ---------------------------------------------------------------------------
# quick_session_info
# ---------------------------------------------------------------------------
class TestQuickSessionInfo:
    """Checks the summary ``quick_session_info`` builds from a transcript file."""

    def write_transcript(self, path: Path, entries: list[dict[str, Any]]) -> None:
        """Write *entries* to *path* as newline-separated JSON with no trailing newline."""
        serialized = [json.dumps(entry) for entry in entries]
        path.write_text("\n".join(serialized))

    def test_basic_session(self, tmp_path: Path) -> None:
        """Session id, first prompt, message count and cwd-derived project name are populated."""
        transcript = tmp_path / "abc123.jsonl"
        user_record = {
            "type": "user",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": "help me debug this"},
            "cwd": "/home/user/projects/myapp",
        }
        assistant_record = {
            "type": "assistant",
            "timestamp": "2024-01-01T00:05:00Z",
            "message": {"content": [{"type": "text", "text": "sure"}]},
        }
        self.write_transcript(transcript, [user_record, assistant_record])

        info = quick_session_info(transcript, "abc123", "encoded-project", "myproject")

        assert info is not None
        assert "abc123" == info.session_id
        assert "help me debug this" == info.first_prompt
        assert 1 == info.message_count
        assert "projects/myapp" == info.project_name

    def test_skips_tool_results_for_first_prompt(self, tmp_path: Path) -> None:
        """A leading user record holding only a tool result is not used as the first prompt."""
        transcript = tmp_path / "abc123.jsonl"
        tool_result_record = {
            "type": "user",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": [{"type": "tool_result", "content": "output"}]},
        }
        prompt_record = {
            "type": "user",
            "timestamp": "2024-01-01T00:01:00Z",
            "message": {"content": "real prompt"},
        }
        self.write_transcript(transcript, [tool_result_record, prompt_record])

        info = quick_session_info(transcript, "abc123", "enc", "proj")

        assert info is not None
        assert "real prompt" == info.first_prompt

    def test_returns_none_for_empty_file(self, tmp_path: Path) -> None:
        """An empty transcript file yields ``None``."""
        transcript = tmp_path / "empty.jsonl"
        transcript.write_text("")
        info = quick_session_info(transcript, "empty", "enc", "proj")
        assert info is None

    def test_returns_none_for_missing_file(self, tmp_path: Path) -> None:
        """A path that does not exist yields ``None``."""
        transcript = tmp_path / "missing.jsonl"
        info = quick_session_info(transcript, "missing", "enc", "proj")
        assert info is None

    def test_first_prompt_truncated_at_120(self, tmp_path: Path) -> None:
        """A 200-character prompt is cut down to 120 characters."""
        transcript = tmp_path / "abc.jsonl"
        long_prompt_record = {
            "type": "user",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": "x" * 200},
        }
        self.write_transcript(transcript, [long_prompt_record])

        info = quick_session_info(transcript, "abc", "enc", "proj")

        assert info is not None
        assert 120 == len(info.first_prompt)

    def test_uses_mtime_for_finished_at(self, tmp_path: Path) -> None:
        """``finished_at`` is set and is not earlier than ``started_at``."""
        transcript = tmp_path / "abc.jsonl"
        record = {"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "hi"}}
        self.write_transcript(transcript, [record])

        info = quick_session_info(transcript, "abc", "enc", "proj")

        assert info is not None
        assert info.finished_at is not None
        assert info.finished_at >= info.started_at

    def test_cwd_used_for_display_name(self, tmp_path: Path) -> None:
        """The record's ``cwd`` determines ``project_name``, not the passed-in name."""
        transcript = tmp_path / "abc.jsonl"
        record = {
            "type": "user",
            "timestamp": "2024-01-01T00:00:00Z",
            "message": {"content": "hi"},
            "cwd": "/Users/kevin/Desktop/work/myapp",
        }
        self.write_transcript(transcript, [record])

        info = quick_session_info(transcript, "abc", "enc", "proj")

        assert info is not None
        assert "work/myapp" == info.project_name
# ---------------------------------------------------------------------------
# scan_sessions
# ---------------------------------------------------------------------------
class TestScanSessions:
    """Checks ``scan_sessions`` discovery and ordering over a projects directory."""

    def test_empty_dir(self, tmp_path: Path) -> None:
        """An empty directory yields no sessions."""
        found = scan_sessions(tmp_path)
        assert [] == found

    def test_nonexistent_dir(self, tmp_path: Path) -> None:
        """A directory that does not exist yields no sessions."""
        missing_root = tmp_path / "nonexistent"
        found = scan_sessions(missing_root)
        assert [] == found

    def test_finds_sessions(self, tmp_path: Path) -> None:
        """A ``.jsonl`` file inside a project sub-directory is reported by its stem."""
        project_dir = tmp_path / "encoded-project"
        project_dir.mkdir()
        record = {"type": "user", "timestamp": "2024-01-01T00:00:00Z", "message": {"content": "hi"}}
        (project_dir / "sess-123.jsonl").write_text(json.dumps(record))

        found = scan_sessions(tmp_path)

        assert 1 == len(found)
        assert "sess-123" == found[0].session_id

    def test_sorted_by_started_at_descending(self, tmp_path: Path) -> None:
        """Sessions written out of order come back with ``started_at`` non-increasing."""
        project_dir = tmp_path / "proj"
        project_dir.mkdir()
        timestamps = ["2024-01-01T00:00:00Z", "2024-06-01T00:00:00Z", "2024-03-01T00:00:00Z"]
        for index, stamp in enumerate(timestamps):
            record = {"type": "user", "timestamp": stamp, "message": {"content": "hi"}}
            session_file = project_dir / f"sess-{index}.jsonl"
            session_file.write_text(json.dumps(record))

        found = scan_sessions(tmp_path)

        assert 3 == len(found)
        assert found[0].started_at >= found[1].started_at >= found[2].started_at

    def test_skips_non_directory_entries(self, tmp_path: Path) -> None:
        """A plain file at the top level is not treated as a project."""
        stray_file = tmp_path / "not_a_dir.txt"
        stray_file.write_text("hello")
        found = scan_sessions(tmp_path)
        assert [] == found