codeflash-agent/packages/blackbox/tests/test_analytics.py
Kevin Turcios 0ad5e60523
Add blackbox package: session flight recorder with HTMX dashboard (#39)
* feat(blackbox): add package with models, CLI, and HTMX dashboard

* test(blackbox): add comprehensive test coverage for dashboard

* feat(blackbox): cache session scanning via watcher invalidation

* docs(blackbox): add README and use fastapi[standard] for dev server

* refactor(blackbox): extract presentation logic into formatter classes

* refactor(blackbox): extract classify_error helpers

* feat(blackbox): wire analytics into session detail view

Show token usage, tool breakdowns, and session stats in a
collapsible panel when viewing a session.

* feat(blackbox): add codeflash plugin detection

Detect codeflash agent names, skills, and commands in transcripts.
Surface language, optimization domain, and capability badges in
the analytics panel.

* refactor(blackbox): remove underscore prefixes from internal functions

* chore: add ty python-version to root pyproject.toml

* chore(blackbox): fix lint errors in test files

* style(blackbox): apply ruff formatting to analytics

* feat(blackbox): add Playwright E2E tests for dashboard

Refactor app.py to expose create_app() factory accepting a projects_dir
override, enabling tests to run against fixture data instead of the real
~/.claude/projects/ directory. Routes now read projects_dir from
app.state instead of the module-level constant.

Add 26 Playwright tests across 5 files covering dashboard loading,
session list, session detail with filters and analytics, sidebar
collapse/localStorage persistence, and SSE log streaming. All tests
pass on chromium, firefox, and webkit (78 total).

CI gets a new e2e-blackbox job with a browser matrix strategy running
all three engines in parallel, conditional on blackbox path changes,
with trace upload on failure.

* fix(ci): sync only blackbox package in e2e job

* fix(ci): exclude e2e tests from unit test job

The test job doesn't install Playwright browsers, so e2e tests error
when pytest collects them. Ignore tests/e2e/ directories in the test
job — those are handled by the dedicated e2e-blackbox job.
2026-04-28 19:58:43 -05:00

693 lines
24 KiB
Python

"""Tests for analytics extraction and codeflash detection."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from blackbox.analytics import (
classify_error,
count_diff_lines,
detect_codeflash,
extract_meta,
infer_domain,
infer_language,
track_file_changes,
)
def _ts(offset: int = 0) -> str:
return f"2026-04-28T12:00:{offset:02d}Z"
def _write_jsonl(path: Path, entries: list[dict[str, Any]]) -> None:
path.write_text("\n".join(json.dumps(e) for e in entries) + "\n")
# ---------------------------------------------------------------------------
# extract_meta basics
# ---------------------------------------------------------------------------
class TestExtractMeta:
    """Core ``extract_meta`` behaviour, driven by small hand-written JSONL transcripts."""

    # -- fixture builders (not collected by pytest: no ``test_`` prefix) ------

    @staticmethod
    def _session_file(tmp_path: Path, stem: str = "sess") -> Path:
        """Create ``tmp_path/proj`` and return the path ``proj/<stem>.jsonl`` inside it."""
        transcript = tmp_path / "proj" / f"{stem}.jsonl"
        transcript.parent.mkdir()
        return transcript

    @staticmethod
    def _user(content: Any, offset: int = 0) -> dict[str, Any]:
        """Build a ``user`` transcript entry carrying *content*."""
        return {"type": "user", "timestamp": _ts(offset), "message": {"content": content}}

    @staticmethod
    def _assistant(
        blocks: list[dict[str, Any]],
        usage: dict[str, Any] | None = None,
        offset: int = 0,
    ) -> dict[str, Any]:
        """Build an ``assistant`` transcript entry; *usage* defaults to an empty dict."""
        return {
            "type": "assistant",
            "timestamp": _ts(offset),
            "message": {"content": blocks, "usage": {} if usage is None else usage},
        }

    @staticmethod
    def _bash(command: str) -> dict[str, Any]:
        """Build a ``Bash`` tool_use block with id ``t1`` running *command*."""
        return {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": command}}

    def _extract(self, tmp_path: Path, entries: list[dict[str, Any]], stem: str = "sess") -> Any:
        """Write *entries* to a fresh transcript and return ``extract_meta`` of it."""
        transcript = self._session_file(tmp_path, stem)
        _write_jsonl(transcript, entries)
        return extract_meta(transcript)

    # -- inputs that yield no metadata ---------------------------------------

    def test_returns_none_for_empty_file(self, tmp_path: Path) -> None:
        transcript = self._session_file(tmp_path, "empty")
        transcript.write_text("")
        assert extract_meta(transcript) is None

    def test_returns_none_for_missing_file(self, tmp_path: Path) -> None:
        absent = tmp_path / "missing.jsonl"
        assert extract_meta(absent) is None

    def test_returns_none_for_no_timestamps(self, tmp_path: Path) -> None:
        result = self._extract(tmp_path, [{"type": "system", "message": "hello"}])
        assert result is None

    # -- counters and fields -------------------------------------------------

    def test_basic_session(self, tmp_path: Path) -> None:
        entries = [
            self._user("optimize this function"),
            self._assistant(
                [{"type": "text", "text": "I'll help you."}],
                usage={"input_tokens": 500, "output_tokens": 200},
                offset=10,
            ),
        ]
        meta = self._extract(tmp_path, entries, stem="abc123")
        assert meta is not None
        assert meta.session_id == "abc123"
        assert meta.project_path == "proj"
        assert meta.user_messages == 1
        assert meta.assistant_messages == 1
        assert meta.input_tokens == 500
        assert meta.output_tokens == 200
        assert "optimize this function" in meta.first_prompt

    def test_counts_tool_calls(self, tmp_path: Path) -> None:
        read_call = {"type": "tool_use", "id": "t1", "name": "Read", "input": {"file_path": "/a.py"}}
        edit_call = {
            "type": "tool_use",
            "id": "t2",
            "name": "Edit",
            "input": {"file_path": "/a.py", "old_string": "x", "new_string": "y"},
        }
        entries = [
            self._assistant([read_call, edit_call], usage={"input_tokens": 100, "output_tokens": 50}),
        ]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.tool_calls == 2
        assert meta.tool_counts == {"Read": 1, "Edit": 1}

    def test_counts_git_commits(self, tmp_path: Path) -> None:
        entries = [self._assistant([self._bash("git commit -m 'fix things'")])]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.git_commits == 1

    def test_amend_not_counted_as_commit(self, tmp_path: Path) -> None:
        entries = [self._assistant([self._bash("git commit --amend -m 'fix'")])]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.git_commits == 0

    def test_counts_compactions(self, tmp_path: Path) -> None:
        entries = [
            self._user("hi"),
            {"type": "summary", "timestamp": _ts(5)},
            {"type": "summary", "timestamp": _ts(10)},
        ]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.compactions == 2

    def test_counts_thinking_blocks(self, tmp_path: Path) -> None:
        blocks = [
            {"type": "thinking", "thinking": "let me think..."},
            {"type": "text", "text": "here's my answer"},
        ]
        meta = self._extract(tmp_path, [self._assistant(blocks)])
        assert meta is not None
        assert meta.thinking_blocks == 1

    def test_tracks_permission_mode(self, tmp_path: Path) -> None:
        entries = [
            {"type": "permission-mode", "timestamp": _ts(0), "permissionMode": "bypassPermissions"},
            self._user("go", offset=1),
        ]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.permission_mode == "bypassPermissions"

    def test_tracks_web_usage(self, tmp_path: Path) -> None:
        usage = {
            "input_tokens": 100,
            "output_tokens": 50,
            "server_tool_use": {"web_search_requests": 2, "web_fetch_requests": 1},
        }
        entries = [self._assistant([{"type": "text", "text": "searching"}], usage=usage)]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.web_searches == 2
        assert meta.web_fetches == 1

    def test_skips_invalid_json_lines(self, tmp_path: Path) -> None:
        transcript = self._session_file(tmp_path)
        # The middle line is deliberately not JSON; the two valid entries
        # around it are the ones the assertions below count.
        raw_lines = [
            json.dumps(self._user("hi")),
            "not valid json",
            json.dumps(self._assistant([{"type": "text", "text": "ok"}], offset=1)),
        ]
        transcript.write_text("\n".join(raw_lines) + "\n")
        meta = extract_meta(transcript)
        assert meta is not None
        assert meta.user_messages == 1
        assert meta.assistant_messages == 1

    def test_tracks_tool_errors(self, tmp_path: Path) -> None:
        failed_result = {
            "type": "tool_result",
            "tool_use_id": "t1",
            "is_error": True,
            "content": "command not found",
        }
        entries = [
            self._assistant([self._bash("ls /nope")]),
            self._user([failed_result], offset=1),
        ]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.tool_errors == 1
        assert meta.tool_error_categories["command_not_found"] == 1
# ---------------------------------------------------------------------------
# classify_error
# ---------------------------------------------------------------------------
class TestClassifyError:
    """``classify_error(tool_name, result_block, raw_entry)`` category expectations."""

    def test_edit_always_edit_failed(self) -> None:
        category = classify_error("Edit", {}, {})
        assert category == "edit_failed"

    def test_bash_permission_denied(self) -> None:
        category = classify_error("Bash", {"content": "Permission denied"}, {})
        assert category == "permission_denied"

    def test_bash_command_not_found(self) -> None:
        category = classify_error("Bash", {"content": "command not found"}, {})
        assert category == "command_not_found"

    def test_bash_generic_failure(self) -> None:
        category = classify_error("Bash", {"content": "exit code 1"}, {})
        assert category == "command_failed"

    def test_read_file_not_found(self) -> None:
        category = classify_error("Read", {"content": "no such file"}, {})
        assert category == "file_not_found"

    def test_write_file_not_found(self) -> None:
        category = classify_error("Write", {"content": "not found"}, {})
        assert category == "file_not_found"

    def test_read_generic_error(self) -> None:
        category = classify_error("Read", {"content": "some io error"}, {})
        assert category == "file_error"

    def test_unknown_tool(self) -> None:
        category = classify_error("CustomTool", {}, {})
        assert category == "tool_error"

    def test_stderr_from_tool_use_result(self) -> None:
        # The result block's own content is empty; the message is only
        # present under ``toolUseResult.stderr`` of the raw entry.
        raw_entry = {"toolUseResult": {"stderr": "Permission denied"}}
        category = classify_error("Bash", {"content": ""}, raw_entry)
        assert category == "permission_denied"
# ---------------------------------------------------------------------------
# track_file_changes
# ---------------------------------------------------------------------------
class TestTrackFileChanges:
    """``track_file_changes`` records touched paths and per-language counts."""

    @staticmethod
    def _track(tool: str, file_path: str) -> tuple[set[str], Any]:
        """Run ``track_file_changes`` on fresh accumulators and return them.

        Returns the ``(files, languages)`` pair, where *languages* is a
        ``collections.Counter`` keyed by language name.
        """
        from collections import Counter

        touched: set[str] = set()
        languages = Counter[str]()
        track_file_changes(tool, {"file_path": file_path}, touched, languages)
        return touched, languages

    def test_tracks_edit_tool(self) -> None:
        touched, languages = self._track("Edit", "/app/main.py")
        assert "/app/main.py" in touched
        assert languages["python"] == 1

    def test_ignores_non_edit_tools(self) -> None:
        touched, _ = self._track("Read", "/app/main.py")
        assert not touched

    def test_unknown_extension(self) -> None:
        touched, languages = self._track("Write", "/app/data.xyz")
        assert "/app/data.xyz" in touched
        assert not languages
# ---------------------------------------------------------------------------
# count_diff_lines
# ---------------------------------------------------------------------------
class TestCountDiffLines:
    """``count_diff_lines`` returns a two-int tuple; the test names read it as (added, removed)."""

    def test_edit_adds_lines(self) -> None:
        edit_input = {"old_string": "a\n", "new_string": "a\nb\nc\n"}
        assert count_diff_lines("Edit", edit_input) == (2, 0)

    def test_edit_removes_lines(self) -> None:
        edit_input = {"old_string": "a\nb\nc\n", "new_string": "a\n"}
        assert count_diff_lines("Edit", edit_input) == (0, 2)

    def test_write_counts_all_lines(self) -> None:
        write_input = {"content": "a\nb\nc"}
        assert count_diff_lines("Write", write_input) == (3, 0)

    def test_other_tools_zero(self) -> None:
        assert count_diff_lines("Read", {}) == (0, 0)
# ---------------------------------------------------------------------------
# detect_codeflash
# ---------------------------------------------------------------------------
class TestDetectCodeflash:
    """``detect_codeflash(agents, skills, commands, teams_created)`` detection results."""

    @staticmethod
    def _from_agents(*names: str) -> Any:
        """Call ``detect_codeflash`` with only agent names set (no skills, commands or teams)."""
        return detect_codeflash(set(names), set(), set(), 0)

    def test_returns_none_when_no_signals(self) -> None:
        assert self._from_agents() is None

    def test_detects_from_agents(self) -> None:
        info = self._from_agents("codeflash-python", "codeflash-deep")
        assert info is not None
        assert info.is_codeflash
        assert info.language == "python"
        assert info.optimization_domain == "deep"
        for agent in ("codeflash-deep", "codeflash-python"):
            assert agent in info.agents_used

    def test_detects_from_skills(self) -> None:
        info = detect_codeflash(set(), {"codeflash-optimize"}, set(), 0)
        assert info is not None
        assert info.is_codeflash
        assert "codeflash-optimize" in info.skills_invoked

    def test_detects_from_commands(self) -> None:
        info = detect_codeflash(set(), set(), {"codex-review"}, 0)
        assert info is not None
        assert "codex-review" in info.commands_invoked

    def test_tracks_teams(self) -> None:
        info = detect_codeflash({"codeflash"}, set(), set(), 3)
        assert info is not None
        assert info.teams_created == 3

    def test_detects_researcher(self) -> None:
        info = self._from_agents("codeflash-researcher")
        assert info is not None
        assert info.has_researcher

    def test_detects_reviewer(self) -> None:
        info = self._from_agents("codeflash-review")
        assert info is not None
        assert info.has_reviewer

    def test_detects_ci_handler(self) -> None:
        info = self._from_agents("codeflash-ci")
        assert info is not None
        assert info.has_ci_handler

    def test_detects_pr_prep(self) -> None:
        info = self._from_agents("codeflash-pr-prep")
        assert info is not None
        assert info.has_pr_prep

    def test_infers_javascript_from_prefix(self) -> None:
        info = self._from_agents("codeflash-js-cpu")
        assert info is not None
        assert info.language == "javascript"
        assert info.optimization_domain == "cpu"

    def test_infers_java_from_prefix(self) -> None:
        info = self._from_agents("codeflash-java-memory")
        assert info is not None
        assert info.language == "java"
        assert info.optimization_domain == "memory"

    def test_memory_domain(self) -> None:
        info = self._from_agents("codeflash-memory")
        assert info is not None
        assert info.optimization_domain == "memory"

    def test_async_domain(self) -> None:
        info = self._from_agents("codeflash-async")
        assert info is not None
        assert info.optimization_domain == "async"

    def test_structure_domain(self) -> None:
        info = self._from_agents("codeflash-structure")
        assert info is not None
        assert info.optimization_domain == "structure"

    def test_bundle_domain(self) -> None:
        info = self._from_agents("codeflash-js-bundle")
        assert info is not None
        assert info.optimization_domain == "bundle"
# ---------------------------------------------------------------------------
# infer_language / infer_domain
# ---------------------------------------------------------------------------
class TestInferLanguage:
    """``infer_language`` maps a set of agent names to a language name or ``None``."""

    def test_python_from_marker(self) -> None:
        language = infer_language({"codeflash-python"})
        assert language == "python"

    def test_javascript_from_marker(self) -> None:
        language = infer_language({"codeflash-javascript"})
        assert language == "javascript"

    def test_javascript_from_js_prefix(self) -> None:
        language = infer_language({"codeflash-js-deep"})
        assert language == "javascript"

    def test_java_from_marker(self) -> None:
        language = infer_language({"codeflash-java"})
        assert language == "java"

    def test_java_from_prefix(self) -> None:
        language = infer_language({"codeflash-java-cpu"})
        assert language == "java"

    def test_none_for_generic_agent(self) -> None:
        language = infer_language({"codeflash"})
        assert language is None

    def test_none_for_empty(self) -> None:
        language = infer_language(set())
        assert language is None
class TestInferDomain:
    """``infer_domain`` maps a set of agent names to an optimization domain or ``None``."""

    def test_cpu(self) -> None:
        domain = infer_domain({"codeflash-cpu"})
        assert domain == "cpu"

    def test_memory(self) -> None:
        domain = infer_domain({"codeflash-memory"})
        assert domain == "memory"

    def test_deep(self) -> None:
        domain = infer_domain({"codeflash-deep"})
        assert domain == "deep"

    def test_async(self) -> None:
        domain = infer_domain({"codeflash-async"})
        assert domain == "async"

    def test_structure(self) -> None:
        domain = infer_domain({"codeflash-structure"})
        assert domain == "structure"

    def test_bundle(self) -> None:
        domain = infer_domain({"codeflash-js-bundle"})
        assert domain == "bundle"

    def test_none_for_router_only(self) -> None:
        # A language-only agent name carries no domain suffix.
        domain = infer_domain({"codeflash-python"})
        assert domain is None

    def test_none_for_empty(self) -> None:
        domain = infer_domain(set())
        assert domain is None
# ---------------------------------------------------------------------------
# extract_meta codeflash integration
# ---------------------------------------------------------------------------
class TestExtractMetaCodeflash:
    """End-to-end checks that ``extract_meta`` fills ``meta.codeflash`` from a transcript."""

    @staticmethod
    def _tool_use(block_id: str, tool: str, tool_input: dict[str, Any]) -> dict[str, Any]:
        """Build a ``tool_use`` content block."""
        return {"type": "tool_use", "id": block_id, "name": tool, "input": tool_input}

    @classmethod
    def _agent(cls, block_id: str, agent_name: str, prompt: str) -> dict[str, Any]:
        """Build an ``Agent`` tool_use block that names *agent_name*."""
        return cls._tool_use(block_id, "Agent", {"name": agent_name, "prompt": prompt})

    @staticmethod
    def _assistant(blocks: list[dict[str, Any]], offset: int = 0) -> dict[str, Any]:
        """Build an ``assistant`` entry with empty usage wrapping *blocks*."""
        return {
            "type": "assistant",
            "timestamp": _ts(offset),
            "message": {"content": blocks, "usage": {}},
        }

    @staticmethod
    def _extract(tmp_path: Path, entries: list[dict[str, Any]]) -> Any:
        """Write *entries* to ``tmp_path/proj/sess.jsonl`` and return ``extract_meta`` of it."""
        transcript = tmp_path / "proj" / "sess.jsonl"
        transcript.parent.mkdir()
        _write_jsonl(transcript, entries)
        return extract_meta(transcript)

    def test_non_codeflash_session_has_none(self, tmp_path: Path) -> None:
        entries = [
            {"type": "user", "timestamp": _ts(0), "message": {"content": "hello"}},
            self._assistant([{"type": "text", "text": "hi"}], offset=1),
        ]
        meta = self._extract(tmp_path, entries)
        assert meta is not None
        assert meta.codeflash is None

    def test_detects_codeflash_agent_spawn(self, tmp_path: Path) -> None:
        spawn = self._agent("t1", "codeflash-python", "optimize")
        meta = self._extract(tmp_path, [self._assistant([spawn])])
        assert meta is not None
        assert meta.codeflash is not None
        assert meta.codeflash.is_codeflash
        assert meta.codeflash.language == "python"
        assert "codeflash-python" in meta.codeflash.agents_used

    def test_detects_codeflash_skill(self, tmp_path: Path) -> None:
        skill_call = self._tool_use("t1", "Skill", {"skill": "codeflash-optimize"})
        meta = self._extract(tmp_path, [self._assistant([skill_call])])
        assert meta is not None
        assert meta.codeflash is not None
        assert "codeflash-optimize" in meta.codeflash.skills_invoked

    def test_detects_team_creates(self, tmp_path: Path) -> None:
        blocks = [
            self._tool_use("t1", "TeamCreate", {}),
            self._agent("t2", "codeflash-deep", "go"),
        ]
        meta = self._extract(tmp_path, [self._assistant(blocks)])
        assert meta is not None
        assert meta.codeflash is not None
        assert meta.codeflash.teams_created == 1

    def test_detects_multiple_agents(self, tmp_path: Path) -> None:
        spawns = [
            self._agent("t1", "codeflash-python", "start"),
            self._agent("t2", "codeflash-deep", "optimize"),
            self._agent("t3", "codeflash-researcher", "research"),
            self._agent("t4", "codeflash-review", "review"),
        ]
        meta = self._extract(tmp_path, [self._assistant(spawns)])
        assert meta is not None
        info = meta.codeflash
        assert info is not None
        assert info.language == "python"
        assert info.optimization_domain == "deep"
        assert info.has_researcher
        assert info.has_reviewer
        assert len(info.agents_used) == 4