mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
* feat(blackbox): add package with models, CLI, and HTMX dashboard * test(blackbox): add comprehensive test coverage for dashboard * feat(blackbox): cache session scanning via watcher invalidation * docs(blackbox): add README and use fastapi[standard] for dev server * refactor(blackbox): extract presentation logic into formatter classes * refactor(blackbox): extract classify_error helpers * feat(blackbox): wire analytics into session detail view Show token usage, tool breakdowns, and session stats in a collapsible panel when viewing a session. * feat(blackbox): add codeflash plugin detection Detect codeflash agent names, skills, and commands in transcripts. Surface language, optimization domain, and capability badges in the analytics panel. * refactor(blackbox): remove underscore prefixes from internal functions * chore: add ty python-version to root pyproject.toml * chore(blackbox): fix lint errors in test files * style(blackbox): apply ruff formatting to analytics * feat(blackbox): add Playwright E2E tests for dashboard Refactor app.py to expose create_app() factory accepting a projects_dir override, enabling tests to run against fixture data instead of the real ~/.claude/projects/ directory. Routes now read projects_dir from app.state instead of the module-level constant. Add 26 Playwright tests across 5 files covering dashboard loading, session list, session detail with filters and analytics, sidebar collapse/localStorage persistence, and SSE log streaming. All tests pass on chromium, firefox, and webkit (78 total). CI gets a new e2e-blackbox job with a browser matrix strategy running all three engines in parallel, conditional on blackbox path changes, with trace upload on failure. * fix(ci): sync only blackbox package in e2e job * fix(ci): exclude e2e tests from unit test job The test job doesn't install Playwright browsers, so e2e tests error when pytest collects them. 
Ignore tests/e2e/ directories in the test job — those are handled by the dedicated e2e-blackbox job.
188 lines
5.6 KiB
Python
188 lines
5.6 KiB
Python
"""Fixtures for Playwright end-to-end tests."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import socket
|
|
import threading
|
|
import time
|
|
from collections.abc import Iterator
|
|
from typing import TYPE_CHECKING
|
|
|
|
import pytest
|
|
import uvicorn
|
|
|
|
if TYPE_CHECKING:
|
|
from pathlib import Path
|
|
|
|
from playwright.sync_api import Page
|
|
|
|
# NOTE(review): module-level pytest mark. pytest applies `pytestmark` per test
# module; if this file is a conftest.py, confirm the mark actually reaches the
# e2e tests that use these fixtures.
pytestmark = pytest.mark.e2e
|
|
|
|
# Project A: directory name and the stem of the transcript file that the
# `projects_dir` fixture writes inside it.
PROJECT_A_DIR = "-Users-alice-Desktop-work-myapp"
SESSION_A_ID = "sess-aaaa1111-2222-3333-4444-555566667777"

# Project B: same layout, populated with the minimal transcript.
PROJECT_B_DIR = "-Users-bob-code-webapp"
SESSION_B_ID = "sess-bbbb1111-2222-3333-4444-555566667777"
|
|
|
|
|
|
def _jsonl(*entries: dict) -> str:
|
|
"""Serialize entries as newline-delimited JSON."""
|
|
return "\n".join(json.dumps(e) for e in entries) + "\n"
|
|
|
|
|
|
# Transcript written for session A by the `projects_dir` fixture. It contains,
# in order: a user prompt, an assistant text reply, a Read tool call and its
# result, a Bash tool call whose result is flagged as an error, an assistant
# turn with thinking + text, and a closing user message.
RICH_SESSION = _jsonl(
    # 1. Opening user prompt (carries the working directory).
    {
        "type": "user",
        "timestamp": "2025-03-15T10:00:00Z",
        "message": {
            "content": "Help me optimize this function for better performance",
        },
        "cwd": "/Users/alice/Desktop/work/myapp",
    },
    # 2. Assistant text reply with token usage, including cache reads.
    {
        "type": "assistant",
        "timestamp": "2025-03-15T10:00:05Z",
        "message": {
            "content": [
                {
                    "type": "text",
                    "text": "Let me look at the code and find optimization opportunities.",
                },
            ],
            "usage": {
                "input_tokens": 500,
                "output_tokens": 120,
                "cache_read_input_tokens": 200,
            },
        },
    },
    # 3. Assistant issues a Read tool call.
    {
        "type": "assistant",
        "timestamp": "2025-03-15T10:00:08Z",
        "message": {
            "content": [
                {
                    "type": "tool_use",
                    "id": "tu_read_1",
                    "name": "Read",
                    "input": {
                        "file_path": "/Users/alice/Desktop/work/myapp/main.py",
                    },
                },
            ],
            "usage": {"input_tokens": 100, "output_tokens": 30},
        },
    },
    # 4. Result of the Read call.
    {
        "type": "user",
        "timestamp": "2025-03-15T10:00:09Z",
        "message": {
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": "tu_read_1",
                    "content": "def sort_items(items):\n pass",
                },
            ],
        },
    },
    # 5. Assistant issues a Bash tool call.
    {
        "type": "assistant",
        "timestamp": "2025-03-15T10:00:15Z",
        "message": {
            "content": [
                {
                    "type": "tool_use",
                    "id": "tu_bash_1",
                    "name": "Bash",
                    "input": {"command": "uv run pytest tests/ -v"},
                },
            ],
            "usage": {"input_tokens": 200, "output_tokens": 50},
        },
    },
    # 6. Result of the Bash call, flagged as an error, with stderr attached.
    {
        "type": "user",
        "timestamp": "2025-03-15T10:00:20Z",
        "message": {
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": "tu_bash_1",
                    "content": "FAILED",
                    "is_error": True,
                },
            ],
        },
        "toolUseResult": {"stderr": "AssertionError: expected 42 got 0"},
    },
    # 7. Assistant turn containing a thinking block followed by text.
    {
        "type": "assistant",
        "timestamp": "2025-03-15T10:00:25Z",
        "message": {
            "content": [
                {"type": "thinking", "thinking": "I need to fix the test."},
                {
                    "type": "text",
                    "text": "The test failed. Let me fix the implementation.",
                },
            ],
            "usage": {"input_tokens": 300, "output_tokens": 80},
        },
    },
    # 8. Closing user message.
    {
        "type": "user",
        "timestamp": "2025-03-15T10:01:00Z",
        "message": {"content": "That looks great, thanks!"},
    },
)
|
|
|
|
# Transcript written for session B by the `projects_dir` fixture: a single
# user message and nothing else.
MINIMAL_SESSION = _jsonl(
    {
        "type": "user",
        "timestamp": "2025-03-15T09:00:00Z",
        "message": {
            "content": "What is this project about?",
        },
        "cwd": "/Users/bob/code/webapp",
    },
)
|
|
|
|
|
|
def _get_free_port() -> int:
|
|
"""Find a free TCP port on localhost."""
|
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
s.bind(("127.0.0.1", 0))
|
|
return s.getsockname()[1]
|
|
|
|
|
|
@pytest.fixture(scope="session")
def projects_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Build a temporary projects tree holding the fixture transcripts.

    Layout: one sub-directory per project, each containing a single
    ``<session id>.jsonl`` file. Returns the root of that tree.
    """
    root = tmp_path_factory.mktemp("projects")

    # (project directory name, session id, transcript text)
    layout = (
        (PROJECT_A_DIR, SESSION_A_ID, RICH_SESSION),
        (PROJECT_B_DIR, SESSION_B_ID, MINIMAL_SESSION),
    )
    for dir_name, session_id, transcript in layout:
        project = root / dir_name
        project.mkdir()
        transcript_file = project / f"{session_id}.jsonl"
        transcript_file.write_text(transcript)

    return root
|
|
|
|
|
|
@pytest.fixture(scope="session")
def live_server(projects_dir: Path) -> Iterator[str]:
    """Start the dashboard on a free local port and yield its base URL.

    The app is created with ``create_app(projects_dir=projects_dir)`` and
    served by uvicorn on a daemon thread. The fixture polls the port until a
    TCP connection succeeds, yields ``http://127.0.0.1:<port>``, and on
    teardown asks the server to exit and joins the thread.

    Raises:
        RuntimeError: If the server thread exits before it accepts a
            connection, or if no connection succeeds within 10 seconds.
    """
    # Imported inside the fixture rather than at module import time.
    from blackbox.dashboard.app import create_app

    port = _get_free_port()
    application = create_app(projects_dir=projects_dir)

    config = uvicorn.Config(application, host="127.0.0.1", port=port, log_level="warning")
    server = uvicorn.Server(config)
    thread = threading.Thread(target=server.run, daemon=True)
    thread.start()

    deadline = time.monotonic() + 10
    while time.monotonic() < deadline:
        # Fail fast if the server thread has already died (for example the
        # port was taken between probing and binding). Without this check the
        # loop would wait out the whole deadline, or connect to whatever other
        # process now owns the port.
        if not thread.is_alive():
            msg = "Live server thread exited during startup"
            raise RuntimeError(msg)
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=0.5):
                break
        except OSError:
            time.sleep(0.1)
    else:
        # Ask the still-starting server to stop so it does not linger for the
        # rest of the test session.
        server.should_exit = True
        msg = "Live server did not start in time"
        raise RuntimeError(msg)

    try:
        yield f"http://127.0.0.1:{port}"
    finally:
        # Signal uvicorn's main loop to exit, then wait briefly for the thread.
        server.should_exit = True
        thread.join(timeout=5.0)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def base_url(live_server: str) -> str:
    """Expose the live server's URL under the ``base_url`` fixture name.

    This is the base URL intended for pytest-playwright's ``page.goto()``.
    """
    server_url = live_server
    return server_url
|
|
|
|
|
|
@pytest.fixture
def dashboard(page: Page, base_url: str) -> Page:
    """Open the dashboard index and return the page once sessions are shown.

    Waits for ``#session-list-container`` to be attached, then for its first
    direct ``div`` child to become visible (up to 10 seconds).
    """
    page.goto(base_url)

    container = page.locator("#session-list-container")
    container.wait_for(state="attached")

    first_entry = page.locator("#session-list-container > div").first
    first_entry.wait_for(state="visible", timeout=10_000)

    return page
|