mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
* feat(blackbox): add package with models, CLI, and HTMX dashboard * test(blackbox): add comprehensive test coverage for dashboard * feat(blackbox): cache session scanning via watcher invalidation * docs(blackbox): add README and use fastapi[standard] for dev server * refactor(blackbox): extract presentation logic into formatter classes * refactor(blackbox): extract classify_error helpers * feat(blackbox): wire analytics into session detail view Show token usage, tool breakdowns, and session stats in a collapsible panel when viewing a session. * feat(blackbox): add codeflash plugin detection Detect codeflash agent names, skills, and commands in transcripts. Surface language, optimization domain, and capability badges in the analytics panel. * refactor(blackbox): remove underscore prefixes from internal functions * chore: add ty python-version to root pyproject.toml * chore(blackbox): fix lint errors in test files * style(blackbox): apply ruff formatting to analytics * feat(blackbox): add Playwright E2E tests for dashboard Refactor app.py to expose create_app() factory accepting a projects_dir override, enabling tests to run against fixture data instead of the real ~/.claude/projects/ directory. Routes now read projects_dir from app.state instead of the module-level constant. Add 26 Playwright tests across 5 files covering dashboard loading, session list, session detail with filters and analytics, sidebar collapse/localStorage persistence, and SSE log streaming. All tests pass on chromium, firefox, and webkit (78 total). CI gets a new e2e-blackbox job with a browser matrix strategy running all three engines in parallel, conditional on blackbox path changes, with trace upload on failure. * fix(ci): sync only blackbox package in e2e job * fix(ci): exclude e2e tests from unit test job The test job doesn't install Playwright browsers, so e2e tests error when pytest collects them. 
Ignore tests/e2e/ directories in the test job — those are handled by the dedicated e2e-blackbox job.
333 lines
13 KiB
Python
333 lines
13 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any
|
|
|
|
from blackbox.formatting import (
|
|
AuditFormatter,
|
|
DigestFormatter,
|
|
MetaFormatter,
|
|
ProjectFormatter,
|
|
RecommendationFormatter,
|
|
)
|
|
from blackbox.models import (
|
|
ProjectStats,
|
|
Recommendation,
|
|
SessionAudit,
|
|
SessionDigest,
|
|
WeekStats,
|
|
)
|
|
from tests.conftest import make_meta
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# MetaFormatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestMetaFormatter:
    """MetaFormatter.summary() rendering of session metadata."""

    def summarize(self, **overrides: Any) -> str:
        """Render a summary from a meta fixture built with the given overrides."""
        return MetaFormatter(make_meta(**overrides)).summary()

    def test_basic(self) -> None:
        rendered = self.summarize(input_tokens=5000, output_tokens=2000, tool_errors=2)
        expected_fragments = (
            "abcd1234",
            "60min",
            "10 user / 12 assistant",
            "25 calls (2 errors)",
            "5,000 in / 2,000 out",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_with_git(self) -> None:
        assert "5 commits on main" in self.summarize(git_commits=5, git_branch="main")

    def test_git_without_branch(self) -> None:
        # A commit count without a branch name falls back to "unknown".
        assert "unknown" in self.summarize(git_commits=1, git_branch=None)

    def test_with_files(self) -> None:
        rendered = self.summarize(files_modified=3, lines_added=100, lines_removed=20)
        assert "3 modified" in rendered
        assert "+100/-20" in rendered

    def test_without_files(self) -> None:
        assert "modified" not in self.summarize(files_modified=0)

    def test_with_compactions(self) -> None:
        assert "Compactions: 3" in self.summarize(compactions=3)

    def test_without_compactions(self) -> None:
        assert "Compactions" not in self.summarize(compactions=0)

    def test_with_interruptions(self) -> None:
        assert "Interruptions: 2" in self.summarize(user_interruptions=2)

    def test_without_interruptions(self) -> None:
        assert "Interruptions" not in self.summarize(user_interruptions=0)

    def test_top_tools_capped_at_5(self) -> None:
        # Six tools supplied; only the five most-used may appear.
        counts = {"Read": 20, "Edit": 15, "Bash": 10, "Write": 5, "Grep": 3, "X": 1}
        rendered = self.summarize(tool_counts=counts)
        assert "Read=20" in rendered
        assert "X=1" not in rendered

    def test_no_top_tools_when_empty(self) -> None:
        assert "Top tools" not in self.summarize(tool_counts={})

    def test_thinking_blocks_shown_when_nonzero(self) -> None:
        assert "Thinking blocks: 5" in self.summarize(thinking_blocks=5)

    def test_thinking_blocks_hidden_when_zero(self) -> None:
        assert "Thinking blocks" not in self.summarize(thinking_blocks=0)

    def test_web_shown_when_nonzero(self) -> None:
        rendered = self.summarize(web_searches=3, web_fetches=1)
        assert "Web: 3 searches / 1 fetches" in rendered

    def test_web_hidden_when_zero(self) -> None:
        assert "Web:" not in self.summarize(web_searches=0, web_fetches=0)

    def test_permission_mode_shown_when_set(self) -> None:
        rendered = self.summarize(permission_mode="bypassPermissions")
        assert "Permission mode: bypassPermissions" in rendered

    def test_permission_mode_hidden_when_none(self) -> None:
        assert "Permission mode" not in self.summarize(permission_mode=None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AuditFormatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestAuditFormatter:
    """AuditFormatter.summary() rendering of a single SessionAudit."""

    def test_basic(self) -> None:
        audit = SessionAudit(
            session_id="abcd1234-5678",
            outcome="success",
            satisfaction="positive",
            session_type="debugging",
        )
        rendered = AuditFormatter(audit).summary()
        assert "abcd1234" in rendered
        assert "Outcome: success" in rendered
        assert "Satisfaction: positive" in rendered
        assert "Type: debugging" in rendered

    def test_with_goals(self) -> None:
        audit = SessionAudit(session_id="x", goal_categories={"bugfix": 5, "refactor": 3})
        rendered = AuditFormatter(audit).summary()
        assert "Goals:" in rendered
        assert "bugfix(5)" in rendered

    def test_without_goals(self) -> None:
        rendered = AuditFormatter(SessionAudit(session_id="x", goal_categories={})).summary()
        assert "Goals" not in rendered

    def test_with_friction(self) -> None:
        audit = SessionAudit(session_id="x", friction_counts={"permission_denied": 4})
        assert "permission_denied(4)" in AuditFormatter(audit).summary()

    def test_without_friction(self) -> None:
        rendered = AuditFormatter(SessionAudit(session_id="x")).summary()
        assert "Friction" not in rendered

    def test_with_instructions(self) -> None:
        audit = SessionAudit(session_id="x", user_instructions=("use pytest", "no comments"))
        assert "Instructions: 2 extracted" in AuditFormatter(audit).summary()

    def test_without_instructions(self) -> None:
        rendered = AuditFormatter(SessionAudit(session_id="x")).summary()
        assert "Instructions" not in rendered

    def test_summary_truncated_at_120(self) -> None:
        # A 200-char summary must be clipped to exactly 120 characters.
        audit = SessionAudit(session_id="x", summary="x" * 200)
        rendered = AuditFormatter(audit).summary()
        summary_line = next(ln for ln in rendered.split("\n") if "Summary" in ln)
        assert len(summary_line.split("Summary: ")[1]) == 120
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# RecommendationFormatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestRecommendationFormatter:
    """RecommendationFormatter.summary() includes suggestion and evidence."""

    def test_basic(self) -> None:
        rec = Recommendation(
            suggestion="do X",
            evidence="50% failure",
            frequency=0.5,
            source_sessions=5,
        )
        rendered = RecommendationFormatter(rec).summary()
        assert "do X" in rendered
        assert "50% failure" in rendered
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ProjectFormatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestProjectFormatter:
    """ProjectFormatter.summary() rendering of per-project statistics."""

    def make(self, **kw: Any) -> ProjectStats:
        """Build a ProjectStats with baseline fields, overridden by *kw*."""
        base: dict[str, Any] = {
            "project_path": "/proj/myapp",
            "project_name": "myapp",
            "session_count": 10,
            "success_rate": 0.9,
            "avg_tool_errors": 2.5,
            "avg_duration_s": 600.0,
            "top_error_categories": (),
            "top_friction": (),
        }
        return ProjectStats(**{**base, **kw})

    def test_basic(self) -> None:
        rendered = ProjectFormatter(self.make()).summary()
        assert "myapp: 10 sessions" in rendered
        assert "90% success" in rendered

    def test_outlier_marker(self) -> None:
        # Outlier projects are flagged with a [!] marker.
        assert "[!]" in ProjectFormatter(self.make(is_outlier=True)).summary()

    def test_error_categories_shown(self) -> None:
        stats = self.make(top_error_categories=(("edit_failed", 8), ("command_failed", 3)))
        assert "Errors: edit_failed(8)" in ProjectFormatter(stats).summary()

    def test_friction_shown(self) -> None:
        stats = self.make(top_friction=(("user_rejected", 4),))
        assert "Friction: user_rejected(4)" in ProjectFormatter(stats).summary()

    def test_no_sub_lines_when_clean(self) -> None:
        # With no errors or friction the summary collapses to a single line.
        rendered = ProjectFormatter(self.make()).summary()
        assert len(rendered.strip().split("\n")) == 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# DigestFormatter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDigestFormatter:
    """DigestFormatter.summary() rendering of an aggregate session digest."""

    def make(self, **kw: Any) -> SessionDigest:
        """Build a SessionDigest with baseline fields, overridden by *kw*."""
        base: dict[str, Any] = {
            "session_count": 10,
            "date_range": (100.0, 500.0),
            "success_rate": 0.8,
        }
        return SessionDigest(**{**base, **kw})

    def test_includes_count(self) -> None:
        assert "42 sessions" in DigestFormatter(self.make(session_count=42)).summary()

    def test_success_rate(self) -> None:
        assert "80% success rate" in DigestFormatter(self.make(success_rate=0.8)).summary()

    def test_outcome_distribution(self) -> None:
        digest = self.make(
            session_count=10,
            outcome_distribution={"fully_achieved": 7, "unclear": 3},
        )
        # Counts are rendered with their percentage of the session total.
        assert "fully_achieved: 7 (70%)" in DigestFormatter(digest).summary()

    def test_no_trends_without_weeks(self) -> None:
        assert "Trends" not in DigestFormatter(self.make()).summary()

    def test_trends_with_weeks(self) -> None:
        week = WeekStats(
            week="2026-W17",
            session_count=5,
            success_rate=0.7,
            avg_errors_per_session=1.0,
            avg_duration_s=600.0,
        )
        rendered = DigestFormatter(self.make(weeks=(week,), rolling_success_rate=0.7)).summary()
        assert "Trends" in rendered
        assert "2026-W17" in rendered

    def test_no_projects_without_data(self) -> None:
        assert "Projects" not in DigestFormatter(self.make()).summary()

    def test_no_recommendations_without_data(self) -> None:
        assert "Recommendations" not in DigestFormatter(self.make()).summary()

    def test_with_recommendations(self) -> None:
        rec = Recommendation(
            suggestion="Fix the thing",
            evidence="50% failure",
            frequency=0.5,
            source_sessions=10,
        )
        rendered = DigestFormatter(self.make(recommendations=(rec,))).summary()
        assert "Recommendations" in rendered
        assert "1. Fix the thing" in rendered

    def test_satisfaction_distribution(self) -> None:
        digest = self.make(
            session_count=10,
            satisfaction_distribution={"happy": 6, "neutral": 4},
        )
        rendered = DigestFormatter(digest).summary()
        assert "Satisfaction:" in rendered
        assert "happy: 6" in rendered

    def test_top_friction(self) -> None:
        digest = self.make(top_friction=(("tool_failed", 12), ("blocked", 3)))
        rendered = DigestFormatter(digest).summary()
        assert "Top friction:" in rendered
        assert "tool_failed: 12" in rendered

    def test_sparkline_with_two_weeks(self) -> None:
        earlier = WeekStats(
            week="2026-W16",
            session_count=3,
            success_rate=0.5,
            avg_errors_per_session=2.0,
            avg_duration_s=600.0,
        )
        later = WeekStats(
            week="2026-W17",
            session_count=4,
            success_rate=0.9,
            avg_errors_per_session=0.5,
            avg_duration_s=400.0,
        )
        rendered = DigestFormatter(
            self.make(weeks=(earlier, later), rolling_success_rate=0.7)
        ).summary()
        assert "Success: [" in rendered
        assert "Errors: [" in rendered

    def test_error_category_deltas(self) -> None:
        week = WeekStats(
            week="2026-W17",
            session_count=3,
            success_rate=0.7,
            avg_errors_per_session=1.0,
            avg_duration_s=600.0,
        )
        digest = self.make(
            weeks=(week,),
            error_category_deltas=(("command_failed", 0.5, 4.0, 6.0),),
        )
        rendered = DigestFormatter(digest).summary()
        assert "Error category trends:" in rendered
        assert "command_failed" in rendered

    def test_with_projects(self) -> None:
        stats = ProjectStats(
            project_path="/proj/myapp",
            project_name="myapp",
            session_count=5,
            success_rate=0.8,
            avg_tool_errors=1.0,
            avg_duration_s=300.0,
            top_error_categories=(),
            top_friction=(),
        )
        rendered = DigestFormatter(self.make(projects=(stats,))).summary()
        assert "Projects (1)" in rendered
        assert "myapp" in rendered
|
|
|
|
|
|
class TestDigestToJson:
    """DigestFormatter.to_json() produces valid, fully nested JSON."""

    def make(self, **kw: Any) -> SessionDigest:
        """Build a SessionDigest with baseline fields, overridden by *kw*."""
        base: dict[str, Any] = {
            "session_count": 10,
            "date_range": (100.0, 500.0),
            "success_rate": 0.8,
        }
        return SessionDigest(**{**base, **kw})

    def test_valid_json(self) -> None:
        payload = json.loads(DigestFormatter(self.make(session_count=5)).to_json())
        assert payload["session_count"] == 5

    def test_with_nested_weeks(self) -> None:
        week = WeekStats(
            week="2026-W17",
            session_count=3,
            success_rate=0.7,
            avg_errors_per_session=1.0,
            avg_duration_s=600.0,
        )
        payload = json.loads(DigestFormatter(self.make(weeks=(week,))).to_json())
        assert len(payload["weeks"]) == 1
        assert payload["weeks"][0]["week"] == "2026-W17"

    def test_with_nested_projects(self) -> None:
        stats = ProjectStats(
            project_path="/p",
            project_name="p",
            session_count=5,
            success_rate=0.8,
            avg_tool_errors=1.0,
            avg_duration_s=300.0,
            top_error_categories=(),
            top_friction=(),
            is_outlier=False,
        )
        payload = json.loads(DigestFormatter(self.make(projects=(stats,))).to_json())
        assert len(payload["projects"]) == 1
        assert payload["projects"][0]["project_name"] == "p"
|