# Source: codeflash-agent/scripts/claude_insights.py
# Commit: Kevin Turcios 20f6c59f05
# Lint and format entire repo, not just packages (#23)
# Remove .codeflash/ from ruff extend-exclude, add per-file ignores
# for .codeflash/, scripts/, evals/, and plugin/ (benchmark/script
# patterns like print, eval, magic values). Remove shebangs. Widen
# pre-commit hooks to check the full repo.
# 2026-04-15 03:16:15 -05:00
#
# 3368 lines
# 116 KiB
# Python

# /// script
# requires-python = ">=3.11"
# ///
"""Portable Python implementation of Claude Code /insights.
This script focuses on the core report pipeline:
1. Scan Claude Code transcript files under ``~/.claude/projects``.
2. Reconstruct leaf conversation chains from append-only JSONL transcripts.
3. Extract deterministic usage metrics from tool calls and user messages.
4. Generate heuristic facets and higher-level insights.
5. Write an HTML report plus a JSON export.
Differences from the TypeScript implementation:
- Uses the Python standard library only.
- Narrative sections are heuristic rather than model-generated.
- Does not implement Anthropic-internal homespace collection or S3 upload.
"""
from __future__ import annotations
import argparse
import difflib
import html
import json
import math
import os
import re
import statistics
import subprocess
from collections import Counter, defaultdict
from collections.abc import Iterable
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from functools import lru_cache
from pathlib import Path
from typing import Any
from uuid import UUID
# File-extension -> display-language lookup used when attributing file
# operations to a programming language.
EXTENSION_TO_LANGUAGE: dict[str, str] = {
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".js": "JavaScript",
    ".jsx": "JavaScript",
    ".py": "Python",
    ".rb": "Ruby",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".md": "Markdown",
    ".json": "JSON",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".sh": "Shell",
    ".css": "CSS",
    ".html": "HTML",
}
# Facet key -> human-readable label for report rendering.
# Note: "handled_complexity" shares the "Multi-file Changes" label with
# "multi_file_changes".
LABEL_MAP: dict[str, str] = {
    "debug_investigate": "Debug/Investigate",
    "implement_feature": "Implement Feature",
    "fix_bug": "Fix Bug",
    "write_script_tool": "Write Script/Tool",
    "refactor_code": "Refactor Code",
    "configure_system": "Configure System",
    "create_pr_commit": "Create PR/Commit",
    "analyze_data": "Analyze Data",
    "understand_codebase": "Understand Codebase",
    "write_tests": "Write Tests",
    "write_docs": "Write Docs",
    "deploy_infra": "Deploy/Infra",
    "warmup_minimal": "Cache Warmup",
    "fast_accurate_search": "Fast/Accurate Search",
    "correct_code_edits": "Correct Code Edits",
    "good_explanations": "Good Explanations",
    "proactive_help": "Proactive Help",
    "multi_file_changes": "Multi-file Changes",
    "handled_complexity": "Multi-file Changes",
    "good_debugging": "Good Debugging",
    "misunderstood_request": "Misunderstood Request",
    "wrong_approach": "Wrong Approach",
    "buggy_code": "Buggy Code",
    "user_rejected_action": "User Rejected Action",
    "claude_got_blocked": "Claude Got Blocked",
    "user_stopped_early": "User Stopped Early",
    "wrong_file_or_location": "Wrong File/Location",
    "excessive_changes": "Excessive Changes",
    "slow_or_verbose": "Slow/Verbose",
    "tool_failed": "Tool Failed",
    "user_unclear": "User Unclear",
    "external_issue": "External Issue",
    "frustrated": "Frustrated",
    "dissatisfied": "Dissatisfied",
    "likely_satisfied": "Likely Satisfied",
    "satisfied": "Satisfied",
    "happy": "Happy",
    "unsure": "Unsure",
    "neutral": "Neutral",
    "delighted": "Delighted",
    "single_task": "Single Task",
    "multi_task": "Multi Task",
    "iterative_refinement": "Iterative Refinement",
    "exploration": "Exploration",
    "quick_question": "Quick Question",
    "fully_achieved": "Fully Achieved",
    "mostly_achieved": "Mostly Achieved",
    "partially_achieved": "Partially Achieved",
    "not_achieved": "Not Achieved",
    "unclear_from_transcript": "Unclear",
    "unhelpful": "Unhelpful",
    "slightly_helpful": "Slightly Helpful",
    "moderately_helpful": "Moderately Helpful",
    "very_helpful": "Very Helpful",
    "essential": "Essential",
}
# Canonical ordering of satisfaction facet keys (worst -> best, then unsure).
SATISFACTION_ORDER: list[str] = [
    "frustrated",
    "dissatisfied",
    "likely_satisfied",
    "satisfied",
    "happy",
    "unsure",
]
# Canonical ordering of outcome facet keys (worst -> best, then unclear).
OUTCOME_ORDER: list[str] = [
    "not_achieved",
    "partially_achieved",
    "mostly_achieved",
    "fully_achieved",
    "unclear_from_transcript",
]
# Tool names treated as sub-agent invocations.
AGENT_TOOL_NAMES: set[str] = {"Agent", "Task"}
# Keyword heuristics: goal-category key -> patterns matched against user
# prompts to classify what the session was trying to accomplish.
GOAL_PATTERNS: dict[str, list[re.Pattern[str]]] = {
    "debug_investigate": [
        re.compile(r"\bdebug\b", re.IGNORECASE),
        re.compile(r"\binvestigat", re.IGNORECASE),
        re.compile(r"\btrace\b", re.IGNORECASE),
        re.compile(r"\bwhy\b", re.IGNORECASE),
        re.compile(r"\berror\b", re.IGNORECASE),
        re.compile(r"\bissue\b", re.IGNORECASE),
    ],
    "implement_feature": [
        re.compile(r"\bimplement\b", re.IGNORECASE),
        re.compile(r"\bbuild\b", re.IGNORECASE),
        re.compile(r"\bfeature\b", re.IGNORECASE),
        re.compile(r"\badd\b", re.IGNORECASE),
        re.compile(r"\bcreate\b", re.IGNORECASE),
    ],
    "fix_bug": [
        re.compile(r"\bfix\b", re.IGNORECASE),
        re.compile(r"\bbug\b", re.IGNORECASE),
        re.compile(r"\bbroken\b", re.IGNORECASE),
        re.compile(r"\bfailing\b", re.IGNORECASE),
    ],
    "write_script_tool": [
        re.compile(r"\bscript\b", re.IGNORECASE),
        re.compile(r"\bcli\b", re.IGNORECASE),
        re.compile(r"\btool\b", re.IGNORECASE),
        re.compile(r"\bautomation\b", re.IGNORECASE),
    ],
    "refactor_code": [
        re.compile(r"\brefactor\b", re.IGNORECASE),
        re.compile(r"\bcleanup\b", re.IGNORECASE),
        re.compile(r"\breorgan", re.IGNORECASE),
        re.compile(r"\bsimplif", re.IGNORECASE),
    ],
    "configure_system": [
        re.compile(r"\bconfigure\b", re.IGNORECASE),
        re.compile(r"\bsetup\b", re.IGNORECASE),
        re.compile(r"\binstall\b", re.IGNORECASE),
        re.compile(r"\bconfig\b", re.IGNORECASE),
        re.compile(r"\benv\b", re.IGNORECASE),
        re.compile(r"\bdocker\b", re.IGNORECASE),
        re.compile(r"\bci\b", re.IGNORECASE),
    ],
    "create_pr_commit": [
        re.compile(r"\bcommit\b", re.IGNORECASE),
        re.compile(r"\bpull request\b", re.IGNORECASE),
        re.compile(r"\bpr\b", re.IGNORECASE),
        re.compile(r"\bmerge\b", re.IGNORECASE),
    ],
    "analyze_data": [
        re.compile(r"\banaly[sz]e\b", re.IGNORECASE),
        re.compile(r"\bmetrics\b", re.IGNORECASE),
        re.compile(r"\breport\b", re.IGNORECASE),
        re.compile(r"\bdata\b", re.IGNORECASE),
    ],
    "understand_codebase": [
        re.compile(r"\bunderstand\b", re.IGNORECASE),
        re.compile(r"\bexplain\b", re.IGNORECASE),
        re.compile(r"\bwalk ?through\b", re.IGNORECASE),
        re.compile(r"\bhow does\b", re.IGNORECASE),
        re.compile(r"\bwhere is\b", re.IGNORECASE),
    ],
    "write_tests": [
        re.compile(r"\btests?\b", re.IGNORECASE),
        re.compile(r"\bpytest\b", re.IGNORECASE),
        re.compile(r"\bunit test\b", re.IGNORECASE),
        re.compile(r"\bintegration test\b", re.IGNORECASE),
    ],
    "write_docs": [
        re.compile(r"\breadme\b", re.IGNORECASE),
        re.compile(r"\bdocs?\b", re.IGNORECASE),
        re.compile(r"\bdocument", re.IGNORECASE),
    ],
    "deploy_infra": [
        re.compile(r"\bdeploy\b", re.IGNORECASE),
        re.compile(r"\binfra\b", re.IGNORECASE),
        re.compile(r"\bterraform\b", re.IGNORECASE),
        re.compile(r"\bkubernetes\b", re.IGNORECASE),
        re.compile(r"\bk8s\b", re.IGNORECASE),
    ],
}
# Imperative phrasing that suggests the user is (re)issuing instructions.
REPEATED_INSTRUCTION_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\balways\b", re.IGNORECASE),
    re.compile(r"\bnever\b", re.IGNORECASE),
    re.compile(r"\bdon't\b", re.IGNORECASE),
    re.compile(r"\bdo not\b", re.IGNORECASE),
    re.compile(r"\bplease\b", re.IGNORECASE),
    re.compile(r"\bmake sure\b", re.IGNORECASE),
    re.compile(r"\buse\b", re.IGNORECASE),
    re.compile(r"\brun\b", re.IGNORECASE),
    re.compile(r"\bavoid\b", re.IGNORECASE),
]
# Strong positive sentiment markers in user messages.
POSITIVE_STRONG_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\bperfect\b", re.IGNORECASE),
    re.compile(r"\bgreat\b", re.IGNORECASE),
    re.compile(r"\bawesome\b", re.IGNORECASE),
    re.compile(r"\bexcellent\b", re.IGNORECASE),
    re.compile(r"\blove\b", re.IGNORECASE),
    re.compile(r"\bship it\b", re.IGNORECASE),
]
# Mild positive sentiment markers.
POSITIVE_MILD_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\bthanks\b", re.IGNORECASE),
    re.compile(r"\bthat works\b", re.IGNORECASE),
    re.compile(r"\bworks\b", re.IGNORECASE),
    re.compile(r"\blooks good\b", re.IGNORECASE),
    re.compile(r"\bsolid\b", re.IGNORECASE),
]
# Strong negative sentiment markers.
NEGATIVE_STRONG_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\bbroken\b", re.IGNORECASE),
    re.compile(r"\bfrustrat", re.IGNORECASE),
    re.compile(r"\bgive up\b", re.IGNORECASE),
    re.compile(r"\buseless\b", re.IGNORECASE),
    re.compile(r"\bterrible\b", re.IGNORECASE),
]
# Mild negative sentiment markers.
NEGATIVE_MILD_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\bnot right\b", re.IGNORECASE),
    re.compile(r"\bwrong\b", re.IGNORECASE),
    re.compile(r"\btry again\b", re.IGNORECASE),
    re.compile(r"\bstill failing\b", re.IGNORECASE),
    re.compile(r"\bdoesn't work\b", re.IGNORECASE),
    re.compile(r"\bdoes not work\b", re.IGNORECASE),
    re.compile(r"\bproblem\b", re.IGNORECASE),
]
# Markers of the user moving the session to a follow-on task.
CONTINUATION_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\bok\b", re.IGNORECASE),
    re.compile(r"\bokay\b", re.IGNORECASE),
    re.compile(r"\bnow\b", re.IGNORECASE),
    re.compile(r"\bnext\b", re.IGNORECASE),
    re.compile(r"\balso\b", re.IGNORECASE),
    re.compile(r"\bthen\b", re.IGNORECASE),
]
# Matches prompts that begin with an XML/HTML-like tag; such prompts are
# skipped as machine-injected noise rather than user text.
PROMPT_NOISE_RE = re.compile(r"^\s*<[a-z][^>]*>", re.IGNORECASE)
# Narrative blurbs rendered for each dominant goal category.
PROJECT_AREA_DESCRIPTIONS: dict[str, str] = {
    "implement_feature": "You use Claude Code to add or reshape product functionality, usually with code edits followed by a quick validation loop.",
    "fix_bug": "You bring Claude in when something is visibly failing and need a concrete patch rather than a high-level discussion.",
    "debug_investigate": "You lean on Claude to narrow a failure quickly, map the problem space, and turn symptoms into a working diagnosis.",
    "write_script_tool": "You regularly turn repetitive work into scripts and small command-line tools instead of doing it by hand.",
    "refactor_code": "You use Claude to restructure code without changing the goal, especially when the work spans several related files.",
    "configure_system": "You rely on Claude for setup and environment work where config drift and shell details slow you down.",
    "create_pr_commit": "You do not stop at code changes; you also use Claude to package the work into a shippable commit or PR loop.",
    "analyze_data": "You use Claude as an analyst as much as a coder, especially when the task starts with collecting and summarizing signals.",
    "understand_codebase": "You use Claude to orient inside unfamiliar code before deciding what to change.",
    "write_tests": "You turn Claude toward validation work when you need coverage, reproduction, or a guardrail around a fix.",
    "write_docs": "You use Claude to turn implementation details into docs and maintainable explanations.",
    "deploy_infra": "You ask Claude to help with deployment and infrastructure tasks where small mistakes have outsized consequences.",
}
# Feature suggestions surfaced in the report: name -> one-liner + example.
FEATURE_CATALOG: dict[str, dict[str, str]] = {
    "MCP Servers": {
        "one_liner": "Connect Claude to external tools, databases, and APIs.",
        "example_code": "claude mcp add github -- npx -y @modelcontextprotocol/server-github",
    },
    "Custom Skills": {
        "one_liner": "Package a repeated workflow behind a reusable slash command.",
        "example_code": "mkdir -p .claude/skills/review && $EDITOR .claude/skills/review/SKILL.md",
    },
    "Hooks": {
        "one_liner": "Run validations or formatting automatically at key lifecycle events.",
        "example_code": '{\n "hooks": {\n "Stop": ["pytest -q"]\n }\n}',
    },
    "Headless Mode": {
        "one_liner": "Run Claude non-interactively from scripts or CI.",
        "example_code": 'claude -p "fix the failing tests and explain the diff" --allowedTools "Read,Edit,Bash"',
    },
    "Task Agents": {
        "one_liner": "Use focused sub-agents for exploration or parallel work.",
        "example_code": "Use an agent to explore the auth flow and another agent to inspect the failing tests.",
    },
}
# Narrative blurbs rendered for each dominant friction category.
FRICTION_DESCRIPTIONS: dict[str, str] = {
    "tool_failed": "Tool execution is breaking momentum. When a shell run or file operation fails, the session shifts from solving the task to recovering the environment.",
    "wrong_approach": "The target is usually clear, but the first implementation path is not always the cheapest one. That leads to avoidable retries.",
    "buggy_code": "Claude is producing code that still needs correction, so you spend time validating and steering instead of moving straight to done.",
    "user_stopped_early": "You are stepping in to redirect or cut off a run before the initial plan lands. That usually means the execution path is drifting too far too quickly.",
    "claude_got_blocked": "Some sessions fail for environmental reasons rather than reasoning quality. That still costs turns and makes the workflow feel brittle.",
    "user_rejected_action": "Claude is proposing actions you do not want to approve, which adds friction even when the task itself is understood.",
    "slow_or_verbose": "The session is spending too much time on explanation or intermediate output relative to the value delivered.",
    "wrong_file_or_location": "The implementation work is landing in the wrong place, which forces extra review and cleanup.",
    "excessive_changes": "The patch is larger than the task needed, increasing review cost and raising the chance of regressions.",
}
# Outcome key -> verb phrase used when composing narrative sentences.
OUTCOME_VERBS: dict[str, str] = {
    "fully_achieved": "fully achieved",
    "mostly_achieved": "mostly achieved",
    "partially_achieved": "partially achieved",
    "not_achieved": "not achieved",
    "unclear_from_transcript": "unclear",
}
@dataclass
class SessionLog:
    """One reconstructed leaf conversation chain from a JSONL transcript."""
    date: str  # session date string
    messages: list[dict[str, Any]]  # ordered messages of the leaf chain
    full_path: str  # transcript file path
    created: datetime
    modified: datetime
    first_prompt: str  # truncated first meaningful user prompt
    message_count: int
    is_sidechain: bool
    session_id: str
    leaf_uuid: str  # uuid of the leaf message this chain ends at
    # Optional metadata recorded by auxiliary transcript entries.
    summary: str | None = None
    custom_title: str | None = None
    tag: str | None = None
    agent_name: str | None = None
    agent_color: str | None = None
    agent_setting: str | None = None
    mode: str | None = None
    pr_number: int | None = None
    pr_url: str | None = None
    pr_repository: str | None = None
    git_branch: str | None = None
    project_path: str = ""
@dataclass
class SessionMeta:
    """Deterministic per-session usage metrics extracted from a transcript."""
    session_id: str
    project_path: str
    start_time: str
    duration_minutes: int
    user_message_count: int
    assistant_message_count: int
    tool_counts: dict[str, int]  # tool name -> invocation count
    languages: dict[str, int]  # language name -> file-operation count
    git_commits: int
    git_pushes: int
    input_tokens: int
    output_tokens: int
    first_prompt: str
    summary: str | None
    user_interruptions: int
    user_response_times: list[float]
    tool_errors: int
    tool_error_categories: dict[str, int]
    uses_task_agent: bool
    uses_mcp: bool
    uses_web_search: bool
    uses_web_fetch: bool
    lines_added: int
    lines_removed: int
    files_modified: int
    message_hours: list[int]
    user_message_timestamps: list[str]

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> SessionMeta:
        """Rehydrate from a mapping whose keys match the field names exactly."""
        return cls(**payload)
@dataclass
class SessionFacets:
    """Heuristic facet labels derived for a single session."""
    session_id: str
    underlying_goal: str
    goal_categories: dict[str, int]
    outcome: str
    user_satisfaction_counts: dict[str, int]
    claude_helpfulness: str
    session_type: str
    friction_counts: dict[str, int]
    friction_detail: str
    primary_success: str
    brief_summary: str
    user_instructions_to_claude: list[str] = field(default_factory=list)

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> SessionFacets:
        """Rehydrate from a mapping whose keys match the field names exactly."""
        return cls(**payload)
@dataclass
class AggregatedData:
    """Report-level rollup of metrics and facets across all kept sessions."""
    total_sessions: int
    sessions_with_facets: int
    date_range: dict[str, str]
    total_messages: int = 0
    total_duration_hours: float = 0.0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    tool_counts: dict[str, int] = field(default_factory=dict)
    languages: dict[str, int] = field(default_factory=dict)
    git_commits: int = 0
    git_pushes: int = 0
    projects: dict[str, int] = field(default_factory=dict)
    goal_categories: dict[str, int] = field(default_factory=dict)
    outcomes: dict[str, int] = field(default_factory=dict)
    satisfaction: dict[str, int] = field(default_factory=dict)
    helpfulness: dict[str, int] = field(default_factory=dict)
    session_types: dict[str, int] = field(default_factory=dict)
    friction: dict[str, int] = field(default_factory=dict)
    success: dict[str, int] = field(default_factory=dict)
    session_summaries: list[dict[str, str]] = field(default_factory=list)
    total_interruptions: int = 0
    total_tool_errors: int = 0
    tool_error_categories: dict[str, int] = field(default_factory=dict)
    user_response_times: list[float] = field(default_factory=list)
    median_response_time: float = 0.0
    avg_response_time: float = 0.0
    sessions_using_task_agent: int = 0
    sessions_using_mcp: int = 0
    sessions_using_web_search: int = 0
    sessions_using_web_fetch: int = 0
    total_lines_added: int = 0
    total_lines_removed: int = 0
    total_files_modified: int = 0
    days_active: int = 0
    messages_per_day: float = 0.0
    message_hours: list[int] = field(default_factory=list)
    # Concurrent-session ("multi-clauding") counters.
    multi_clauding: dict[str, int] = field(
        default_factory=lambda: {
            "overlap_events": 0,
            "sessions_involved": 0,
            "user_messages_during": 0,
        }
    )
    # Total transcripts seen before filtering; None when not tracked.
    total_sessions_scanned: int | None = None
@dataclass(frozen=True)
class RepoIdentity:
    """Immutable identity of a git checkout (hashable for caching)."""
    root: str  # normalized work-tree root
    common_dir: str | None  # normalized git common dir, if resolvable
    remotes: frozenset[str]  # configured remote URLs
    worktrees: tuple[str, ...] = ()  # normalized linked worktree paths
@dataclass(frozen=True)
class ProjectScope:
    """Filter scope derived from --project-path-prefix (None = match all)."""
    target_prefix: str | None  # normalized prefix; None disables filtering
    path_prefixes: frozenset[str]  # prefix plus related repo roots/worktrees
    common_dirs: frozenset[str]  # git common dirs of related repos
    remotes: frozenset[str]  # remote URLs of related repos
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line options for the insights pipeline."""
    claude_home = Path(os.path.expanduser("~")) / ".claude"
    parser = argparse.ArgumentParser(
        description="Python implementation of Claude Code /insights."
    )
    parser.add_argument(
        "--projects-dir",
        type=Path,
        default=claude_home / "projects",
        help="Directory containing Claude Code project transcript directories.",
    )
    parser.add_argument(
        "--cache-dir",
        type=Path,
        default=claude_home / "usage-data-py",
        help="Directory for cached session metadata, facets, and reports.",
    )
    parser.add_argument(
        "--project-path-prefix",
        type=str,
        default=None,
        help=(
            "Only include sessions whose transcript project_path matches this path "
            "or one of its descendants."
        ),
    )
    # Optional output overrides: both default to files inside --cache-dir.
    output_options = (
        (
            "--output-html",
            "Path for the generated HTML report. Defaults to <cache-dir>/report.html.",
        ),
        (
            "--output-json",
            "Path for the JSON export. Defaults to <cache-dir>/report.json.",
        ),
    )
    for flag, help_text in output_options:
        parser.add_argument(flag, type=Path, default=None, help=help_text)
    # Per-run work limits so a huge backlog does not stall a single run.
    limit_options = (
        (
            "--max-sessions-load",
            "Maximum uncached session files to load on a single run.",
        ),
        (
            "--max-facet-extractions",
            "Maximum sessions to facet-extract on a single run.",
        ),
    )
    for flag, help_text in limit_options:
        parser.add_argument(flag, type=int, default=200, help=help_text)
    return parser.parse_args()
def ensure_dir(path: Path) -> None:
    """Create *path* and any missing parents; no error if it already exists."""
    path.mkdir(parents=True, exist_ok=True)
def normalize_path_for_match(path_text: str) -> str:
    """Canonicalize a path for comparison: expand ~, resolve symlinks, normalize."""
    expanded = os.path.expanduser(path_text)
    resolved = os.path.realpath(expanded)
    return os.path.normpath(resolved)
def run_git(path: str, *args: str) -> str | None:
    """Run a git subcommand in *path*; return stripped stdout, or None on failure.

    Failure covers a missing git binary, a 5-second timeout, and any
    non-zero exit status.
    """
    command = ["git", "-C", path, *args]
    try:
        completed = subprocess.run(
            command,
            check=False,
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        return None
    return completed.stdout.strip() if completed.returncode == 0 else None
@lru_cache(maxsize=512)
def get_repo_identity(path_text: str) -> RepoIdentity | None:
    """Resolve the git identity (root, common dir, remotes, worktrees) of a path.

    Returns None when *path_text* is not inside a git work tree (or git is
    unavailable). Memoized per input string.
    """
    normalized_path = normalize_path_for_match(path_text)
    root = run_git(normalized_path, "rev-parse", "--show-toplevel")
    if not root:
        return None
    normalized_root = normalize_path_for_match(root)
    # --git-common-dir may be relative (e.g. ".git"); anchor it at the root.
    common_dir = run_git(normalized_root, "rev-parse", "--git-common-dir")
    normalized_common_dir: str | None = None
    if common_dir:
        common_path = Path(common_dir)
        if not common_path.is_absolute():
            common_path = Path(normalized_root) / common_dir
        normalized_common_dir = normalize_path_for_match(str(common_path))
    # Config lines look like "remote.<name>.url <url>"; keep the URL part.
    remotes_output = run_git(
        normalized_root,
        "config",
        "--get-regexp",
        r"^remote\..*\.url$",
    )
    remotes: set[str] = set()
    if remotes_output:
        for line in remotes_output.splitlines():
            parts = line.split(None, 1)
            if len(parts) == 2 and parts[1].strip():
                remotes.add(parts[1].strip())
    worktrees_output = run_git(
        normalized_root, "worktree", "list", "--porcelain"
    )
    worktrees: list[str] = []
    if worktrees_output:
        for line in worktrees_output.splitlines():
            # Porcelain output prefixes each worktree path with "worktree ".
            if line.startswith("worktree "):
                worktree_path = line.removeprefix("worktree ").strip()
                if worktree_path:
                    worktrees.append(normalize_path_for_match(worktree_path))
    return RepoIdentity(
        root=normalized_root,
        common_dir=normalized_common_dir,
        remotes=frozenset(remotes),
        # dict.fromkeys de-duplicates while preserving order.
        worktrees=tuple(dict.fromkeys(worktrees)),
    )
def discover_git_roots(base_path: Path, max_depth: int = 4) -> set[str]:
    """Find directories under *base_path* containing a ".git" entry.

    Walks at most *max_depth* levels below *base_path*; deeper directories are
    pruned. A ".git" file (not just a directory) also counts as a hit.
    """
    if not base_path.exists() or not base_path.is_dir():
        return set()
    normalized_base = normalize_path_for_match(str(base_path))
    discovered: set[str] = set()
    for root, dirs, files in os.walk(normalized_base):
        current_path = Path(root)
        try:
            rel_parts = current_path.relative_to(normalized_base).parts
        except ValueError:
            continue
        depth = len(rel_parts)
        if depth > max_depth:
            # Clearing dirs stops os.walk from descending further here.
            dirs[:] = []
            continue
        if ".git" in dirs or ".git" in files:
            discovered.add(normalize_path_for_match(root))
    return discovered
def build_project_scope(prefix: str | None) -> ProjectScope:
    """Expand a path prefix into a matching scope for session filtering.

    Besides the prefix itself, the scope includes roots and worktrees of git
    repositories discovered under (or containing) the prefix, their common
    git dirs, and their remote URLs, so related checkouts also match.
    """
    if not prefix:
        # No prefix configured: return an everything-matches scope.
        return ProjectScope(
            target_prefix=None,
            path_prefixes=frozenset(),
            common_dirs=frozenset(),
            remotes=frozenset(),
        )
    normalized_prefix = normalize_path_for_match(prefix)
    path_prefixes: set[str] = {normalized_prefix}
    common_dirs: set[str] = set()
    remotes: set[str] = set()
    candidate_roots = discover_git_roots(Path(normalized_prefix))
    # The prefix itself may sit inside a repo whose root is above it.
    direct_identity = get_repo_identity(normalized_prefix)
    if direct_identity:
        candidate_roots.add(direct_identity.root)
    for repo_root in candidate_roots:
        identity = get_repo_identity(repo_root)
        if not identity:
            continue
        path_prefixes.add(identity.root)
        path_prefixes.update(identity.worktrees)
        if identity.common_dir:
            common_dirs.add(identity.common_dir)
        remotes.update(identity.remotes)
    return ProjectScope(
        target_prefix=normalized_prefix,
        path_prefixes=frozenset(path_prefixes),
        common_dirs=frozenset(common_dirs),
        remotes=frozenset(remotes),
    )
def path_matches_prefix(project_path: str, prefix: str | None) -> bool:
    """True when *project_path* equals *prefix* or lives underneath it.

    A falsy prefix matches everything; a falsy project path matches nothing.
    """
    if not prefix:
        return True
    if not project_path:
        return False
    project = normalize_path_for_match(project_path)
    target = normalize_path_for_match(prefix)
    if project == target:
        return True
    return project.startswith(target + os.sep)
def matches_project_scope(project_path: str, scope: ProjectScope) -> bool:
    """True when *project_path* belongs to the filtered project scope.

    Tries path-prefix matching first; failing that, falls back to git
    identity (shared common dir, then shared remote URL) so worktrees and
    separate checkouts of the same repository still match.
    """
    if scope.target_prefix is None:
        return True
    if not project_path:
        return False
    normalized_project = normalize_path_for_match(project_path)
    for prefix in scope.path_prefixes:
        if normalized_project == prefix or normalized_project.startswith(
            prefix + os.sep
        ):
            return True
    identity = get_repo_identity(normalized_project)
    if not identity:
        return False
    if identity.common_dir and identity.common_dir in scope.common_dirs:
        return True
    return bool(scope.remotes and identity.remotes.intersection(scope.remotes))
def truncate(text: str, length: int) -> str:
    """Collapse runs of whitespace in *text* and cap it at *length* characters.

    When the collapsed text exceeds *length*, it is cut to length - 1
    characters (right-stripped) and an ellipsis is appended, so the result
    never exceeds *length*.
    """
    stripped = " ".join(text.split())
    if len(stripped) <= length:
        return stripped
    # Bug fix: the truncation marker was `+ ""` (empty string), so truncated
    # text silently lost a character with no visible indicator. Restore the
    # single-character ellipsis the length - 1 slice was sized for.
    return stripped[: max(0, length - 1)].rstrip() + "…"
def safe_title(key: str) -> str:
    """Human-readable label for a facet key, via LABEL_MAP or title-casing."""
    fallback = key.replace("_", " ").title()
    return LABEL_MAP.get(key, fallback)
def validate_uuid(text: str) -> bool:
    """Return True when *text* parses as a UUID of any version.

    Narrowed from a blanket ``except Exception``: UUID() raises ValueError
    for malformed strings and TypeError/AttributeError for non-string input;
    anything else is an unexpected bug and should propagate.
    """
    try:
        UUID(text)
    except (ValueError, TypeError, AttributeError):
        return False
    return True
def parse_iso_timestamp(value: str | None) -> datetime:
    """Parse an ISO-8601 timestamp, always returning an aware datetime.

    Falls back to the UTC epoch for missing or unparsable input; naive
    timestamps are assumed to be UTC.
    """
    epoch = datetime.fromtimestamp(0, tz=timezone.utc)
    if not value:
        return epoch
    # Rewrite a trailing "Z" into an explicit UTC offset before parsing.
    text = value[:-1] + "+00:00" if value.endswith("Z") else value
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return epoch
    if parsed.tzinfo is not None:
        return parsed
    return parsed.replace(tzinfo=timezone.utc)
def iso_date(value: str) -> str:
    """Calendar date (YYYY-MM-DD) of the parsed timestamp."""
    return str(parse_iso_timestamp(value).date())
def extract_text_blocks(content: Any) -> list[str]:
    """Collect the plain-text strings from a message content payload.

    A bare string yields itself; a list yields the "text" of every dict
    block typed "text"; anything else yields nothing.
    """
    if isinstance(content, str):
        return [content]
    if not isinstance(content, list):
        return []
    return [
        block["text"]
        for block in content
        if isinstance(block, dict)
        and block.get("type") == "text"
        and isinstance(block.get("text"), str)
    ]
def extract_user_message_text(message: dict[str, Any]) -> str:
    """Join the non-empty text blocks of a user message with newlines."""
    content = (message.get("message") or {}).get("content")
    parts = [part for part in extract_text_blocks(content) if part]
    return "\n".join(parts).strip()
def has_tool_result_block(message: dict[str, Any]) -> bool:
    """True when the message content list contains a tool_result block."""
    content = (message.get("message") or {}).get("content")
    if not isinstance(content, list):
        return False
    for block in content:
        if isinstance(block, dict) and block.get("type") == "tool_result":
            return True
    return False
def has_visible_user_content(message: dict[str, Any]) -> bool:
    """True when a non-meta user message carries content a human would see.

    String content counts when non-blank; list content counts when it holds
    any text, image, or document block.
    """
    if message.get("type") != "user" or message.get("isMeta"):
        return False
    content = (message.get("message") or {}).get("content")
    if isinstance(content, str):
        return bool(content.strip())
    if not isinstance(content, list):
        return False
    visible_types = {"text", "image", "document"}
    return any(
        isinstance(block, dict) and block.get("type") in visible_types
        for block in content
    )
def has_visible_assistant_content(message: dict[str, Any]) -> bool:
    """True when an assistant message contains at least one non-blank text block."""
    if message.get("type") != "assistant":
        return False
    content = (message.get("message") or {}).get("content")
    if not isinstance(content, list):
        return False
    return any(
        isinstance(block, dict)
        and block.get("type") == "text"
        and isinstance(block.get("text"), str)
        and bool(block["text"].strip())
        for block in content
    )
def count_visible_messages(transcript: list[dict[str, Any]]) -> int:
    """Count user and assistant messages with human-visible content."""
    total = 0
    for entry in transcript:
        kind = entry.get("type")
        if kind == "user" and has_visible_user_content(entry):
            total += 1
        elif kind == "assistant" and has_visible_assistant_content(entry):
            total += 1
    return total
def first_meaningful_user_text(transcript: list[dict[str, Any]]) -> str | None:
    """First user-authored text that is not noise, meta, or an interruption.

    Skips meta messages, compact summaries, blank text, tag-prefixed
    machine noise, and interruption notices; None when nothing qualifies.
    """
    for entry in transcript:
        if entry.get("type") != "user" or entry.get("isMeta"):
            continue
        if entry.get("isCompactSummary"):
            continue
        content = (entry.get("message") or {}).get("content")
        for text in extract_text_blocks(content):
            candidate = text.strip()
            if not candidate:
                continue
            if PROMPT_NOISE_RE.match(candidate):
                continue
            if candidate.startswith("[Request interrupted by user"):
                continue
            return candidate
    return None
def extract_first_prompt(transcript: list[dict[str, Any]]) -> str:
    """One-line, 200-char summary of the first meaningful user prompt."""
    text = first_meaningful_user_text(transcript)
    if text is None or not text:
        return "No prompt"
    return truncate(text.replace("\n", " "), 200)
def is_transcript_message(entry: dict[str, Any]) -> bool:
    """True for entry types that belong in the conversation transcript."""
    return entry.get("type") in ("user", "assistant", "attachment", "system")
def is_legacy_progress_entry(entry: dict[str, Any]) -> bool:
    """True for old-format progress entries that sit inside the parent chain."""
    if entry.get("type") != "progress":
        return False
    return isinstance(entry.get("uuid"), str) and "parentUuid" in entry
def is_compact_boundary_message(entry: dict[str, Any]) -> bool:
    """True for the system entry that marks a context-compaction boundary."""
    if entry.get("type") != "system":
        return False
    return entry.get("subtype") == "compact_boundary"
def sort_by_timestamp(
    messages: Iterable[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Stable sort of messages by timestamp; missing timestamps sort first."""

    def timestamp_key(entry: dict[str, Any]) -> str:
        return entry.get("timestamp") or ""

    return sorted(messages, key=timestamp_key)
def apply_preserved_segment_relinks(
    messages: dict[str, dict[str, Any]],
) -> None:
    """Rewire parent links after a compaction that preserved a message segment.

    Mutates *messages* in place. Finds the last compact-boundary entry that
    carries a ``compactMetadata.preservedSegment``; when that segment belongs
    to the newest boundary, it re-parents the segment head onto the anchor,
    moves the anchor's other children under the segment tail, and zeroes
    token usage on the preserved assistant messages (presumably to avoid
    double counting — TODO confirm). Finally, every message appearing before
    the newest boundary that is not part of the preserved segment is dropped.
    """
    last_segment: dict[str, Any] | None = None
    last_segment_boundary_index = -1
    absolute_last_boundary_index = -1
    entry_index: dict[str, int] = {}
    for index, entry in enumerate(messages.values()):
        entry_index[entry["uuid"]] = index
        if is_compact_boundary_message(entry):
            absolute_last_boundary_index = index
            segment = (entry.get("compactMetadata") or {}).get(
                "preservedSegment"
            ) or None
            if isinstance(segment, dict):
                last_segment = segment
                last_segment_boundary_index = index
    if not last_segment:
        return
    # Only relink when the preserved segment comes from the newest boundary.
    seg_is_live = last_segment_boundary_index == absolute_last_boundary_index
    preserved_uuids: set[str] = set()
    if seg_is_live:
        # Walk tail -> head via parent links, collecting the segment's uuids;
        # abandon everything if the chain never reaches the declared head.
        walk_seen: set[str] = set()
        current = messages.get(last_segment.get("tailUuid") or "")
        reached_head = False
        while current and current["uuid"] not in walk_seen:
            walk_seen.add(current["uuid"])
            preserved_uuids.add(current["uuid"])
            if current["uuid"] == last_segment.get("headUuid"):
                reached_head = True
                break
            parent_uuid = current.get("parentUuid")
            current = messages.get(parent_uuid) if parent_uuid else None
        if not reached_head:
            return
        head = messages.get(last_segment.get("headUuid") or "")
        anchor_uuid = last_segment.get("anchorUuid")
        tail_uuid = last_segment.get("tailUuid")
        if head and anchor_uuid:
            head["parentUuid"] = anchor_uuid
        if anchor_uuid and tail_uuid:
            # Every other child of the anchor now descends from the tail.
            for uuid_text, message in list(messages.items()):
                if message.get(
                    "parentUuid"
                ) == anchor_uuid and uuid_text != last_segment.get("headUuid"):
                    message["parentUuid"] = tail_uuid
        for uuid_text in preserved_uuids:
            message = messages.get(uuid_text)
            if not message or message.get("type") != "assistant":
                continue
            # Copy before zeroing so shared usage dicts are not mutated.
            usage = ((message.get("message") or {}).get("usage") or {}).copy()
            usage["input_tokens"] = 0
            usage["output_tokens"] = 0
            usage["cache_creation_input_tokens"] = 0
            usage["cache_read_input_tokens"] = 0
            message.setdefault("message", {})["usage"] = usage
    # Drop pre-boundary messages that were not preserved.
    to_delete: list[str] = []
    for uuid_text in list(messages.keys()):
        idx = entry_index.get(uuid_text, math.inf)
        if (
            idx < absolute_last_boundary_index
            and uuid_text not in preserved_uuids
        ):
            to_delete.append(uuid_text)
    for uuid_text in to_delete:
        messages.pop(uuid_text, None)
def apply_snip_removals(messages: dict[str, dict[str, Any]]) -> None:
    """Remove messages referenced by ``snipMetadata.removedUuids``, in place.

    Surviving children of a removed message are re-parented onto its nearest
    surviving ancestor so conversation chains stay connected.
    """
    to_delete: set[str] = set()
    for entry in messages.values():
        snip_metadata = entry.get("snipMetadata") or {}
        removed_uuids = snip_metadata.get("removedUuids")
        if isinstance(removed_uuids, list):
            for uuid_text in removed_uuids:
                if isinstance(uuid_text, str):
                    to_delete.add(uuid_text)
    if not to_delete:
        return
    # Remember each removed message's parent before dropping it.
    deleted_parent: dict[str, str | None] = {}
    for uuid_text in to_delete:
        entry = messages.get(uuid_text)
        if not entry:
            continue
        deleted_parent[uuid_text] = entry.get("parentUuid")
        messages.pop(uuid_text, None)

    def resolve(start: str) -> str | None:
        # Follow deleted parents up to the first surviving (or missing)
        # ancestor, compressing the path so repeated lookups are cheap.
        path: list[str] = []
        current: str | None = start
        while current and current in to_delete:
            path.append(current)
            current = deleted_parent.get(current)
            if current is None:
                break
        for item in path:
            deleted_parent[item] = current
        return current

    for message in messages.values():
        parent_uuid = message.get("parentUuid")
        if parent_uuid and parent_uuid in to_delete:
            message["parentUuid"] = resolve(parent_uuid)
def recover_orphaned_parallel_tool_results(
    messages: dict[str, dict[str, Any]],
    chain: list[dict[str, Any]],
    seen: set[str],
) -> list[dict[str, Any]]:
    """Splice sibling assistant messages and their tool results back into a chain.

    Assistant entries sharing the same ``message.id`` form one logical group
    (parallel branches of a single API response). For each group anchored in
    *chain*, any group member — or user tool_result child of a member — that
    the parent-link walk missed (not in *seen*) is inserted after the anchor,
    ordered by timestamp. *seen* is updated in place; a new list is returned.
    """
    chain_assistants = [
        message
        for message in chain
        if message.get("type") == "assistant"
        and isinstance((message.get("message") or {}).get("id"), str)
    ]
    if not chain_assistants:
        return chain
    # Anchor for each group: the in-chain assistant carrying that message id.
    anchor_by_message_id: dict[str, dict[str, Any]] = {}
    for assistant in chain_assistants:
        message_id = (assistant.get("message") or {}).get("id")
        if isinstance(message_id, str):
            anchor_by_message_id[message_id] = assistant
    # Index the whole transcript: assistants grouped by API message id, and
    # user tool_result messages grouped by their parent assistant uuid.
    siblings_by_message_id: dict[str, list[dict[str, Any]]] = defaultdict(list)
    tool_results_by_assistant: dict[str, list[dict[str, Any]]] = defaultdict(
        list
    )
    for message in messages.values():
        if message.get("type") == "assistant":
            message_id = (message.get("message") or {}).get("id")
            if isinstance(message_id, str):
                siblings_by_message_id[message_id].append(message)
        elif (
            message.get("type") == "user"
            and isinstance(message.get("parentUuid"), str)
            and has_tool_result_block(message)
        ):
            tool_results_by_assistant[message["parentUuid"]].append(message)
    processed_groups: set[str] = set()
    inserts: dict[str, list[dict[str, Any]]] = {}
    for assistant in chain_assistants:
        message_id = (assistant.get("message") or {}).get("id")
        if not isinstance(message_id, str) or message_id in processed_groups:
            continue
        processed_groups.add(message_id)
        group = siblings_by_message_id.get(message_id) or [assistant]
        orphaned_siblings = [
            member for member in group if member["uuid"] not in seen
        ]
        orphaned_tool_results: list[dict[str, Any]] = []
        for member in group:
            for tool_result in tool_results_by_assistant.get(
                member["uuid"], []
            ):
                if tool_result["uuid"] not in seen:
                    orphaned_tool_results.append(tool_result)
        if not orphaned_siblings and not orphaned_tool_results:
            continue
        # Siblings first, then their tool results, each in timestamp order.
        recovered = sort_by_timestamp(orphaned_siblings) + sort_by_timestamp(
            orphaned_tool_results
        )
        for item in recovered:
            seen.add(item["uuid"])
        anchor = anchor_by_message_id[message_id]
        inserts[anchor["uuid"]] = recovered
    if not inserts:
        return chain
    # Rebuild the chain with recovered messages right after their anchors.
    rebuilt: list[dict[str, Any]] = []
    for message in chain:
        rebuilt.append(message)
        rebuilt.extend(inserts.get(message["uuid"], []))
    return rebuilt
def build_conversation_chain(
    messages: dict[str, dict[str, Any]],
    leaf_message: dict[str, Any],
) -> list[dict[str, Any]]:
    """Walk parent links from a leaf back to the root and return the chain
    oldest-first, with orphaned parallel tool results spliced back in."""
    visited: set[str] = set()
    chain: list[dict[str, Any]] = []
    node: dict[str, Any] | None = leaf_message
    while node is not None:
        uuid_text = node["uuid"]
        if uuid_text in visited:
            # Defensive cycle guard: stop if a parent loop exists.
            break
        visited.add(uuid_text)
        chain.append(node)
        parent_uuid = node.get("parentUuid")
        node = messages.get(parent_uuid) if parent_uuid else None
    chain.reverse()
    return recover_orphaned_parallel_tool_results(messages, chain, visited)
def load_transcript_file(file_path: Path) -> dict[str, Any]:
    """Parse one append-only JSONL transcript file into lookup tables.

    Returns a dict with the message graph (``messages``, keyed by uuid,
    with legacy progress entries spliced out of parent chains), per-leaf
    ``summaries``, per-session metadata maps (titles, tags, agent info,
    modes, PR links), and the set of ``leaf_uuids`` that terminate
    conversation chains. Unreadable files and malformed lines are
    skipped silently.
    """
    messages: dict[str, dict[str, Any]] = {}
    summaries: dict[str, str] = {}
    custom_titles: dict[str, str] = {}
    tags: dict[str, str] = {}
    agent_names: dict[str, str] = {}
    agent_colors: dict[str, str] = {}
    agent_settings: dict[str, str] = {}
    pr_numbers: dict[str, int] = {}
    pr_urls: dict[str, str] = {}
    pr_repositories: dict[str, str] = {}
    modes: dict[str, str] = {}
    # Maps each legacy "progress" entry's uuid to its nearest non-progress
    # ancestor uuid (or None), so real messages can be re-linked past them.
    progress_bridge: dict[str, str | None] = {}
    try:
        raw_lines = file_path.read_text(
            encoding="utf-8", errors="replace"
        ).splitlines()
    except OSError:
        # A missing or unreadable file degrades to an empty transcript.
        raw_lines = []
    entries: list[dict[str, Any]] = []
    for line in raw_lines:
        stripped = line.strip()
        if not stripped:
            continue
        try:
            parsed = json.loads(stripped)
        except json.JSONDecodeError:
            # Tolerate truncated/corrupt lines in the append-only log.
            continue
        if isinstance(parsed, dict):
            entries.append(parsed)
    for entry in entries:
        if is_legacy_progress_entry(entry):
            # Chain consecutive progress entries through to the first
            # non-progress ancestor, so a run of them collapses to one hop.
            parent_uuid = entry.get("parentUuid")
            if parent_uuid and parent_uuid in progress_bridge:
                progress_bridge[entry["uuid"]] = progress_bridge[parent_uuid]
            else:
                progress_bridge[entry["uuid"]] = parent_uuid
            continue
        if is_transcript_message(entry):
            parent_uuid = entry.get("parentUuid")
            if parent_uuid in progress_bridge:
                # Splice progress entries out of this message's chain.
                entry["parentUuid"] = progress_bridge[parent_uuid]
            messages[entry["uuid"]] = entry
        # Sidecar entry types attach metadata keyed by leaf uuid or
        # session id; unknown types fall through and are ignored.
        elif entry.get("type") == "summary" and isinstance(
            entry.get("leafUuid"), str
        ):
            summaries[entry["leafUuid"]] = entry.get("summary") or ""
        elif entry.get("type") == "custom-title" and isinstance(
            entry.get("sessionId"), str
        ):
            custom_titles[entry["sessionId"]] = entry.get("customTitle") or ""
        elif entry.get("type") == "tag" and isinstance(
            entry.get("sessionId"), str
        ):
            tags[entry["sessionId"]] = entry.get("tag") or ""
        elif entry.get("type") == "agent-name" and isinstance(
            entry.get("sessionId"), str
        ):
            agent_names[entry["sessionId"]] = entry.get("agentName") or ""
        elif entry.get("type") == "agent-color" and isinstance(
            entry.get("sessionId"), str
        ):
            agent_colors[entry["sessionId"]] = entry.get("agentColor") or ""
        elif entry.get("type") == "agent-setting" and isinstance(
            entry.get("sessionId"), str
        ):
            agent_settings[entry["sessionId"]] = (
                entry.get("agentSetting") or ""
            )
        elif entry.get("type") == "mode" and isinstance(
            entry.get("sessionId"), str
        ):
            modes[entry["sessionId"]] = entry.get("mode") or ""
        elif entry.get("type") == "pr-link" and isinstance(
            entry.get("sessionId"), str
        ):
            # NOTE(review): assumes prNumber is numeric or absent — a
            # non-numeric string would raise ValueError here; confirm the
            # writer never emits one.
            pr_numbers[entry["sessionId"]] = int(entry.get("prNumber") or 0)
            pr_urls[entry["sessionId"]] = entry.get("prUrl") or ""
            pr_repositories[entry["sessionId"]] = (
                entry.get("prRepository") or ""
            )
    # Post-processing passes (defined elsewhere) mutate the message map
    # in place: re-linking preserved segments and removing snipped spans.
    apply_preserved_segment_relinks(messages)
    apply_snip_removals(messages)
    all_messages = list(messages.values())
    parent_uuids = {
        message.get("parentUuid")
        for message in all_messages
        if message.get("parentUuid")
    }
    # Terminal = never referenced as a parent by any other message.
    terminal_messages = [
        message
        for message in all_messages
        if message["uuid"] not in parent_uuids
    ]
    leaf_uuids: set[str] = set()
    for terminal in terminal_messages:
        # Walk upward (cycle-safe) until the first user/assistant message;
        # that node is the chain's conversational leaf.
        seen: set[str] = set()
        current: dict[str, Any] | None = terminal
        while current:
            uuid_text = current["uuid"]
            if uuid_text in seen:
                break
            seen.add(uuid_text)
            if current.get("type") in {"user", "assistant"}:
                leaf_uuids.add(uuid_text)
                break
            parent_uuid = current.get("parentUuid")
            current = messages.get(parent_uuid) if parent_uuid else None
    return {
        "messages": messages,
        "summaries": summaries,
        "custom_titles": custom_titles,
        "tags": tags,
        "agent_names": agent_names,
        "agent_colors": agent_colors,
        "agent_settings": agent_settings,
        "pr_numbers": pr_numbers,
        "pr_urls": pr_urls,
        "pr_repositories": pr_repositories,
        "modes": modes,
        "leaf_uuids": leaf_uuids,
    }
def load_all_logs_from_session_file(file_path: Path) -> list[SessionLog]:
    """Expand one transcript file into a SessionLog per leaf chain.

    Each leaf uuid from the parsed transcript yields one log: the full
    parent chain up to the root, plus any children hanging off the leaf
    appended in timestamp order. Session-scoped metadata (titles, tags,
    agent info, PR links, modes) is resolved via the leaf's session id.
    """
    data = load_transcript_file(file_path)
    messages: dict[str, dict[str, Any]] = data["messages"]
    if not messages:
        return []
    leaf_messages: list[dict[str, Any]] = []
    children_by_parent: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for message in messages.values():
        if message["uuid"] in data["leaf_uuids"]:
            leaf_messages.append(message)
        elif isinstance(message.get("parentUuid"), str):
            children_by_parent[message["parentUuid"]].append(message)
    logs: list[SessionLog] = []
    for leaf_message in leaf_messages:
        chain = build_conversation_chain(messages, leaf_message)
        if not chain:
            continue
        # Non-leaf children of the leaf (presumably trailing entries that
        # are not themselves conversational leaves) follow the chain.
        trailing_messages = sort_by_timestamp(
            children_by_parent.get(leaf_message["uuid"], [])
        )
        if trailing_messages:
            chain.extend(trailing_messages)
        first_message = chain[0]
        # Fall back to the file stem (the session uuid) when the leaf
        # carries no explicit sessionId.
        session_id = str(leaf_message.get("sessionId") or file_path.stem)
        logs.append(
            SessionLog(
                date=str(leaf_message.get("timestamp") or ""),
                messages=chain,
                full_path=str(file_path),
                created=parse_iso_timestamp(first_message.get("timestamp")),
                modified=parse_iso_timestamp(leaf_message.get("timestamp")),
                first_prompt=extract_first_prompt(chain),
                message_count=count_visible_messages(chain),
                is_sidechain=bool(first_message.get("isSidechain")),
                session_id=session_id,
                leaf_uuid=leaf_message["uuid"],
                summary=data["summaries"].get(leaf_message["uuid"]),
                custom_title=data["custom_titles"].get(session_id),
                tag=data["tags"].get(session_id),
                agent_name=data["agent_names"].get(session_id),
                agent_color=data["agent_colors"].get(session_id),
                agent_setting=data["agent_settings"].get(session_id),
                mode=data["modes"].get(session_id),
                pr_number=data["pr_numbers"].get(session_id),
                pr_url=data["pr_urls"].get(session_id),
                pr_repository=data["pr_repositories"].get(session_id),
                git_branch=leaf_message.get("gitBranch"),
                project_path=str(first_message.get("cwd") or ""),
            )
        )
    return logs
def classify_tool_error(content: str) -> str:
    """Bucket a tool_result error message into a coarse category.

    Rules are ordered by specificity and the first matching substring
    wins, so e.g. an "exit code" failure is never misfiled as an edit
    failure. Anything unrecognized falls through to "Other".
    """
    text = content.lower()
    rules: tuple[tuple[tuple[str, ...], str], ...] = (
        (("exit code",), "Command Failed"),
        (("rejected", "doesn't want"), "User Rejected"),
        (("string to replace not found", "no changes"), "Edit Failed"),
        (("modified since read",), "File Changed"),
        (("exceeds maximum", "too large"), "File Too Large"),
        (("file not found", "does not exist"), "File Not Found"),
    )
    for needles, category in rules:
        if any(needle in text for needle in needles):
            return category
    return "Other"
def language_from_path(file_path: str) -> str | None:
    """Map a file path to a language name via its extension, or ``None``."""
    extension = Path(file_path).suffix.lower()
    return EXTENSION_TO_LANGUAGE.get(extension)
def diff_line_counts(old: str, new: str) -> tuple[int, int]:
    """Return ``(lines_added, lines_removed)`` between two text blobs.

    Diffs at line granularity with difflib: insertions and the new side
    of replacements count as added; deletions and the old side of
    replacements count as removed.
    """
    matcher = difflib.SequenceMatcher(a=old.splitlines(), b=new.splitlines())
    added = 0
    removed = 0
    for opcode, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
        if opcode in {"replace", "insert"}:
            added += new_hi - new_lo
        if opcode in {"replace", "delete"}:
            removed += old_hi - old_lo
    return added, removed
def is_human_user_message(message: dict[str, Any]) -> bool:
    """True when *message* is a user turn carrying visible text.

    String content counts only when non-blank; list content counts when
    at least one block is a ``text`` block (tool_result-only turns are
    synthetic, not human input). Any other content shape is rejected.
    """
    if message.get("type") != "user":
        return False
    content = (message.get("message") or {}).get("content")
    if isinstance(content, list):
        for block in content:
            if isinstance(block, dict) and block.get("type") == "text":
                return True
        return False
    if isinstance(content, str):
        return bool(content.strip())
    return False
def extract_tool_stats(log: SessionLog) -> dict[str, Any]:
    """Scan every message in *log* and accumulate deterministic usage stats.

    Returns a dict of counters covering tool usage, languages touched,
    git activity, token totals, user response times, tool errors (by
    category), edit volume, and the local hours at which the user wrote.
    """
    tool_counts: Counter[str] = Counter()
    languages: Counter[str] = Counter()
    git_commits = 0
    git_pushes = 0
    input_tokens = 0
    output_tokens = 0
    user_interruptions = 0
    user_response_times: list[float] = []
    tool_errors = 0
    tool_error_categories: Counter[str] = Counter()
    uses_task_agent = False
    uses_mcp = False
    uses_web_search = False
    uses_web_fetch = False
    lines_added = 0
    lines_removed = 0
    files_modified: set[str] = set()
    message_hours: list[int] = []
    user_message_timestamps: list[str] = []
    # Timestamp of the most recent assistant message, used to measure how
    # long the user took to respond to it.
    last_assistant_timestamp: str | None = None
    for message in log.messages:
        timestamp = message.get("timestamp")
        if message.get("type") == "assistant":
            if timestamp:
                last_assistant_timestamp = timestamp
            # Token usage is reported per assistant message.
            usage = (message.get("message") or {}).get("usage") or {}
            input_tokens += int(usage.get("input_tokens") or 0)
            output_tokens += int(usage.get("output_tokens") or 0)
            content = (message.get("message") or {}).get("content")
            if isinstance(content, list):
                for block in content:
                    if (
                        not isinstance(block, dict)
                        or block.get("type") != "tool_use"
                    ):
                        continue
                    tool_name = str(block.get("name") or "")
                    tool_counts[tool_name] += 1
                    if tool_name in AGENT_TOOL_NAMES:
                        uses_task_agent = True
                    # MCP tools are namespaced with an "mcp__" prefix.
                    if tool_name.startswith("mcp__"):
                        uses_mcp = True
                    if tool_name == "WebSearch":
                        uses_web_search = True
                    if tool_name == "WebFetch":
                        uses_web_fetch = True
                    tool_input = block.get("input") or {}
                    if isinstance(tool_input, dict):
                        file_path = str(tool_input.get("file_path") or "")
                        if file_path:
                            language = language_from_path(file_path)
                            if language:
                                languages[language] += 1
                            # Only Edit/Write actually change the file.
                            if tool_name in {"Edit", "Write"}:
                                files_modified.add(file_path)
                        if tool_name == "Edit":
                            # Line delta from the old/new string pair.
                            added, removed = diff_line_counts(
                                str(tool_input.get("old_string") or ""),
                                str(tool_input.get("new_string") or ""),
                            )
                            lines_added += added
                            lines_removed += removed
                        if tool_name == "Write":
                            content_text = str(tool_input.get("content") or "")
                            if content_text:
                                # NOTE(review): newline count + 1 counts one
                                # extra line when content ends with "\n".
                                lines_added += content_text.count("\n") + 1
                        # Substring match: counts any command that embeds
                        # "git commit"/"git push", including compound lines.
                        command = str(tool_input.get("command") or "")
                        if "git commit" in command:
                            git_commits += 1
                        if "git push" in command:
                            git_pushes += 1
        if message.get("type") == "user":
            if is_human_user_message(message) and timestamp:
                # Local hour-of-day for the activity histogram.
                parsed = parse_iso_timestamp(timestamp).astimezone()
                message_hours.append(parsed.hour)
                user_message_timestamps.append(timestamp)
                if last_assistant_timestamp:
                    assistant_time = parse_iso_timestamp(
                        last_assistant_timestamp
                    )
                    response_time = (
                        parse_iso_timestamp(timestamp) - assistant_time
                    ).total_seconds()
                    # Keep 2s..1h only — presumably to drop instant tool
                    # echoes and long idle gaps; confirm thresholds.
                    if 2 < response_time < 3600:
                        user_response_times.append(response_time)
            content = (message.get("message") or {}).get("content")
            if isinstance(content, list):
                for block in content:
                    if (
                        not isinstance(block, dict)
                        or block.get("type") != "tool_result"
                    ):
                        continue
                    if block.get("is_error"):
                        tool_errors += 1
                        tool_error_categories[
                            classify_tool_error(
                                str(block.get("content") or "")
                            )
                        ] += 1
            # Interruption marker injected by the client into user text.
            user_text = extract_user_message_text(message)
            if "[Request interrupted by user" in user_text:
                user_interruptions += 1
    return {
        "tool_counts": dict(tool_counts),
        "languages": dict(languages),
        "git_commits": git_commits,
        "git_pushes": git_pushes,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "user_interruptions": user_interruptions,
        "user_response_times": user_response_times,
        "tool_errors": tool_errors,
        "tool_error_categories": dict(tool_error_categories),
        "uses_task_agent": uses_task_agent,
        "uses_mcp": uses_mcp,
        "uses_web_search": uses_web_search,
        "uses_web_fetch": uses_web_fetch,
        "lines_added": lines_added,
        "lines_removed": lines_removed,
        "files_modified": len(files_modified),
        "message_hours": message_hours,
        "user_message_timestamps": user_message_timestamps,
    }
def log_to_session_meta(log: SessionLog) -> SessionMeta:
    """Condense one reconstructed session log into its metadata record."""
    stats = extract_tool_stats(log)
    human_turns = sum(
        1 for message in log.messages if is_human_user_message(message)
    )
    assistant_turns = sum(
        1 for message in log.messages if message.get("type") == "assistant"
    )
    minutes = round((log.modified - log.created).total_seconds() / 60)
    return SessionMeta(
        session_id=log.session_id,
        project_path=log.project_path,
        start_time=log.created.isoformat(),
        duration_minutes=minutes,
        user_message_count=human_turns,
        assistant_message_count=assistant_turns,
        tool_counts=stats["tool_counts"],
        languages=stats["languages"],
        git_commits=stats["git_commits"],
        git_pushes=stats["git_pushes"],
        input_tokens=stats["input_tokens"],
        output_tokens=stats["output_tokens"],
        first_prompt=log.first_prompt,
        summary=log.summary,
        user_interruptions=stats["user_interruptions"],
        user_response_times=stats["user_response_times"],
        tool_errors=stats["tool_errors"],
        tool_error_categories=stats["tool_error_categories"],
        uses_task_agent=stats["uses_task_agent"],
        uses_mcp=stats["uses_mcp"],
        uses_web_search=stats["uses_web_search"],
        uses_web_fetch=stats["uses_web_fetch"],
        lines_added=stats["lines_added"],
        lines_removed=stats["lines_removed"],
        files_modified=stats["files_modified"],
        message_hours=stats["message_hours"],
        user_message_timestamps=stats["user_message_timestamps"],
    )
def load_cached_session_meta(
    cache_dir: Path, session_id: str
) -> SessionMeta | None:
    """Read a previously cached SessionMeta; any failure is a cache miss.

    A missing file, invalid JSON, a non-object payload, and a payload
    whose fields no longer match the dataclass (TypeError from the
    loader) all return ``None``.
    """
    cache_file = cache_dir / "session-meta" / f"{session_id}.json"
    try:
        raw = json.loads(cache_file.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    if not isinstance(raw, dict):
        return None
    try:
        return SessionMeta.from_dict(raw)
    except TypeError:
        return None
def save_session_meta(cache_dir: Path, meta: SessionMeta) -> None:
    """Persist one SessionMeta as pretty-printed JSON in the cache tree."""
    out_dir = cache_dir / "session-meta"
    ensure_dir(out_dir)
    (out_dir / f"{meta.session_id}.json").write_text(
        json.dumps(asdict(meta), indent=2), encoding="utf-8"
    )
def load_cached_facets(
    cache_dir: Path, session_id: str
) -> SessionFacets | None:
    """Read previously cached SessionFacets; any failure is a cache miss.

    A missing file, invalid JSON, a non-object payload, and a stale
    schema (TypeError from the dataclass loader) all return ``None``.
    """
    facet_file = cache_dir / "facets" / f"{session_id}.json"
    try:
        raw = json.loads(facet_file.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    if not isinstance(raw, dict):
        return None
    try:
        return SessionFacets.from_dict(raw)
    except TypeError:
        return None
def save_facets(cache_dir: Path, facets: SessionFacets) -> None:
    """Persist one SessionFacets record as pretty-printed JSON."""
    out_dir = cache_dir / "facets"
    ensure_dir(out_dir)
    (out_dir / f"{facets.session_id}.json").write_text(
        json.dumps(asdict(facets), indent=2), encoding="utf-8"
    )
def scan_all_sessions(projects_dir: Path) -> list[dict[str, Any]]:
    """List every transcript file under *projects_dir*, newest first.

    Only ``<uuid>.jsonl`` files directly inside project directories
    qualify. Each entry carries the session id, path, mtime, and size;
    files whose stat fails (e.g. deleted mid-scan) are skipped.
    """
    if not projects_dir.exists():
        return []
    found: list[dict[str, Any]] = []
    for project_dir in projects_dir.iterdir():
        if not project_dir.is_dir():
            continue
        for candidate in project_dir.iterdir():
            if not candidate.is_file() or candidate.suffix != ".jsonl":
                continue
            if not validate_uuid(candidate.stem):
                continue
            try:
                file_stat = candidate.stat()
            except OSError:
                continue
            found.append(
                {
                    "session_id": candidate.stem,
                    "path": candidate,
                    "mtime": file_stat.st_mtime,
                    "size": file_stat.st_size,
                }
            )
    found.sort(key=lambda entry: entry["mtime"], reverse=True)
    return found
def is_meta_session(log: SessionLog) -> bool:
    """Detect internal facet-extraction sessions by their prompt markers.

    Only the first five messages are inspected; meta sessions announce
    themselves immediately in their opening user prompt.
    """
    markers = ("RESPOND WITH ONLY A VALID JSON OBJECT", "record_facets")
    for message in log.messages[:5]:
        if message.get("type") != "user":
            continue
        text = extract_user_message_text(message)
        if any(marker in text for marker in markers):
            return True
    return False
def choose_best_log(
    logs: list[SessionLog],
    scope: ProjectScope,
) -> SessionLog | None:
    """Pick the richest in-scope log: most human turns, then longest run.

    Returns ``None`` when no log matches the project scope.
    """
    candidates = [
        log for log in logs if matches_project_scope(log.project_path, scope)
    ]
    if not candidates:
        return None

    def richness(log: SessionLog) -> tuple[int, int]:
        # Primary: number of real human messages; tiebreak: wall duration.
        human_turns = sum(
            1 for message in log.messages if is_human_user_message(message)
        )
        return human_turns, int((log.modified - log.created).total_seconds())

    return max(candidates, key=richness)
def is_substantive_session(meta: SessionMeta) -> bool:
    """A session is substantive once it has at least two human messages
    and ran for at least one minute."""
    has_dialogue = meta.user_message_count >= 2
    has_duration = meta.duration_minutes >= 1
    return has_dialogue and has_duration
def normalize_instruction(text: str) -> str:
    """Canonicalize an instruction for dedup: lowercase, collapse all
    internal whitespace to single spaces, trim spaces and periods."""
    collapsed = " ".join(text.lower().split())
    return collapsed.strip(" .")
def split_candidate_sentences(text: str) -> list[str]:
    """Split free text into trimmed, sentence-ish fragments.

    Breaks on newlines and on whitespace following sentence-ending
    punctuation; blank fragments are dropped.
    """
    fragments = re.split(r"[\n\r]+|(?<=[.!?])\s+", text)
    sentences: list[str] = []
    for fragment in fragments:
        if fragment and fragment.strip():
            sentences.append(fragment.strip())
    return sentences
def extract_user_texts(log: SessionLog) -> list[str]:
    """Collect the non-empty text of every human user turn, in order."""
    return [
        text
        for message in log.messages
        if is_human_user_message(message)
        and (text := extract_user_message_text(message))
    ]
def extract_user_instructions(log: SessionLog) -> list[str]:
    """Pull deduplicated, instruction-like sentences from user turns.

    A sentence qualifies when it is 6-180 characters long and matches a
    repeated-instruction pattern. Matches are truncated to 140 chars,
    deduped on their normalized form, and the result capped at ten.
    """
    collected: list[str] = []
    seen_keys: set[str] = set()
    for text in extract_user_texts(log):
        for sentence in split_candidate_sentences(text):
            if len(sentence) < 6 or len(sentence) > 180:
                continue
            matched = any(
                pattern.search(sentence)
                for pattern in REPEATED_INSTRUCTION_PATTERNS
            )
            if not matched:
                continue
            shortened = truncate(sentence, 140)
            key = normalize_instruction(shortened)
            if key not in seen_keys:
                seen_keys.add(key)
                collected.append(shortened)
    return collected[:10]
def detect_goal_categories(
    log: SessionLog, meta: SessionMeta
) -> dict[str, int]:
    """Count which goal categories the user's messages match.

    Trivially short sessions are classified as warmup. When no text
    pattern matches, fall back to tool evidence: edits imply feature
    work, errors imply debugging, read-heavy use implies comprehension.
    """
    if meta.user_message_count < 2 or meta.duration_minutes < 1:
        return {"warmup_minimal": 1}
    counts: Counter[str] = Counter()
    for text in extract_user_texts(log):
        for category, patterns in GOAL_PATTERNS.items():
            if any(pattern.search(text) for pattern in patterns):
                counts[category] += 1
    if counts:
        return dict(counts)
    search_tool_uses = sum(
        meta.tool_counts.get(name, 0) for name in ("Read", "Grep", "Glob")
    )
    if meta.files_modified > 0 or meta.lines_added > 0:
        counts["implement_feature"] += 1
    elif meta.tool_errors > 0:
        counts["debug_investigate"] += 1
    elif search_tool_uses > 0:
        counts["understand_codebase"] += 1
    else:
        counts["warmup_minimal"] += 1
    return dict(counts)
def detect_satisfaction(user_texts: list[str]) -> dict[str, int]:
    """Classify each user message (past the opener) by sentiment cues.

    The first message is skipped when follow-ups exist, since the opener
    carries the task rather than a reaction. Each message lands in at
    most one bucket, checked in priority order: strong negative, mild
    negative, strong positive, mild positive, then bare continuation.
    Falls back to a single ``unsure`` when nothing matched.
    """
    counts: Counter[str] = Counter()
    candidates = user_texts if len(user_texts) <= 1 else user_texts[1:]
    buckets = (
        ("frustrated", NEGATIVE_STRONG_PATTERNS),
        ("dissatisfied", NEGATIVE_MILD_PATTERNS),
        ("happy", POSITIVE_STRONG_PATTERNS),
        ("satisfied", POSITIVE_MILD_PATTERNS),
        ("likely_satisfied", CONTINUATION_PATTERNS),
    )
    for text in candidates:
        for label, patterns in buckets:
            if any(pattern.search(text) for pattern in patterns):
                counts[label] += 1
                break
    if not counts:
        counts["unsure"] = 1
    return dict(counts)
def detect_friction(
    meta: SessionMeta, user_texts: list[str]
) -> tuple[dict[str, int], str]:
    """Count friction signals for a session and pick a one-line detail.

    Combines hard evidence (tool errors, rejections, interruptions) with
    soft text cues ("wrong file", "too much", negative sentiment). The
    detail sentence describes the most frequent category, or is empty
    when the session shows no friction signals at all.
    """
    counts: Counter[str] = Counter()
    all_text = "\n".join(user_texts)
    if meta.tool_errors > 0:
        counts["tool_failed"] += meta.tool_errors
    if meta.tool_error_categories.get("User Rejected", 0) > 0:
        counts["user_rejected_action"] += meta.tool_error_categories[
            "User Rejected"
        ]
    if meta.user_interruptions > 0:
        counts["user_stopped_early"] += meta.user_interruptions
    # Environment-level failures, as opposed to reasoning failures.
    if (
        meta.tool_error_categories.get("Command Failed", 0) > 0
        or meta.tool_error_categories.get("File Not Found", 0) > 0
    ):
        counts["claude_got_blocked"] += 1
    if any(
        "wrong file" in text.lower() or "wrong place" in text.lower()
        for text in user_texts
    ):
        counts["wrong_file_or_location"] += 1
    if any(
        "too much" in text.lower() or "overkill" in text.lower()
        for text in user_texts
    ):
        counts["excessive_changes"] += 1
    # Negative sentiment is blamed on the code when files changed,
    # otherwise on the overall approach.
    if any(
        pattern.search(all_text)
        for pattern in NEGATIVE_STRONG_PATTERNS + NEGATIVE_MILD_PATTERNS
    ):
        if meta.files_modified > 0:
            counts["buggy_code"] += 1
        else:
            counts["wrong_approach"] += 1
    # Median response gap above five minutes reads as slow/verbose turns.
    if meta.user_response_times:
        median = statistics.median(meta.user_response_times)
        if median > 300:
            counts["slow_or_verbose"] += 1
    if (
        not counts
        and meta.tool_errors == 0
        and meta.files_modified == 0
        and meta.user_interruptions == 0
    ):
        detail = ""
    else:
        top_category = (
            counts.most_common(1)[0][0] if counts else "wrong_approach"
        )
        detail = {
            "tool_failed": "Tool runs failed and forced retries before the work could move forward.",
            "user_rejected_action": "Claude proposed actions that the user did not want to approve.",
            "user_stopped_early": "The run was interrupted before the original plan fully landed.",
            "claude_got_blocked": "Environment-level failures blocked progress more than reasoning quality did.",
            "buggy_code": "The first patch did not hold up under validation and needed another pass.",
            "wrong_approach": "The initial solution path was not the cheapest route to the goal.",
            "slow_or_verbose": "The session spent too much time in intermediate output rather than forward progress.",
            "wrong_file_or_location": "The implementation drifted toward the wrong file or layer.",
            "excessive_changes": "The patch scope expanded beyond what the task really needed.",
        }.get(top_category, "There was measurable friction during execution.")
    return dict(counts), detail
def infer_outcome(
    meta: SessionMeta,
    satisfaction: dict[str, int],
    friction: dict[str, int],
) -> str:
    """Grade how well the session's goal landed.

    Signals are checked in order: strong negative sentiment, then early
    abandonment, then positive sentiment combined with concrete output,
    then any activity at all; anything else is unclear.
    """
    positive = (
        satisfaction.get("happy", 0)
        + satisfaction.get("satisfied", 0)
        + satisfaction.get("likely_satisfied", 0)
    )
    negative = satisfaction.get("frustrated", 0) + satisfaction.get(
        "dissatisfied", 0
    )
    if negative >= 2 and not positive:
        return "not_achieved"
    abandoned = (
        meta.user_interruptions > 0
        and meta.files_modified == 0
        and not positive
    )
    if abandoned:
        return "not_achieved"
    if positive and meta.files_modified > 0 and not friction:
        return "fully_achieved"
    if positive and (meta.files_modified > 0 or meta.git_commits > 0):
        return "mostly_achieved"
    if meta.files_modified > 0 or meta.tool_counts:
        if negative > positive:
            return "partially_achieved"
        return "mostly_achieved"
    return "unclear_from_transcript"
def infer_helpfulness(outcome: str, satisfaction: dict[str, int]) -> str:
    """Map outcome (plus explicit delight cues) to a helpfulness tier."""
    if outcome == "fully_achieved":
        if satisfaction.get("happy", 0) > 0:
            return "essential"
        return "very_helpful"
    if outcome == "mostly_achieved":
        if satisfaction.get("satisfied", 0) > 0:
            return "very_helpful"
        return "moderately_helpful"
    # Remaining outcomes map directly; unknown values read as moderate.
    tiers = {
        "partially_achieved": "slightly_helpful",
        "not_achieved": "unhelpful",
    }
    return tiers.get(outcome, "moderately_helpful")
def infer_session_type(
    goal_categories: dict[str, int], meta: SessionMeta
) -> str:
    """Bucket the session's overall shape from goals and message volume."""
    real_goals = sum(
        1
        for goal, count in goal_categories.items()
        if count > 0 and goal != "warmup_minimal"
    )
    if meta.user_message_count <= 1:
        return "quick_question"
    if real_goals >= 2 and meta.user_message_count >= 4:
        return "multi_task"
    if meta.user_message_count >= 5 or meta.user_interruptions > 0:
        return "iterative_refinement"
    exploring = (
        goal_categories.get("understand_codebase", 0) > 0
        or goal_categories.get("analyze_data", 0) > 0
    )
    if exploring and meta.files_modified == 0:
        return "exploration"
    return "single_task"
def infer_primary_success(
    goal_categories: dict[str, int], meta: SessionMeta, outcome: str
) -> str:
    """Name the session's standout strength, or ``none``.

    Only sessions that (mostly) achieved their goal earn a label; the
    checks run from most to least specific evidence.
    """
    if outcome not in {"fully_achieved", "mostly_achieved"}:
        return "none"
    search_uses = sum(
        meta.tool_counts.get(name, 0) for name in ("Read", "Grep", "Glob")
    )
    if goal_categories.get("debug_investigate", 0) > 0 and search_uses > 0:
        return "good_debugging"
    touched_lines = meta.lines_added + meta.lines_removed
    if meta.files_modified >= 3 or touched_lines >= 60:
        return "multi_file_changes"
    if meta.files_modified > 0:
        return "correct_code_edits"
    if search_uses >= 3:
        return "fast_accurate_search"
    if meta.assistant_message_count > meta.user_message_count:
        return "good_explanations"
    if meta.uses_task_agent:
        return "proactive_help"
    return "none"
def heuristic_extract_facets(
    log: SessionLog, meta: SessionMeta
) -> SessionFacets:
    """Run the full heuristic facet pipeline for one session.

    Derives goals, sentiment, friction, outcome, helpfulness, session
    type, standout success, and a one-line summary purely from
    deterministic signals — no model calls involved.
    """
    user_texts = extract_user_texts(log)
    goals = detect_goal_categories(log, meta)
    sentiment = detect_satisfaction(user_texts)
    friction, friction_note = detect_friction(meta, user_texts)
    outcome = infer_outcome(meta, sentiment, friction)
    goal_line = truncate(
        meta.first_prompt or "Investigate the transcripted task", 120
    )
    return SessionFacets(
        session_id=meta.session_id,
        underlying_goal=goal_line,
        goal_categories=goals,
        outcome=outcome,
        user_satisfaction_counts=sentiment,
        claude_helpfulness=infer_helpfulness(outcome, sentiment),
        session_type=infer_session_type(goals, meta),
        friction_counts=friction,
        friction_detail=friction_note,
        primary_success=infer_primary_success(goals, meta, outcome),
        brief_summary=f"{goal_line} ({OUTCOME_VERBS.get(outcome, outcome)}).",
        user_instructions_to_claude=extract_user_instructions(log),
    )
def is_minimal_session(facet: SessionFacets | None) -> bool:
    """True when the facet's only active goal is ``warmup_minimal``."""
    if not facet:
        return False
    active_goals = [
        name for name, count in facet.goal_categories.items() if count > 0
    ]
    return active_goals == ["warmup_minimal"]
def detect_multi_clauding(sessions: list[SessionMeta]) -> dict[str, int]:
    """Detect interleaved use of multiple sessions ("multi-clauding").

    Flattens every human-message timestamp across sessions into one
    sorted stream, then flags cases where two consecutive messages of
    one session have a different session's message in between, within a
    30-minute window.

    Returns counts of overlap events (distinct session pairs), sessions
    involved, and user messages observed during overlaps (deduped at
    second resolution via the int() casts below).
    """
    overlap_window_seconds = 30 * 60
    all_messages: list[tuple[float, str]] = []
    for session in sessions:
        for timestamp in session.user_message_timestamps:
            all_messages.append(
                (
                    parse_iso_timestamp(timestamp).timestamp(),
                    session.session_id,
                )
            )
    all_messages.sort()
    # Most recent in-window index of each session's latest message.
    session_last_index: dict[str, int] = {}
    window_start = 0
    overlap_pairs: set[tuple[str, str]] = set()
    messages_during: set[tuple[int, str]] = set()
    for index, (timestamp, session_id) in enumerate(all_messages):
        # Evict stream entries older than the window; a session is
        # forgotten only when its most recent tracked message expires.
        while (
            window_start < index
            and timestamp - all_messages[window_start][0]
            > overlap_window_seconds
        ):
            expiring_session = all_messages[window_start][1]
            if session_last_index.get(expiring_session) == window_start:
                session_last_index.pop(expiring_session, None)
            window_start += 1
        previous_index = session_last_index.get(session_id)
        if previous_index is not None:
            # Scan the gap between this session's last message and now for
            # any other session's message — that is an interleaving.
            for between_index in range(previous_index + 1, index):
                between_session = all_messages[between_index][1]
                if between_session != session_id:
                    pair = tuple(sorted((session_id, between_session)))
                    overlap_pairs.add(pair)
                    messages_during.add(
                        (int(all_messages[previous_index][0]), session_id)
                    )
                    messages_during.add(
                        (int(all_messages[between_index][0]), between_session)
                    )
                    messages_during.add((int(timestamp), session_id))
                    # One interleaving is enough evidence for this gap.
                    break
        session_last_index[session_id] = index
    sessions_involved: set[str] = set()
    for first, second in overlap_pairs:
        sessions_involved.add(first)
        sessions_involved.add(second)
    return {
        "overlap_events": len(overlap_pairs),
        "sessions_involved": len(sessions_involved),
        "user_messages_during": len(messages_during),
    }
def aggregate_data(
    sessions: list[SessionMeta], facets: dict[str, SessionFacets]
) -> AggregatedData:
    """Roll all session metadata (and optional facets) into one payload.

    Sums scalar counters across sessions, merges per-session counter
    dicts, folds facet-level distributions (goals, outcomes,
    helpfulness, satisfaction, friction, success) for sessions that have
    facets, keeps up to 50 summary rows, and derives the date range,
    response-time stats, activity days, and multi-clauding signals.
    """
    result = AggregatedData(
        total_sessions=len(sessions),
        sessions_with_facets=len(facets),
        date_range={"start": "", "end": ""},
    )
    dates: list[str] = []
    all_response_times: list[float] = []
    all_message_hours: list[int] = []
    for session in sessions:
        dates.append(session.start_time)
        # Scalar totals.
        result.total_messages += session.user_message_count
        result.total_duration_hours += session.duration_minutes / 60
        result.total_input_tokens += session.input_tokens
        result.total_output_tokens += session.output_tokens
        result.git_commits += session.git_commits
        result.git_pushes += session.git_pushes
        result.total_interruptions += session.user_interruptions
        result.total_tool_errors += session.tool_errors
        result.total_lines_added += session.lines_added
        result.total_lines_removed += session.lines_removed
        result.total_files_modified += session.files_modified
        # Feature adoption is counted per session, not per use.
        result.sessions_using_task_agent += int(session.uses_task_agent)
        result.sessions_using_mcp += int(session.uses_mcp)
        result.sessions_using_web_search += int(session.uses_web_search)
        result.sessions_using_web_fetch += int(session.uses_web_fetch)
        all_response_times.extend(session.user_response_times)
        all_message_hours.extend(session.message_hours)
        # Merge per-session counter dicts into the aggregate maps.
        for key, count in session.tool_counts.items():
            result.tool_counts[key] = result.tool_counts.get(key, 0) + count
        for key, count in session.languages.items():
            result.languages[key] = result.languages.get(key, 0) + count
        for key, count in session.tool_error_categories.items():
            result.tool_error_categories[key] = (
                result.tool_error_categories.get(key, 0) + count
            )
        if session.project_path:
            result.projects[session.project_path] = (
                result.projects.get(session.project_path, 0) + 1
            )
        # Facet-derived distributions, only for sessions with facets.
        facet = facets.get(session.session_id)
        if facet:
            for key, count in facet.goal_categories.items():
                if count > 0:
                    result.goal_categories[key] = (
                        result.goal_categories.get(key, 0) + count
                    )
            result.outcomes[facet.outcome] = (
                result.outcomes.get(facet.outcome, 0) + 1
            )
            result.helpfulness[facet.claude_helpfulness] = (
                result.helpfulness.get(facet.claude_helpfulness, 0) + 1
            )
            result.session_types[facet.session_type] = (
                result.session_types.get(facet.session_type, 0) + 1
            )
            for key, count in facet.user_satisfaction_counts.items():
                if count > 0:
                    result.satisfaction[key] = (
                        result.satisfaction.get(key, 0) + count
                    )
            for key, count in facet.friction_counts.items():
                if count > 0:
                    result.friction[key] = result.friction.get(key, 0) + count
            if facet.primary_success != "none":
                result.success[facet.primary_success] = (
                    result.success.get(facet.primary_success, 0) + 1
                )
        # Keep a bounded sample of per-session summary rows for the report.
        if len(result.session_summaries) < 50:
            result.session_summaries.append(
                {
                    "id": session.session_id[:8],
                    "date": iso_date(session.start_time),
                    "summary": truncate(
                        session.summary or session.first_prompt, 100
                    ),
                    "goal": facet.underlying_goal if facet else "",
                }
            )
    if dates:
        # start_time strings sort chronologically (ISO-8601 ordering).
        dates.sort()
        result.date_range["start"] = iso_date(dates[0])
        result.date_range["end"] = iso_date(dates[-1])
    if all_response_times:
        result.user_response_times = all_response_times
        result.median_response_time = statistics.median(all_response_times)
        result.avg_response_time = sum(all_response_times) / len(
            all_response_times
        )
    if dates:
        unique_days = {iso_date(date) for date in dates}
        result.days_active = len(unique_days)
        if result.days_active:
            result.messages_per_day = round(
                result.total_messages / result.days_active, 1
            )
    result.message_hours = all_message_hours
    result.multi_clauding = detect_multi_clauding(sessions)
    return result
def top_entries(
data: dict[str, int], limit: int = 3, exclude: set[str] | None = None
) -> list[tuple[str, int]]:
exclude = exclude or set()
return [
(key, count)
for key, count in sorted(
data.items(), key=lambda item: item[1], reverse=True
)
if key not in exclude and count > 0
][:limit]
def project_areas_from_heuristics(
    data: AggregatedData,
) -> list[dict[str, Any]]:
    """Derive report "project areas" from goal counts.

    Falls back to the most-visited project paths only when no goal
    category (other than warmup) registered any sessions.
    """
    areas: list[dict[str, Any]] = [
        {
            "name": safe_title(goal),
            "session_count": count,
            "description": PROJECT_AREA_DESCRIPTIONS.get(
                goal,
                "You use Claude Code for this work often enough that it shows up as a recurring pattern in your sessions.",
            ),
        }
        for goal, count in top_entries(
            data.goal_categories, limit=5, exclude={"warmup_minimal"}
        )
    ]
    if areas or not data.projects:
        return areas
    for project_path, count in top_entries(data.projects, limit=3):
        areas.append(
            {
                "name": Path(project_path).name or project_path,
                "session_count": count,
                "description": "This project shows up repeatedly in your transcripts, so it is a meaningful part of your Claude Code workload.",
            }
        )
    return areas
def interaction_style_from_heuristics(data: AggregatedData) -> dict[str, str]:
    """Build the "interaction style" narrative from aggregate signals.

    Composes three fixed sentences — pacing, oversight, and tool
    surface — chosen by simple thresholds, plus a one-line key pattern
    derived alongside the pacing sentence.
    """
    dominant_session_type = (
        top_entries(data.session_types, limit=1)[0][0]
        if data.session_types
        else ""
    )
    # Sentence 1: pacing — fast iteration vs. chunked delegation.
    if (
        dominant_session_type == "iterative_refinement"
        or data.median_response_time < 90
    ):
        sentence_1 = "You tend to iterate quickly with Claude Code, tightening the ask as soon as you see an intermediate result."
        key_pattern = "Fast feedback loops shape how you use Claude."
    elif data.median_response_time > 240 and data.total_interruptions == 0:
        sentence_1 = "You usually hand Claude a chunk of work, let it run, and review after it has produced something substantial."
        key_pattern = "You prefer chunked execution over constant steering."
    else:
        sentence_1 = "You mix direct requests with short follow-up corrections rather than sticking to one rigid interaction pattern."
        key_pattern = "You balance direct asks with light steering."
    # Sentence 2: oversight — interruptions/rejections indicate steering.
    if (
        data.total_interruptions > 0
        or data.friction.get("user_rejected_action", 0) > 0
    ):
        sentence_2 = "You keep a close hand on execution and intervene quickly when the plan starts drifting."
    else:
        sentence_2 = "Once the task is framed well, you usually let Claude carry the middle of the execution rather than micromanaging each step."
    # Sentence 3: tool-surface breadth (agents/MCP vs. core tools).
    if data.sessions_using_task_agent > 0 or data.sessions_using_mcp > 0:
        sentence_3 = "You are willing to widen the tool surface when it clearly buys leverage, instead of staying confined to basic file edits."
    else:
        sentence_3 = "You mostly stay close to repo-local context and core edit/search tools, which keeps the workflow predictable."
    return {
        "narrative": f"{sentence_1}\n\n{sentence_2} {sentence_3}",
        "key_pattern": key_pattern,
    }
def what_works_from_heuristics(data: AggregatedData) -> dict[str, Any]:
    """Pick up to three workflow strengths evidenced by the aggregated data.

    Falls back to a single generic card when no signal fires.
    """
    # (signal, title, description) — evaluated in priority order.
    candidates: list[tuple[bool, str, str]] = [
        (
            data.total_files_modified > 0,
            "Ship Multi-file Changes",
            "You are using Claude for work that actually changes the codebase, not just for explanations. That is where the tool creates the most leverage.",
        ),
        (
            data.success.get("fast_accurate_search", 0) > 0
            or data.goal_categories.get("understand_codebase", 0) > 0,
            "Map Unknown Code Quickly",
            "You use Claude well when the work starts with orientation. Search-heavy sessions are turning into faster diagnoses and clearer edits.",
        ),
        (
            data.git_commits > 0,
            "Close The Loop",
            "You are not stopping at patches. Sessions often make it all the way into a reviewable or commit-ready state, which is the right bar for this workflow.",
        ),
        (
            data.sessions_using_task_agent > 0,
            "Delegate Focused Exploration",
            "When the repo is broad, you are already comfortable letting a narrower thread explore part of the problem space in parallel.",
        ),
    ]
    workflows = [
        {"title": title, "description": description}
        for matched, title, description in candidates
        if matched
    ]
    if not workflows:
        workflows = [
            {
                "title": "Keep Sessions Concrete",
                "description": "Your best sessions are the ones with a concrete target and a visible success condition. The transcripts suggest that clarity pays off immediately.",
            }
        ]
    return {
        "intro": "These are the patterns where Claude Code is already creating real leverage for you.",
        "impressive_workflows": workflows[:3],
    }
def friction_examples(
    category: str, facets: dict[str, SessionFacets]
) -> list[str]:
    """Collect up to two distinct example snippets for a friction category.

    Duplicates are filtered on the normalized form of the snippet.
    """
    examples: list[str] = []
    used: set[str] = set()
    for facet in facets.values():
        if len(examples) == 2:
            break
        if facet.friction_counts.get(category, 0) <= 0:
            continue
        snippet = facet.friction_detail or facet.brief_summary
        key = normalize_instruction(snippet)
        if not snippet or key in used:
            continue
        used.add(key)
        examples.append(truncate(snippet, 110))
    return examples
def friction_analysis_from_heuristics(
    data: AggregatedData, facets: dict[str, SessionFacets]
) -> dict[str, Any]:
    """Summarize the top friction categories with concrete example snippets.

    Emits a single placeholder category when no friction was measured.
    """
    fallback_description = "This pattern shows up often enough that it is worth changing the workflow around it."
    categories: list[dict[str, Any]] = [
        {
            "category": safe_title(key),
            "description": FRICTION_DESCRIPTIONS.get(key, fallback_description),
            "examples": friction_examples(key, facets),
        }
        for key, _count in top_entries(data.friction, limit=3)
    ]
    if not categories:
        categories = [
            {
                "category": "Low Measured Friction",
                "description": "No single friction pattern dominates the transcripts. Most sessions either land cleanly or fail for different reasons.",
                "examples": [],
            }
        ]
    return {
        "intro": "The biggest slowdowns are not random; they cluster into a few repeatable failure modes.",
        "categories": categories,
    }
def repeated_instructions(
    facets: dict[str, SessionFacets],
) -> list[tuple[str, int, str]]:
    """Rank normalized user instructions by how often they recur.

    Returns ``(normalized, count, first_original_wording)`` tuples,
    most frequent first.
    """
    tallies: Counter[str] = Counter()
    first_seen: dict[str, str] = {}
    for facet in facets.values():
        for raw in facet.user_instructions_to_claude:
            key = normalize_instruction(raw)
            if not key:
                continue
            tallies[key] += 1
            # Keep the first original phrasing so output stays readable.
            first_seen.setdefault(key, raw)
    return [
        (key, count, first_seen[key]) for key, count in tallies.most_common()
    ]
def feature_suggestions(
    data: AggregatedData, repeated: list[tuple[str, int, str]]
) -> list[dict[str, str]]:
    """Recommend up to three Claude Code features the user is not leveraging.

    Selection is driven by simple aggregate signals (no MCP usage, no task
    agents, repeated instructions, tool errors, commit/line volume); the
    one-liner and example code come from FEATURE_CATALOG.
    """
    # Rationale copy keyed by feature name. Built once, outside the output
    # loop — the previous version rebuilt this dict on every iteration.
    reasons = {
        "MCP Servers": "You are still solving most tasks with local repo context only. External context would remove lookup overhead when the answer lives outside the tree.",
        "Task Agents": "Your transcripts show broad tasks that would benefit from parallel exploration instead of one linear thread doing all the discovery.",
        "Custom Skills": "You repeat certain instructions enough that they should become a reusable workflow instead of another line in chat.",
        "Hooks": "Validation failures are costing you turns. Automating the checks would catch them before you need to ask again.",
        "Headless Mode": "Some of your work is procedural enough that it can be turned into a repeatable non-interactive job for CI or local automation.",
    }
    selections: list[str] = []
    if data.sessions_using_mcp == 0:
        selections.append("MCP Servers")
    if data.sessions_using_task_agent == 0:
        selections.append("Task Agents")
    if repeated:
        selections.append("Custom Skills")
    if data.total_tool_errors > 0:
        selections.append("Hooks")
    if data.git_commits > 0 or data.total_lines_added > 150:
        selections.append("Headless Mode")
    # dict.fromkeys preserves first-seen order while removing duplicates.
    ordered = list(dict.fromkeys(selections))
    return [
        {
            "feature": name,
            "one_liner": FEATURE_CATALOG[name]["one_liner"],
            "why_for_you": reasons[name],
            "example_code": FEATURE_CATALOG[name]["example_code"],
        }
        for name in ordered[:3]
    ]
def claude_md_additions(
    repeated: list[tuple[str, int, str]],
) -> list[dict[str, str]]:
    """Turn instructions repeated in two or more sessions into up to three
    suggested CLAUDE.md additions."""
    suggestions: list[dict[str, str]] = []
    for _key, count, original in repeated:
        if len(suggestions) == 3:
            break
        if count < 2:
            continue
        suggestions.append(
            {
                "addition": original,
                "why": f"You repeated this in {count} separate sessions. That is strong evidence it belongs in durable project guidance.",
                "prompt_scaffold": "Add under a workflow or validation section in CLAUDE.md.",
            }
        )
    return suggestions
def usage_patterns(
    data: AggregatedData, repeated: list[tuple[str, int, str]]
) -> list[dict[str, str]]:
    """Suggest up to three prompting habits tailored to observed signals."""
    # (applies, pattern) pairs; order determines priority.
    proposals: list[tuple[bool, dict[str, str]]] = [
        (
            data.total_tool_errors > 0,
            {
                "title": "Ask For A Validation Pass",
                "suggestion": "Separate implementation from verification so the model knows the job is not done at the first patch.",
                "detail": "This is the fastest way to reduce avoidable retries when the first edit is plausible but not yet proven. It is especially useful in sessions that already include shell validation.",
                "copyable_prompt": "Make the minimal patch first, then run the relevant validation and tell me exactly what still fails before doing any more refactoring.",
            },
        ),
        (
            data.total_interruptions > 0,
            {
                "title": "Split Plan From Execute",
                "suggestion": "Use one short turn to force a bounded plan before Claude starts changing files.",
                "detail": "Your interruptions suggest that the problem is often not effort but drift. A compact execution plan makes it easier to catch the wrong approach before the patch grows.",
                "copyable_prompt": "Before editing anything, give me a 3-step plan with the files you expect to touch and the validation you will run. Wait for approval.",
            },
        ),
        (
            bool(repeated),
            {
                "title": "Promote Repeated Constraints",
                "suggestion": "Move recurring instructions into persistent project guidance instead of restating them in chat.",
                "detail": "If the same constraint appears in multiple sessions, it is no longer session-specific. Turning it into durable guidance frees the conversation to focus on the task.",
                "copyable_prompt": "Review the last few sessions and extract the recurring instructions I keep repeating. Draft the exact CLAUDE.md additions you would recommend.",
            },
        ),
        (
            data.sessions_using_task_agent == 0,
            {
                "title": "Use Parallel Exploration",
                "suggestion": "Ask Claude to split discovery work across agents when a question touches multiple subsystems.",
                "detail": "This is most useful when the next step is blocked on understanding several parts of the repo at once. It keeps the main thread focused on synthesis instead of raw search.",
                "copyable_prompt": "Use one agent to trace the failing code path, another to inspect tests, and then summarize the overlap before making changes.",
            },
        ),
    ]
    return [pattern for applies, pattern in proposals if applies][:3]
def on_the_horizon(data: AggregatedData) -> dict[str, Any]:
    """Describe three forward-looking workflow opportunities.

    The third card's pitch is upgraded when the transcripts show no tool
    errors, since that user can tolerate longer unsupervised loops.
    """
    background_pitch = "The longer-term opportunity is a workflow where Claude iterates against failing checks with less supervision and hands you a compact review packet when it converges."
    if data.total_tool_errors == 0:
        background_pitch = "Because your sessions are not dominated by tool failure, you are a good candidate for longer autonomous repair loops with less supervision."
    opportunities = [
        {
            "title": "Patch Then Verify In Parallel",
            "whats_possible": "A stronger workflow is to let one thread patch while another thread prepares validation or regression checks. That shrinks the dead time between edit and confidence.",
            "how_to_try": "Use agents for exploration and keep the main thread for the final patch synthesis.",
            "copyable_prompt": "Use one agent to prepare the patch plan, another to identify the best validation commands, then merge the findings and implement the smallest safe fix.",
        },
        {
            "title": "Repo-Wide Maintenance Bursts",
            "whats_possible": "As models improve, the obvious next step is batching repetitive repo maintenance instead of handling one fix at a time. That includes lint cleanup, test migrations, and repeated mechanical edits.",
            "how_to_try": "Pair headless mode with a narrow validation command so the batch job has a hard stop condition.",
            "copyable_prompt": "Identify one mechanical issue repeated across the repo, fix it in the smallest safe batch, and stop if the validation command starts failing for a new reason.",
        },
        {
            "title": "Background Repair Loops",
            "whats_possible": background_pitch,
            "how_to_try": "Use scripts or CI entrypoints with headless mode so the loop can restart from the same validation target.",
            "copyable_prompt": "Treat the failing CI target as the contract. Iterate until it passes or you can prove the blocker is environmental, then summarize the exact diff and remaining risk.",
        },
    ]
    return {
        "intro": "The next gains are less about better autocomplete and more about moving whole workflows into repeatable loops.",
        "opportunities": opportunities,
    }
def fun_ending_from_heuristics(
    facets: dict[str, SessionFacets],
) -> dict[str, str]:
    """Build the closing highlight card for the report.

    Preference order: an instruction repeated across sessions, then a
    session with positive satisfaction, then any session; an empty dict
    when there are no facets at all.
    """
    ranked = repeated_instructions(facets)
    if ranked and ranked[0][1] >= 2:
        return {
            "headline": f'"{ranked[0][2]}" kept coming back across sessions.',
            "detail": "That is usually a sign that the workflow wants a durable default instead of another reminder in chat.",
        }

    def _pleased(facet: SessionFacets) -> bool:
        # Either satisfaction bucket counts as a positive session.
        counts = facet.user_satisfaction_counts
        return counts.get("happy", 0) > 0 or counts.get("satisfied", 0) > 0

    chosen = next((facet for facet in facets.values() if _pleased(facet)), None)
    if chosen is None and facets:
        chosen = next(iter(facets.values()))
    if chosen is None:
        return {}
    return {
        "headline": truncate(chosen.underlying_goal, 90),
        "detail": chosen.brief_summary,
    }
def at_a_glance(
    interaction: dict[str, str],
    what_works: dict[str, Any],
    friction: dict[str, Any],
    suggestions: dict[str, Any],
    horizon: dict[str, Any],
) -> dict[str, str]:
    """Condense the generated insight sections into the four summary lines
    shown in the report's "At a Glance" box.

    Each value is a short sentence; ``whats_hindering`` is empty when the
    friction section has no content.
    """
    working = interaction.get("key_pattern", "")
    if what_works.get("impressive_workflows"):
        first = what_works["impressive_workflows"][0]["title"]
        working = f"{working} Your strongest sessions usually end with {first.lower()}."
    # `or [{}]` guards both a missing key and an explicit empty list; the
    # previous `.get("categories", [{}])[0]` raised IndexError when the key
    # was present but the list was empty.
    hindering = (friction.get("categories") or [{}])[0]
    hindering_text = ""
    if hindering:
        hindering_text = f"The main drag is {str(hindering.get('category', '')).lower()}. You lose momentum when the first path is not cheap to validate or redirect."
    feature_names = [
        item["feature"] for item in suggestions.get("features_to_try", [])
    ]
    quick_wins = (
        ", ".join(feature_names[:2])
        if feature_names
        else "promoting repeated guidance into CLAUDE.md"
    )
    quick_wins_text = f"The fastest upgrades are {quick_wins}. They directly target the repeated overhead in these sessions."
    opportunity_names = [
        item["title"] for item in horizon.get("opportunities", [])
    ]
    ambitious = (
        ", ".join(opportunity_names[:2])
        if opportunity_names
        else "patch-and-verify loops"
    )
    ambitious_text = f"The next workflow to prepare for is {ambitious.lower()}. Better models will make longer repair and validation loops much more practical."
    return {
        "whats_working": working,
        "whats_hindering": hindering_text,
        "quick_wins": quick_wins_text,
        "ambitious_workflows": ambitious_text,
    }
def generate_heuristic_insights(
    data: AggregatedData, facets: dict[str, SessionFacets]
) -> dict[str, Any]:
    """Assemble every heuristic report section into a single payload dict."""
    repeated = repeated_instructions(facets)
    interaction = interaction_style_from_heuristics(data)
    works = what_works_from_heuristics(data)
    friction = friction_analysis_from_heuristics(data, facets)
    suggestions = {
        "claude_md_additions": claude_md_additions(repeated),
        "features_to_try": feature_suggestions(data, repeated),
        "usage_patterns": usage_patterns(data, repeated),
    }
    horizon = on_the_horizon(data)
    return {
        "project_areas": {"areas": project_areas_from_heuristics(data)},
        "interaction_style": interaction,
        "what_works": works,
        "friction_analysis": friction,
        "suggestions": suggestions,
        "on_the_horizon": horizon,
        "fun_ending": fun_ending_from_heuristics(facets),
        "at_a_glance": at_a_glance(
            interaction, works, friction, suggestions, horizon
        ),
    }
def escape_html_with_bold(text: str) -> str:
    """HTML-escape *text*, then render ``**bold**`` markers as <strong> tags.

    ``None`` and other falsy inputs yield an empty string.
    """
    safe = html.escape(text or "")
    return re.sub(r"\*\*(.+?)\*\*", lambda m: f"<strong>{m.group(1)}</strong>", safe)
def markdown_to_html(text: str) -> str:
    """Render a minimal markdown subset: blank-line paragraphs, single
    newlines as <br>, and ``**bold**`` spans."""
    chunks = (part for part in text.split("\n\n") if part.strip())
    return "\n".join(
        "<p>{}</p>".format(escape_html_with_bold(chunk).replace("\n", "<br>"))
        for chunk in chunks
    )
def generate_bar_chart(
    data: dict[str, int],
    color: str,
    max_items: int = 6,
    fixed_order: list[str] | None = None,
) -> str:
    """Render a horizontal bar chart as HTML rows.

    With *fixed_order*, labels render in that order and zero-count entries
    are dropped; otherwise the top *max_items* entries render sorted by
    count, descending.
    """
    if fixed_order:
        entries = [
            (label, data[label])
            for label in fixed_order
            if data.get(label, 0) > 0
        ]
    else:
        ranked = sorted(data.items(), key=lambda item: item[1], reverse=True)
        entries = ranked[:max_items]
    if not entries:
        return '<p class="empty">No data</p>'
    # Guard against an all-zero chart dividing by zero.
    max_value = max(count for _label, count in entries) or 1

    def _row(label: str, count: int) -> str:
        width = (count / max_value) * 100
        return (
            f'<div class="bar-row"><div class="bar-label">{html.escape(safe_title(label))}</div>'
            f'<div class="bar-track"><div class="bar-fill" style="width:{width:.2f}%;background:{color}"></div></div>'
            f'<div class="bar-value">{count}</div></div>'
        )

    return "\n".join(_row(label, count) for label, count in entries)
def generate_response_time_histogram(times: list[float]) -> str:
    """Bucket response times (seconds) and render them as a bar chart.

    Buckets are passed to the chart via ``fixed_order`` so the histogram
    reads chronologically from fast to slow; without it, generate_bar_chart
    re-sorts the buckets by count, which scrambles a histogram's axis.
    """
    if not times:
        return '<p class="empty">No response time data</p>'
    # (exclusive upper bound in seconds, label); the last bucket is open-ended.
    boundaries = [
        (10, "2-10s"),
        (30, "10-30s"),
        (60, "30s-1m"),
        (120, "1-2m"),
        (300, "2-5m"),
        (900, "5-15m"),
        (math.inf, ">15m"),
    ]
    bucket_counts = {label: 0 for _bound, label in boundaries}
    for value in times:
        for bound, label in boundaries:
            if value < bound:
                bucket_counts[label] += 1
                break
    return generate_bar_chart(
        bucket_counts,
        "#6366f1",
        max_items=len(bucket_counts),
        fixed_order=[label for _bound, label in boundaries],
    )
def generate_time_of_day_chart(hours: list[int]) -> str:
    """Group message hours (0-23) into four day periods and chart them.

    Hours outside every period are silently skipped.
    """
    if not hours:
        return '<p class="empty">No time data</p>'
    periods = {
        "Morning (6-12)": range(6, 12),
        "Afternoon (12-18)": range(12, 18),
        "Evening (18-24)": range(18, 24),
        "Night (0-6)": range(6),
    }
    counts = dict.fromkeys(periods, 0)
    for hour in hours:
        label = next(
            (name for name, span in periods.items() if hour in span), None
        )
        if label is not None:
            counts[label] += 1
    return generate_bar_chart(counts, "#8b5cf6", max_items=len(counts))
def render_cards(
    cards: list[dict[str, Any]],
    title_key: str,
    description_key: str,
    class_name: str,
) -> str:
    """Render a vertical stack of simple title/body cards.

    Returns an empty string when *cards* is empty; missing or falsy values
    render as empty text.
    """
    if not cards:
        return ""
    rendered = [
        f'<div class="{class_name}">'
        f'<div class="card-title">{html.escape(str(card.get(title_key) or ""))}</div>'
        f'<div class="card-body">{html.escape(str(card.get(description_key) or ""))}</div>'
        "</div>"
        for card in cards
    ]
    return "\n".join(['<div class="card-stack">', *rendered, "</div>"])
def generate_html_report(
    data: AggregatedData, insights: dict[str, Any]
) -> str:
    """Render the full self-contained HTML report as a single string.

    Combines the aggregated metrics (header stats, bar charts) with the
    heuristic insight sections. All CSS is inlined so the output opens
    directly in a browser with no external assets. Sections whose insight
    payload is missing or empty are simply omitted.
    """
    # Every section defaults to an empty container so a partially
    # populated insights payload still renders.
    at_a_glance = insights.get("at_a_glance") or {}
    project_areas = (insights.get("project_areas") or {}).get("areas") or []
    interaction_style = insights.get("interaction_style") or {}
    what_works = insights.get("what_works") or {}
    friction = insights.get("friction_analysis") or {}
    suggestions = insights.get("suggestions") or {}
    horizon = insights.get("on_the_horizon") or {}
    fun = insights.get("fun_ending") or {}
    # "At a Glance" summary lines, rendered only for keys with content.
    glance_sections = []
    for label, key in (
        ("What's working", "whats_working"),
        ("What's hindering you", "whats_hindering"),
        ("Quick wins to try", "quick_wins"),
        ("Ambitious workflows", "ambitious_workflows"),
    ):
        if at_a_glance.get(key):
            glance_sections.append(
                f'<div class="glance-section"><strong>{html.escape(label)}:</strong> {escape_html_with_bold(str(at_a_glance[key]))}</div>'
            )
    # Card builders: each list holds pre-rendered HTML snippets for one
    # report section; the final assembly only emits non-empty sections.
    project_cards = []
    for area in project_areas:
        project_cards.append(
            '<div class="project-card">'
            f'<div class="project-header"><span class="project-name">{html.escape(str(area.get("name") or ""))}</span>'
            f'<span class="project-count">~{int(area.get("session_count") or 0)} sessions</span></div>'
            f'<div class="project-desc">{html.escape(str(area.get("description") or ""))}</div>'
            "</div>"
        )
    what_works_cards = []
    for item in what_works.get("impressive_workflows") or []:
        what_works_cards.append(
            '<div class="success-card">'
            f'<div class="card-title">{html.escape(str(item.get("title") or ""))}</div>'
            f'<div class="card-body">{html.escape(str(item.get("description") or ""))}</div>'
            "</div>"
        )
    friction_cards = []
    for item in friction.get("categories") or []:
        examples_html = ""
        examples = item.get("examples") or []
        if examples:
            examples_html = (
                "<ul>"
                + "".join(
                    f"<li>{html.escape(str(example))}</li>"
                    for example in examples
                )
                + "</ul>"
            )
        friction_cards.append(
            '<div class="warning-card">'
            f'<div class="card-title">{html.escape(str(item.get("category") or ""))}</div>'
            f'<div class="card-body">{html.escape(str(item.get("description") or ""))}</div>'
            f"{examples_html}"
            "</div>"
        )
    feature_cards = []
    for item in suggestions.get("features_to_try") or []:
        code = item.get("example_code") or ""
        feature_cards.append(
            '<div class="feature-card">'
            f'<div class="card-title">{html.escape(str(item.get("feature") or ""))}</div>'
            f'<div class="card-body">{html.escape(str(item.get("one_liner") or ""))}</div>'
            f'<div class="subtle"><strong>Why for you:</strong> {html.escape(str(item.get("why_for_you") or ""))}</div>'
            f"<pre>{html.escape(str(code))}</pre>"
            "</div>"
        )
    addition_cards = []
    for item in suggestions.get("claude_md_additions") or []:
        addition_cards.append(
            '<div class="feature-card">'
            f'<div class="card-title">{html.escape(str(item.get("addition") or ""))}</div>'
            f'<div class="subtle">{html.escape(str(item.get("why") or ""))}</div>'
            f'<div class="muted">{html.escape(str(item.get("prompt_scaffold") or ""))}</div>'
            "</div>"
        )
    pattern_cards = []
    for item in suggestions.get("usage_patterns") or []:
        pattern_cards.append(
            '<div class="info-card">'
            f'<div class="card-title">{html.escape(str(item.get("title") or ""))}</div>'
            f'<div class="card-body">{html.escape(str(item.get("suggestion") or ""))}</div>'
            f'<div class="subtle">{html.escape(str(item.get("detail") or ""))}</div>'
            f"<pre>{html.escape(str(item.get('copyable_prompt') or ''))}</pre>"
            "</div>"
        )
    horizon_cards = []
    for item in horizon.get("opportunities") or []:
        horizon_cards.append(
            '<div class="future-card">'
            f'<div class="card-title">{html.escape(str(item.get("title") or ""))}</div>'
            f'<div class="card-body">{html.escape(str(item.get("whats_possible") or ""))}</div>'
            f'<div class="subtle"><strong>Getting started:</strong> {html.escape(str(item.get("how_to_try") or ""))}</div>'
            f"<pre>{html.escape(str(item.get('copyable_prompt') or ''))}</pre>"
            "</div>"
        )
    # Inline stylesheet: emitted verbatim into a <style> tag below.
    css = """
    * { box-sizing: border-box; }
    body {
        margin: 0;
        font-family: "SF Pro Text", "Inter", -apple-system, BlinkMacSystemFont, sans-serif;
        background: #f8fafc;
        color: #334155;
        line-height: 1.6;
    }
    .container {
        max-width: 980px;
        margin: 0 auto;
        padding: 40px 20px 80px;
    }
    h1 {
        margin: 0 0 8px;
        font-size: 34px;
        color: #0f172a;
    }
    h2 {
        margin: 40px 0 14px;
        font-size: 20px;
        color: #0f172a;
    }
    .subtitle {
        margin: 0 0 28px;
        color: #64748b;
    }
    .glance {
        padding: 20px 24px;
        background: linear-gradient(135deg, #fef3c7, #fde68a);
        border: 1px solid #f59e0b;
        border-radius: 14px;
        margin-bottom: 28px;
    }
    .glance-title {
        margin-bottom: 12px;
        font-size: 15px;
        font-weight: 700;
        color: #92400e;
    }
    .glance-section {
        margin-bottom: 10px;
        color: #78350f;
    }
    .stats {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
        gap: 12px;
        margin: 24px 0 36px;
    }
    .stat {
        background: white;
        border: 1px solid #e2e8f0;
        border-radius: 12px;
        padding: 14px 16px;
    }
    .stat-value {
        font-size: 24px;
        font-weight: 700;
        color: #0f172a;
    }
    .stat-label {
        font-size: 11px;
        letter-spacing: 0.08em;
        text-transform: uppercase;
        color: #64748b;
    }
    .grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
        gap: 18px;
        margin: 18px 0;
    }
    .panel {
        background: white;
        border: 1px solid #e2e8f0;
        border-radius: 14px;
        padding: 16px;
    }
    .panel-title {
        margin-bottom: 12px;
        font-size: 12px;
        text-transform: uppercase;
        letter-spacing: 0.08em;
        color: #64748b;
    }
    .bar-row {
        display: flex;
        align-items: center;
        gap: 8px;
        margin-bottom: 8px;
    }
    .bar-label {
        width: 120px;
        font-size: 12px;
        color: #475569;
    }
    .bar-track {
        flex: 1;
        height: 8px;
        background: #f1f5f9;
        border-radius: 999px;
        overflow: hidden;
    }
    .bar-fill {
        height: 100%;
        border-radius: 999px;
    }
    .bar-value {
        width: 36px;
        text-align: right;
        font-size: 12px;
        color: #64748b;
    }
    .project-list,
    .card-stack {
        display: flex;
        flex-direction: column;
        gap: 12px;
    }
    .project-card,
    .success-card,
    .warning-card,
    .feature-card,
    .info-card,
    .future-card {
        background: white;
        border-radius: 12px;
        padding: 16px;
        border: 1px solid #e2e8f0;
    }
    .success-card {
        background: #f0fdf4;
        border-color: #86efac;
    }
    .warning-card {
        background: #fef2f2;
        border-color: #fca5a5;
    }
    .feature-card {
        background: #eff6ff;
        border-color: #bfdbfe;
    }
    .info-card {
        background: #f0f9ff;
        border-color: #7dd3fc;
    }
    .future-card {
        background: #faf5ff;
        border-color: #c4b5fd;
    }
    .project-header {
        display: flex;
        justify-content: space-between;
        gap: 8px;
        margin-bottom: 8px;
    }
    .project-name,
    .card-title {
        font-weight: 700;
        color: #0f172a;
    }
    .project-count {
        font-size: 12px;
        color: #64748b;
    }
    .project-desc,
    .card-body {
        color: #475569;
    }
    .subtle {
        margin-top: 8px;
        color: #334155;
        font-size: 14px;
    }
    .muted {
        margin-top: 6px;
        color: #64748b;
        font-size: 13px;
    }
    pre {
        white-space: pre-wrap;
        margin: 12px 0 0;
        padding: 12px;
        border-radius: 10px;
        background: #f8fafc;
        border: 1px solid #e2e8f0;
        font-size: 12px;
        overflow-x: auto;
    }
    .empty {
        color: #94a3b8;
    }
    .narrative {
        background: white;
        border: 1px solid #e2e8f0;
        border-radius: 14px;
        padding: 18px;
    }
    .narrative p {
        margin: 0 0 12px;
    }
    .fun {
        margin-top: 40px;
        padding: 24px;
        border-radius: 16px;
        background: linear-gradient(135deg, #fef3c7, #fde68a);
        border: 1px solid #f59e0b;
        text-align: center;
    }
    .fun-quote {
        font-size: 19px;
        font-weight: 700;
        color: #78350f;
    }
    .fun-detail {
        margin-top: 8px;
        color: #92400e;
    }
    ul {
        margin: 10px 0 0 18px;
    }
    @media (max-width: 640px) {
        .bar-label {
            width: 92px;
        }
    }
    """
    # Document skeleton: header plus the summary subtitle.
    html_parts = [
        "<!DOCTYPE html>",
        "<html>",
        "<head>",
        '<meta charset="utf-8">',
        "<title>Claude Code Insights (Python)</title>",
        f"<style>{css}</style>",
        "</head>",
        "<body>",
        '<div class="container">',
        "<h1>Claude Code Insights (Python)</h1>",
        f'<p class="subtitle">{data.total_messages:,} messages across {data.total_sessions} sessions | {html.escape(data.date_range.get("start", ""))} to {html.escape(data.date_range.get("end", ""))}</p>',
    ]
    if glance_sections:
        html_parts.append(
            '<div class="glance"><div class="glance-title">At a Glance</div>'
        )
        html_parts.extend(glance_sections)
        html_parts.append("</div>")
    # Headline stat tiles are always shown.
    html_parts.append(
        '<div class="stats">'
        f'<div class="stat"><div class="stat-value">{data.total_messages:,}</div><div class="stat-label">Messages</div></div>'
        f'<div class="stat"><div class="stat-value">{data.total_sessions}</div><div class="stat-label">Sessions</div></div>'
        f'<div class="stat"><div class="stat-value">{data.days_active}</div><div class="stat-label">Days Active</div></div>'
        f'<div class="stat"><div class="stat-value">{data.total_files_modified}</div><div class="stat-label">Files Modified</div></div>'
        f'<div class="stat"><div class="stat-value">{data.git_commits}</div><div class="stat-label">Commits</div></div>'
        "</div>"
    )
    if project_cards:
        html_parts.append("<h2>What You Work On</h2>")
        html_parts.append('<div class="project-list">')
        html_parts.extend(project_cards)
        html_parts.append("</div>")
    if interaction_style.get("narrative"):
        html_parts.append("<h2>How You Use Claude Code</h2>")
        html_parts.append(
            f'<div class="narrative">{markdown_to_html(str(interaction_style["narrative"]))}</div>'
        )
    # Chart grid: always rendered; empty charts degrade to "No data".
    html_parts.append('<div class="grid">')
    html_parts.append(
        f'<div class="panel"><div class="panel-title">What You Wanted</div>{generate_bar_chart(data.goal_categories, "#2563eb")}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Top Tools Used</div>{generate_bar_chart(data.tool_counts, "#0891b2")}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Languages</div>{generate_bar_chart(data.languages, "#10b981")}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Session Types</div>{generate_bar_chart(data.session_types, "#8b5cf6")}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Response Time Distribution</div>{generate_response_time_histogram(data.user_response_times)}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Messages By Time Of Day</div>{generate_time_of_day_chart(data.message_hours)}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Outcomes</div>{generate_bar_chart(data.outcomes, "#7c3aed", fixed_order=OUTCOME_ORDER)}</div>'
    )
    html_parts.append(
        f'<div class="panel"><div class="panel-title">Satisfaction</div>{generate_bar_chart(data.satisfaction, "#eab308", fixed_order=SATISFACTION_ORDER)}</div>'
    )
    html_parts.append("</div>")
    # Narrative insight sections, each emitted only when it has cards.
    if what_works_cards:
        html_parts.append("<h2>Impressive Things You Did</h2>")
        if what_works.get("intro"):
            html_parts.append(
                f'<p class="subtitle">{html.escape(str(what_works["intro"]))}</p>'
            )
        html_parts.append('<div class="card-stack">')
        html_parts.extend(what_works_cards)
        html_parts.append("</div>")
    if friction_cards:
        html_parts.append("<h2>Where Things Go Wrong</h2>")
        if friction.get("intro"):
            html_parts.append(
                f'<p class="subtitle">{html.escape(str(friction["intro"]))}</p>'
            )
        html_parts.append('<div class="card-stack">')
        html_parts.extend(friction_cards)
        html_parts.append("</div>")
    if addition_cards:
        html_parts.append("<h2>Suggested CLAUDE.md Additions</h2>")
        html_parts.append('<div class="card-stack">')
        html_parts.extend(addition_cards)
        html_parts.append("</div>")
    if feature_cards:
        html_parts.append("<h2>Existing Claude Code Features To Try</h2>")
        html_parts.append('<div class="card-stack">')
        html_parts.extend(feature_cards)
        html_parts.append("</div>")
    if pattern_cards:
        html_parts.append("<h2>New Ways To Use Claude Code</h2>")
        html_parts.append('<div class="card-stack">')
        html_parts.extend(pattern_cards)
        html_parts.append("</div>")
    if horizon_cards:
        html_parts.append("<h2>On The Horizon</h2>")
        if horizon.get("intro"):
            html_parts.append(
                f'<p class="subtitle">{html.escape(str(horizon["intro"]))}</p>'
            )
        html_parts.append('<div class="card-stack">')
        html_parts.extend(horizon_cards)
        html_parts.append("</div>")
    # Closing "fun" highlight card, when one was generated.
    if fun.get("headline"):
        html_parts.append('<div class="fun">')
        html_parts.append(
            f'<div class="fun-quote">"{html.escape(str(fun["headline"]))}"</div>'
        )
        if fun.get("detail"):
            html_parts.append(
                f'<div class="fun-detail">{html.escape(str(fun["detail"]))}</div>'
            )
        html_parts.append("</div>")
    html_parts.extend(["</div>", "</body>", "</html>"])
    return "\n".join(html_parts)
def build_export_data(
    data: AggregatedData,
    insights: dict[str, Any],
    facets: dict[str, SessionFacets],
    project_scope_prefix: str | None = None,
) -> dict[str, Any]:
    """Shape the aggregated metrics, insights, and facet tallies for the
    JSON export file."""
    goal_totals: Counter[str] = Counter()
    outcome_totals: Counter[str] = Counter()
    satisfaction_totals: Counter[str] = Counter()
    friction_totals: Counter[str] = Counter()
    for facet in facets.values():
        # Only positive counts contribute, matching what the report shows.
        goal_totals.update(
            {k: c for k, c in facet.goal_categories.items() if c > 0}
        )
        outcome_totals[facet.outcome] += 1
        satisfaction_totals.update(
            {k: c for k, c in facet.user_satisfaction_counts.items() if c > 0}
        )
        friction_totals.update(
            {k: c for k, c in facet.friction_counts.items() if c > 0}
        )
    facets_summary = {
        "total": len(facets),
        "goal_categories": dict(goal_totals),
        "outcomes": dict(outcome_totals),
        "satisfaction": dict(satisfaction_totals),
        "friction": dict(friction_totals),
    }
    return {
        "metadata": {
            "username": os.getenv("USER") or "unknown",
            "generated_at": datetime.now(tz=timezone.utc).isoformat(),
            "claude_code_version": "python-port",
            "date_range": data.date_range,
            "session_count": data.total_sessions,
            "project_scope_prefix": project_scope_prefix,
        },
        "aggregated_data": asdict(data),
        "insights": insights,
        "facets_summary": facets_summary,
    }
def generate_usage_report(args: argparse.Namespace) -> dict[str, Any]:
    """Run the full insights pipeline and write the HTML/JSON reports.

    Steps: scan transcript files, load per-session metadata (cache-first,
    with a cap on fresh parses), keep the richest variant of each session,
    extract heuristic facets, aggregate, generate insights, and write the
    two report files.

    Returns a dict with the output paths plus the aggregated data, the
    insights payload, and the facets that were used.
    """
    cache_dir: Path = args.cache_dir
    ensure_dir(cache_dir)
    scope = build_project_scope(args.project_path_prefix)
    scanned_sessions = scan_all_sessions(args.projects_dir)
    total_sessions_scanned = 0
    metas: list[SessionMeta] = []
    logs_for_facets: dict[str, SessionLog] = {}
    uncached_sessions: list[dict[str, Any]] = []
    # Cache-first pass: reuse cached metadata where available; cap how many
    # transcripts are parsed from scratch this run.
    for item in scanned_sessions:
        cached = load_cached_session_meta(cache_dir, item["session_id"])
        if cached and matches_project_scope(cached.project_path, scope):
            metas.append(cached)
            total_sessions_scanned += 1
        elif len(uncached_sessions) < args.max_sessions_load:
            uncached_sessions.append(item)
    for item in uncached_sessions:
        logs = load_all_logs_from_session_file(item["path"])
        if not logs:
            continue
        best_log = choose_best_log(logs, scope)
        if best_log is None:
            continue
        if is_meta_session(best_log):
            continue
        meta = log_to_session_meta(best_log)
        metas.append(meta)
        total_sessions_scanned += 1
        logs_for_facets[meta.session_id] = best_log
        save_session_meta(cache_dir, meta)
    # Deduplicate by session id, keeping the richest variant (most user
    # messages, then longest duration).
    best_by_session: dict[str, SessionMeta] = {}
    for meta in metas:
        current = best_by_session.get(meta.session_id)
        if (
            current is None
            or meta.user_message_count > current.user_message_count
            or (
                meta.user_message_count == current.user_message_count
                and meta.duration_minutes > current.duration_minutes
            )
        ):
            best_by_session[meta.session_id] = meta
    metas = sorted(
        best_by_session.values(),
        key=lambda meta: meta.start_time,
        reverse=True,
    )
    substantive_metas = [
        meta for meta in metas if is_substantive_session(meta)
    ]
    # Facet extraction, again cache-first and capped.
    facets: dict[str, SessionFacets] = {}
    facet_candidates: list[SessionMeta] = []
    for meta in substantive_metas:
        cached = load_cached_facets(cache_dir, meta.session_id)
        if cached:
            facets[meta.session_id] = cached
        else:
            facet_candidates.append(meta)
    for meta in facet_candidates[: args.max_facet_extractions]:
        log = logs_for_facets.get(meta.session_id)
        if log is None:
            # The log was not parsed this run (cached metadata): locate the
            # session's transcript file and re-parse it. (Previously this
            # branch also built and immediately deleted an unused Path.)
            session_file = None
            for item in scanned_sessions:
                if item["session_id"] == meta.session_id:
                    session_file = item["path"]
                    break
            if session_file:
                logs = load_all_logs_from_session_file(session_file)
                if logs:
                    log = choose_best_log(logs, scope)
                    if log is None:
                        continue
                    logs_for_facets[meta.session_id] = log
        if not log:
            continue
        facet = heuristic_extract_facets(log, meta)
        facets[facet.session_id] = facet
        save_facets(cache_dir, facet)
    # Drop trivially small sessions, unless that would leave nothing.
    substantive_facets = {
        session_id: facet
        for session_id, facet in facets.items()
        if not is_minimal_session(facet)
    }
    substantive_sessions = [
        meta
        for meta in substantive_metas
        if meta.session_id in substantive_facets
    ]
    if not substantive_sessions:
        substantive_sessions = substantive_metas
    aggregated = aggregate_data(substantive_sessions, substantive_facets)
    aggregated.total_sessions_scanned = total_sessions_scanned
    insights = generate_heuristic_insights(aggregated, substantive_facets)
    # Write the two report artifacts, defaulting into the cache directory.
    output_html = args.output_html or (cache_dir / "report.html")
    output_json = args.output_json or (cache_dir / "report.json")
    ensure_dir(output_html.parent)
    ensure_dir(output_json.parent)
    output_html.write_text(
        generate_html_report(aggregated, insights), encoding="utf-8"
    )
    output_json.write_text(
        json.dumps(
            build_export_data(
                aggregated,
                insights,
                substantive_facets,
                args.project_path_prefix,
            ),
            indent=2,
        ),
        encoding="utf-8",
    )
    return {
        "html_path": output_html,
        "json_path": output_json,
        "data": aggregated,
        "insights": insights,
        "facets": substantive_facets,
    }
def print_summary(result: dict[str, Any]) -> None:
    """Print the report paths and headline stats/insights to stdout."""
    data: AggregatedData = result["data"]
    glance = result["insights"].get("at_a_glance") or {}
    print(f"Wrote HTML report: {result['html_path']}")
    print(f"Wrote JSON export: {result['json_path']}")
    print(
        f"Analyzed {data.total_sessions} sessions "
        f"({data.total_messages} user messages, {round(data.total_duration_hours)}h) "
        f"from {data.date_range.get('start', '')} to {data.date_range.get('end', '')}"
    )
    for label, key in (
        ("What's working", "whats_working"),
        ("Quick wins", "quick_wins"),
    ):
        if glance.get(key):
            print(f"{label}: {glance[key]}")
def main() -> int:
    """CLI entry point: generate the report, print a summary, return 0."""
    print_summary(generate_usage_report(parse_args()))
    return 0
# Script entry point: exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())