perf(analytics): use rfind and local json.loads (#44)

* perf(analytics): use rfind and local json.loads for hot paths

Replace Path().suffix with string rfind for extension extraction,
use local json.loads binding and bytes split for JSONL parsing.

* fix: use splitlines and preserve extensionless file behavior

split("\n") mishandles \r\n line endings. The early return on
extensionless files changed behavior vs the original Path().suffix
which returned "" and fell through. Use splitlines() and let
extensionless files fall through with lang=None.

* style: use ternary for extensionless file check per SIM108

* Add blackbox benchmark VM infra

D2s_v5 (non-burstable, 2 vCPU, 8 GB) with cloud-init provisioning,
CPU-pinned benchmarks, and A/B comparison scripts.

---------

Co-authored-by: codeflash[bot] <codeflash[bot]@users.noreply.github.com>
This commit is contained in:
Kevin Turcios 2026-04-29 03:22:42 -05:00 committed by GitHub
parent 1e8cbbede4
commit 1ff2a76152
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 376 additions and 5 deletions

View file

@@ -0,0 +1,256 @@
#cloud-config
#
# Benchmark VM provisioning for blackbox package (codeflash-agent monorepo)
#
# Pure Python package -- no system-level deps beyond build tools.
# Private repo: requires SSH agent forwarding for clone.
#
# Two-phase setup:
# Phase 1 (cloud-init): packages, hyperfine, uv
# Phase 2 (manual): ssh -A, clone, uv sync, baseline benchmarks
#
# Usage:
# az vm create ... --custom-data infra/cloud-init.yaml
# bash infra/vm-manage.sh ssh
# bash ~/setup.sh
#
# VM: Azure Standard_D2s_v5 (2 vCPU, 8 GB RAM, general-purpose)
# Smallest non-burstable option -- blackbox is CPU-bound, not memory-bound.
# Use taskset -c 0 to pin benchmarks to 1 core for consistent results.
# Non-burstable ensures consistent CPU -- no credit depletion or turbo variability.
package_update: true
packages:
- git
- build-essential
- curl
write_files:
# --- Benchmark: blackbox functions (main vs branch) ---
- path: /home/azureuser/bench/bench_blackbox.sh
owner: azureuser:azureuser
permissions: "0755"
defer: true
content: |
#!/usr/bin/env bash
# Check out the requested branch, sync dependencies, and run the blackbox
# micro-benchmarks pinned to a single CPU core.
set -euo pipefail

branch="${1:-HEAD}"
py=.venv/bin/python

cd ~/codeflash-agent
echo "=== Benchmarking blackbox: $branch ==="

git fetch origin
git checkout "$branch"
uv sync

# Pin to core 0 so timings are comparable across runs on the 2-vCPU VM.
taskset -c 0 "$py" /home/azureuser/bench/bench_functions.py ~/codeflash-agent
# --- Benchmark: A/B comparison ---
- path: /home/azureuser/bench/bench_ab.sh
owner: azureuser:azureuser
permissions: "0755"
defer: true
content: |
#!/usr/bin/env bash
# Run the blackbox benchmark on a baseline branch and then on an optimized
# branch, so the two result sets can be compared side by side.
set -euo pipefail

usage="Usage: bench_ab.sh <base-branch> <opt-branch>"
BASE="${1:?$usage}"
OPT="${2:?$usage}"

# Print a labelled section header, then benchmark the given branch.
run_side() {
  echo ""
  echo "--- $1: $2 ---"
  bash ~/bench/bench_blackbox.sh "$2"
}

echo "=== A/B comparison: $BASE vs $OPT ==="
run_side BASELINE "$BASE"
run_side OPTIMIZED "$OPT"
# --- Benchmark: Python script for per-function timing ---
- path: /home/azureuser/bench/bench_functions.py
owner: azureuser:azureuser
permissions: "0644"
defer: true
content: |
"""Benchmark blackbox hot-path functions -- min-of-5 runs per function."""
from __future__ import annotations
import inspect
import json
import sys
import tempfile
import timeit
from collections import Counter
from pathlib import Path
sys.path.insert(0, str(Path(sys.argv[1]) / "packages/blackbox/src"))
from blackbox.models import (
CODEFLASH_AGENT_PREFIXES,
CODEFLASH_COMMANDS,
CODEFLASH_SKILLS,
LogEntry,
)
def _build_transcript(n_lines: int = 500) -> Path:
entries = []
for i in range(n_lines):
ts = f"2025-01-15T10:{i // 60:02d}:{i % 60:02d}Z"
if i % 3 == 0:
entries.append(json.dumps({
"type": "user", "timestamp": ts, "sessionId": "sess-bench",
"cwd": "/home/user/project", "gitBranch": "feature-x",
"message": {"content": f"User message {i}"},
}))
elif i % 3 == 1:
entries.append(json.dumps({
"type": "assistant", "timestamp": ts,
"message": {
"content": [
{"type": "text", "text": f"Step {i}."},
{"type": "tool_use", "id": f"tool_{i}", "name": "Write",
"input": {"file_path": f"/project/mod_{i % 10}.py",
"content": "def f():\n pass\n"}},
{"type": "tool_use", "id": f"tool_{i}b", "name": "Bash",
"input": {"command": f"git commit -m 'step {i}'"}},
],
"usage": {"input_tokens": 1000, "output_tokens": 200,
"cache_read_input_tokens": 50,
"cache_creation_input_tokens": 25},
},
}))
else:
entries.append(json.dumps({
"type": "user", "timestamp": ts,
"message": {"content": [
{"type": "tool_result", "tool_use_id": f"tool_{i - 1}",
"is_error": i % 15 == 0,
"content": "OK" if i % 15 != 0 else "Error: exit code 1"}
]},
}))
tmp = tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False, mode="w")
tmp.write("\n".join(entries))
tmp.close()
return Path(tmp.name)
def _build_log_entries(n: int = 200) -> list[LogEntry]:
    """Create *n* synthetic LogEntry records cycling through six levels."""
    levels = ("assistant", "tool_call", "tool_result", "status", "error", "info")
    claude_levels = {"assistant", "tool_call"}
    out: list[LogEntry] = []
    for i in range(n):
        level = levels[i % len(levels)]
        out.append(
            LogEntry(
                timestamp=1700000000.0 + i,
                source="claude" if level in claude_levels else "user",
                level=level,
                message=f"Sample message {i} with /path/to/some/file.py content",
                # Only tool_call entries carry tool metadata, as in real logs.
                data=(
                    {"tool_name": "Write", "preview": "edit file.py"}
                    if level == "tool_call"
                    else {}
                ),
            )
        )
    return out
def best_of(fn, rounds: int = 5) -> float:
    """Invoke *fn* `rounds` times and return the smallest result.

    Min-of-N filters out scheduler/cache noise, leaving the best-case timing.
    """
    samples = [fn() for _ in range(rounds)]
    return min(samples)
def main() -> None:
    """Benchmark the blackbox hot-path functions and print best-of-5 times."""
    transcript = _build_transcript(500)
    log_entries = _build_log_entries(200)

    from blackbox.analytics import extract_meta, track_file_changes
    from blackbox.dashboard.transcript import parse_transcript
    from blackbox.dashboard.rendering import render_log_html

    # Older revisions of track_file_changes lack the `languages` parameter;
    # detect it so the same harness runs on both sides of an A/B comparison.
    accepts_languages = "languages" in inspect.signature(track_file_changes).parameters

    tool_inputs = [
        ("Write", {"file_path": "/project/src/main.py", "content": "x = 1\ny = 2\n"}),
        ("Edit", {"file_path": "/project/src/utils.py", "old_string": "a", "new_string": "b"}),
        ("Write", {"file_path": "/project/README.md", "content": "hello"}),
        ("Write", {"file_path": "/project/Makefile", "content": "all:"}),
        ("Write", {"file_path": "/project/src/app.tsx", "content": "export default () => {}"}),
    ]
    test_vals = [
        "codeflash-python", "codeflash", "random-agent", "codeflash-review",
        "/optimize", "/status", "unknown-cmd", "/benchmark",
        "other-prefix", "codeflash-researcher",
    ]

    # Warm each code path once before timing.
    extract_meta(transcript)
    parse_transcript(transcript)
    for entry in log_entries:
        render_log_html(entry)

    # best-of-5 average per call: timeit total divided by the repeat count.
    def _time(stmt, number):
        return best_of(lambda: timeit.timeit(stmt, number=number) / number)

    t1 = _time(lambda: extract_meta(transcript), 200)

    def _track():
        seen: set[str] = set()
        langs: Counter[str] = Counter()
        for name, tool_input in tool_inputs:
            if accepts_languages:
                track_file_changes(name, tool_input, seen, langs)
            else:
                track_file_changes(name, tool_input, seen)

    t2 = _time(_track, 10000)
    t3 = _time(lambda: parse_transcript(transcript), 200)

    def _render():
        for entry in log_entries:
            render_log_html(entry)

    t4 = _time(_render, 1000)

    def _member():
        for v in test_vals:
            _ = v in CODEFLASH_AGENT_PREFIXES
            _ = v in CODEFLASH_SKILLS
            _ = v in CODEFLASH_COMMANDS

    t5 = _time(_member, 100000)

    print(f"extract_meta {t1*1000:.4f} ms")
    print(f"track_file_changes {t2*1000:.4f} ms")
    print(f"parse_transcript {t3*1000:.4f} ms")
    print(f"render_log_html {t4*1000:.4f} ms")
    print(f"membership {t5*1000:.6f} ms")
    print(f"TOTAL {(t1+t2+t3+t4+t5)*1000:.4f} ms")
    transcript.unlink(missing_ok=True)


if __name__ == "__main__":
    main()
# --- Post-provision setup (run manually after ssh -A) ---
- path: /home/azureuser/setup.sh
owner: azureuser:azureuser
permissions: "0755"
defer: true
content: |
#!/usr/bin/env bash
# Post-provision setup: clone the private repo (requires `ssh -A` agent
# forwarding), install dependencies with uv, and smoke-test the install.
set -euo pipefail

# uv installs itself under ~/.local/bin during cloud-init provisioning.
export PATH="$HOME/.local/bin:$PATH"

step() { echo "=== $1 ==="; }

step "Cloning codeflash-agent"
git clone git@github.com:codeflash-ai/codeflash-agent.git ~/codeflash-agent
cd ~/codeflash-agent

step "Installing dependencies"
uv sync

step "Creating results directory"
mkdir -p ~/results

step "Verifying installation"
.venv/bin/python -c 'from blackbox.models import LogEntry; print("OK")'

step "Done"
runcmd:
- wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -O /tmp/hyperfine.deb
- dpkg -i /tmp/hyperfine.deb
- su - azureuser -c 'curl -LsSf https://astral.sh/uv/install.sh | sh'

View file

@@ -0,0 +1,114 @@
#!/usr/bin/env bash
#
# Azure benchmark VM lifecycle management for blackbox package
#
# Usage:
#   bash infra/vm-manage.sh {create|start|stop|ip|ssh|bench <branch>|ab <base> <opt>|destroy}
set -euo pipefail

RG="blackbox-BENCH-RG"
VM="blackbox-bench"
REGION="westus2"
SIZE="Standard_D2s_v5"
IMAGE="Canonical:ubuntu-24_04-lts:server:latest"
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519.pub}"

# Resolve the VM's current public IP (empty while deallocated).
vm_ip() {
  az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv
}

case "${1:-help}" in
  create)
    if [ ! -f "$SSH_KEY" ]; then
      echo "Error: SSH public key not found at $SSH_KEY"
      echo "Generate one: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519"
      echo "Or set SSH_KEY=/path/to/key.pub"
      exit 1
    fi
    echo "Creating resource group..."
    az group create --name "$RG" --location "$REGION" --only-show-errors --output none
    echo "Creating VM (Trusted Launch, SSH-only, locked-down NSG)..."
    az vm create \
      --resource-group "$RG" \
      --name "$VM" \
      --image "$IMAGE" \
      --size "$SIZE" \
      --os-disk-size-gb 32 \
      --admin-username azureuser \
      --ssh-key-values "$SSH_KEY" \
      --authentication-type ssh \
      --security-type TrustedLaunch \
      --enable-secure-boot true \
      --enable-vtpm true \
      --nsg-rule NONE \
      --custom-data infra/cloud-init.yaml \
      --only-show-errors
    # Open port 22 to the caller's public address only.
    my_ip=$(curl -s ifconfig.me)
    echo "Restricting SSH to $my_ip..."
    az network nsg rule create \
      --resource-group "$RG" \
      --nsg-name "${VM}NSG" \
      --name AllowSSHFromMyIP \
      --priority 1000 \
      --source-address-prefixes "$my_ip/32" \
      --destination-port-ranges 22 \
      --access Allow \
      --protocol Tcp \
      --output none
    echo "VM created. Get IP with: $0 ip"
    ;;
  start)
    echo "Starting VM..."
    az vm start --resource-group "$RG" --name "$VM"
    echo "Started. IP: $(vm_ip)"
    ;;
  stop)
    echo "Deallocating VM (stops billing)..."
    az vm deallocate --resource-group "$RG" --name "$VM"
    echo "Deallocated."
    ;;
  ip)
    vm_ip
    ;;
  ssh)
    ip=$(vm_ip)
    ssh -A azureuser@"$ip" "${@:2}"
    ;;
  bench)
    branch="${2:?Usage: $0 bench <branch>}"
    ip=$(vm_ip)
    ssh -A azureuser@"$ip" "bash ~/bench/bench_blackbox.sh $branch"
    ;;
  ab)
    base="${2:?Usage: $0 ab <base-branch> <opt-branch>}"
    opt="${3:?Usage: $0 ab <base-branch> <opt-branch>}"
    ip=$(vm_ip)
    ssh -A azureuser@"$ip" "bash ~/bench/bench_ab.sh $base $opt"
    ;;
  help|*)
    echo "Usage: $0 {create|start|stop|ip|ssh|bench <branch>|ab <base> <opt>|destroy}"
    echo ""
    echo "  create  - Provision VM with cloud-init"
    echo "  start   - Start deallocated VM"
    echo "  stop    - Deallocate VM (stops billing)"
    echo "  ip      - Show VM public IP"
    echo "  ssh     - SSH into VM with agent forwarding"
    echo "  bench   - Run benchmarks on a branch"
    echo "  ab      - A/B comparison between two branches"
    echo "  destroy - Delete resource group and all resources"
    ;;
esac

View file

@@ -78,15 +78,16 @@ def extract_meta(path: Path) -> SessionMeta | None: # noqa: C901, PLR0912, PLR0
teams_created = 0
try:
text = path.read_text()
text = path.read_bytes().decode("utf-8")
except OSError:
return None
_loads = json.loads
for line in text.splitlines():
if not line.strip():
if not line or line.isspace():
continue
try:
raw = json.loads(line)
raw = _loads(line)
except json.JSONDecodeError:
continue
@@ -302,8 +303,8 @@ def track_file_changes(
if not fp:
return
files.add(fp)
ext = Path(fp).suffix.lower()
lang = FILE_EXTENSIONS.get(ext)
dot = fp.rfind(".")
lang = None if dot == -1 else FILE_EXTENSIONS.get(fp[dot:].lower())
if lang:
languages[lang] += 1