perf(analytics): use rfind and local json.loads (#44)

* perf(analytics): use rfind and local json.loads for hot paths

Replace Path().suffix with string rfind for extension extraction,
use local json.loads binding and bytes split for JSONL parsing.

* fix: use splitlines and preserve extensionless file behavior

split("\n") mishandles \r\n line endings. The early return on
extensionless files changed behavior vs the original Path().suffix
which returned "" and fell through. Use splitlines() and let
extensionless files fall through with lang=None.

* style: use ternary for extensionless file check per SIM108

* Add blackbox benchmark VM infra

D2s_v5 (non-burstable, 2 vCPU, 8 GB) with cloud-init provisioning,
CPU-pinned benchmarks, and A/B comparison scripts.

---------

Co-authored-by: codeflash[bot] <codeflash[bot]@users.noreply.github.com>
This commit is contained in:
Kevin Turcios 2026-04-29 03:22:42 -05:00 committed by GitHub
parent 1e8cbbede4
commit 1ff2a76152
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 376 additions and 5 deletions

View file

@@ -0,0 +1,256 @@
#cloud-config
#
# Benchmark VM provisioning for blackbox package (codeflash-agent monorepo)
#
# Pure Python package -- no system-level deps beyond build tools.
# Private repo: requires SSH agent forwarding for clone.
#
# Two-phase setup:
# Phase 1 (cloud-init): packages, hyperfine, uv
# Phase 2 (manual): ssh -A, clone, uv sync, baseline benchmarks
#
# Usage:
# az vm create ... --custom-data infra/cloud-init.yaml
# bash infra/vm-manage.sh ssh
# bash ~/setup.sh
#
# VM: Azure Standard_D2s_v5 (2 vCPU, 8 GB RAM, general-purpose)
# Smallest non-burstable option -- blackbox is CPU-bound, not memory-bound.
# Use taskset -c 0 to pin benchmarks to 1 core for consistent results.
# Non-burstable ensures consistent CPU -- no credit depletion or turbo variability.
package_update: true
packages:
- git
- build-essential
- curl
write_files:
# --- Benchmark: blackbox functions (main vs branch) ---
- path: /home/azureuser/bench/bench_blackbox.sh
owner: azureuser:azureuser
permissions: "0755"
defer: true
content: |
#!/usr/bin/env bash
# Check out the requested branch, sync dependencies, and run the blackbox
# micro-benchmarks pinned to a single CPU core.
set -euo pipefail

branch="${1:-HEAD}"
py=.venv/bin/python

cd ~/codeflash-agent
echo "=== Benchmarking blackbox: $branch ==="

git fetch origin
git checkout "$branch"
uv sync

# Pin to core 0 so timings are comparable across runs on the 2-vCPU VM.
taskset -c 0 "$py" /home/azureuser/bench/bench_functions.py ~/codeflash-agent
# --- Benchmark: A/B comparison ---
- path: /home/azureuser/bench/bench_ab.sh
owner: azureuser:azureuser
permissions: "0755"
defer: true
content: |
#!/usr/bin/env bash
# Run the blackbox benchmark on a baseline branch and then on an optimized
# branch, so the two result sets can be compared side by side.
set -euo pipefail

usage="Usage: bench_ab.sh <base-branch> <opt-branch>"
BASE="${1:?$usage}"
OPT="${2:?$usage}"

# Print a labelled section header, then benchmark the given branch.
run_side() {
  echo ""
  echo "--- $1: $2 ---"
  bash ~/bench/bench_blackbox.sh "$2"
}

echo "=== A/B comparison: $BASE vs $OPT ==="
run_side BASELINE "$BASE"
run_side OPTIMIZED "$OPT"
# --- Benchmark: Python script for per-function timing ---
- path: /home/azureuser/bench/bench_functions.py
owner: azureuser:azureuser
permissions: "0644"
defer: true
content: |
"""Benchmark blackbox hot-path functions -- min-of-5 runs per function."""
from __future__ import annotations
import inspect
import json
import sys
import tempfile
import timeit
from collections import Counter
from pathlib import Path
sys.path.insert(0, str(Path(sys.argv[1]) / "packages/blackbox/src"))
from blackbox.models import (
CODEFLASH_AGENT_PREFIXES,
CODEFLASH_COMMANDS,
CODEFLASH_SKILLS,
LogEntry,
)
def _build_transcript(n_lines: int = 500) -> Path:
entries = []
for i in range(n_lines):
ts = f"2025-01-15T10:{i // 60:02d}:{i % 60:02d}Z"
if i % 3 == 0:
entries.append(json.dumps({
"type": "user", "timestamp": ts, "sessionId": "sess-bench",
"cwd": "/home/user/project", "gitBranch": "feature-x",
"message": {"content": f"User message {i}"},
}))
elif i % 3 == 1:
entries.append(json.dumps({
"type": "assistant", "timestamp": ts,
"message": {
"content": [
{"type": "text", "text": f"Step {i}."},
{"type": "tool_use", "id": f"tool_{i}", "name": "Write",
"input": {"file_path": f"/project/mod_{i % 10}.py",
"content": "def f():\n pass\n"}},
{"type": "tool_use", "id": f"tool_{i}b", "name": "Bash",
"input": {"command": f"git commit -m 'step {i}'"}},
],
"usage": {"input_tokens": 1000, "output_tokens": 200,
"cache_read_input_tokens": 50,
"cache_creation_input_tokens": 25},
},
}))
else:
entries.append(json.dumps({
"type": "user", "timestamp": ts,
"message": {"content": [
{"type": "tool_result", "tool_use_id": f"tool_{i - 1}",
"is_error": i % 15 == 0,
"content": "OK" if i % 15 != 0 else "Error: exit code 1"}
]},
}))
tmp = tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False, mode="w")
tmp.write("\n".join(entries))
tmp.close()
return Path(tmp.name)
def _build_log_entries(n: int = 200) -> list[LogEntry]:
    """Create *n* synthetic LogEntry records cycling through six levels."""
    levels = ("assistant", "tool_call", "tool_result", "status", "error", "info")
    claude_levels = {"assistant", "tool_call"}
    out: list[LogEntry] = []
    for i in range(n):
        level = levels[i % len(levels)]
        out.append(
            LogEntry(
                timestamp=1700000000.0 + i,
                source="claude" if level in claude_levels else "user",
                level=level,
                message=f"Sample message {i} with /path/to/some/file.py content",
                # Only tool_call entries carry tool metadata, as in real logs.
                data=(
                    {"tool_name": "Write", "preview": "edit file.py"}
                    if level == "tool_call"
                    else {}
                ),
            )
        )
    return out
def best_of(fn, rounds: int = 5) -> float:
    """Invoke *fn* `rounds` times and return the smallest result.

    Min-of-N filters out scheduler/cache noise, leaving the best-case timing.
    """
    samples = [fn() for _ in range(rounds)]
    return min(samples)
def main() -> None:
    """Benchmark the blackbox hot-path functions and print best-of-5 times."""
    transcript = _build_transcript(500)
    log_entries = _build_log_entries(200)

    from blackbox.analytics import extract_meta, track_file_changes
    from blackbox.dashboard.transcript import parse_transcript
    from blackbox.dashboard.rendering import render_log_html

    # Older revisions of track_file_changes lack the `languages` parameter;
    # detect it so the same harness runs on both sides of an A/B comparison.
    accepts_languages = "languages" in inspect.signature(track_file_changes).parameters

    tool_inputs = [
        ("Write", {"file_path": "/project/src/main.py", "content": "x = 1\ny = 2\n"}),
        ("Edit", {"file_path": "/project/src/utils.py", "old_string": "a", "new_string": "b"}),
        ("Write", {"file_path": "/project/README.md", "content": "hello"}),
        ("Write", {"file_path": "/project/Makefile", "content": "all:"}),
        ("Write", {"file_path": "/project/src/app.tsx", "content": "export default () => {}"}),
    ]
    test_vals = [
        "codeflash-python", "codeflash", "random-agent", "codeflash-review",
        "/optimize", "/status", "unknown-cmd", "/benchmark",
        "other-prefix", "codeflash-researcher",
    ]

    # Warm each code path once before timing.
    extract_meta(transcript)
    parse_transcript(transcript)
    for entry in log_entries:
        render_log_html(entry)

    # best-of-5 average per call: timeit total divided by the repeat count.
    def _time(stmt, number):
        return best_of(lambda: timeit.timeit(stmt, number=number) / number)

    t1 = _time(lambda: extract_meta(transcript), 200)

    def _track():
        seen: set[str] = set()
        langs: Counter[str] = Counter()
        for name, tool_input in tool_inputs:
            if accepts_languages:
                track_file_changes(name, tool_input, seen, langs)
            else:
                track_file_changes(name, tool_input, seen)

    t2 = _time(_track, 10000)
    t3 = _time(lambda: parse_transcript(transcript), 200)

    def _render():
        for entry in log_entries:
            render_log_html(entry)

    t4 = _time(_render, 1000)

    def _member():
        for v in test_vals:
            _ = v in CODEFLASH_AGENT_PREFIXES
            _ = v in CODEFLASH_SKILLS
            _ = v in CODEFLASH_COMMANDS

    t5 = _time(_member, 100000)

    print(f"extract_meta {t1*1000:.4f} ms")
    print(f"track_file_changes {t2*1000:.4f} ms")
    print(f"parse_transcript {t3*1000:.4f} ms")
    print(f"render_log_html {t4*1000:.4f} ms")
    print(f"membership {t5*1000:.6f} ms")
    print(f"TOTAL {(t1+t2+t3+t4+t5)*1000:.4f} ms")
    transcript.unlink(missing_ok=True)


if __name__ == "__main__":
    main()
# --- Post-provision setup (run manually after ssh -A) ---
- path: /home/azureuser/setup.sh
owner: azureuser:azureuser
permissions: "0755"
defer: true
content: |
#!/usr/bin/env bash
# Post-provision setup: clone the private repo (requires `ssh -A` agent
# forwarding), install dependencies with uv, and smoke-test the install.
set -euo pipefail

# uv installs itself under ~/.local/bin during cloud-init provisioning.
export PATH="$HOME/.local/bin:$PATH"

step() { echo "=== $1 ==="; }

step "Cloning codeflash-agent"
git clone git@github.com:codeflash-ai/codeflash-agent.git ~/codeflash-agent
cd ~/codeflash-agent

step "Installing dependencies"
uv sync

step "Creating results directory"
mkdir -p ~/results

step "Verifying installation"
.venv/bin/python -c 'from blackbox.models import LogEntry; print("OK")'

step "Done"
runcmd:
- wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -O /tmp/hyperfine.deb
- dpkg -i /tmp/hyperfine.deb
- su - azureuser -c 'curl -LsSf https://astral.sh/uv/install.sh | sh'

View file

@@ -0,0 +1,114 @@
#!/usr/bin/env bash
#
# Azure benchmark VM lifecycle management for blackbox package
#
# Usage:
#   bash infra/vm-manage.sh {create|start|stop|ip|ssh|bench <branch>|ab <base> <opt>|destroy}
set -euo pipefail

RG="blackbox-BENCH-RG"
VM="blackbox-bench"
REGION="westus2"
SIZE="Standard_D2s_v5"
IMAGE="Canonical:ubuntu-24_04-lts:server:latest"
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519.pub}"

# Resolve the VM's current public IP (empty while deallocated).
vm_ip() {
  az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv
}

case "${1:-help}" in
  create)
    if [ ! -f "$SSH_KEY" ]; then
      echo "Error: SSH public key not found at $SSH_KEY"
      echo "Generate one: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519"
      echo "Or set SSH_KEY=/path/to/key.pub"
      exit 1
    fi
    echo "Creating resource group..."
    az group create --name "$RG" --location "$REGION" --only-show-errors --output none
    echo "Creating VM (Trusted Launch, SSH-only, locked-down NSG)..."
    az vm create \
      --resource-group "$RG" \
      --name "$VM" \
      --image "$IMAGE" \
      --size "$SIZE" \
      --os-disk-size-gb 32 \
      --admin-username azureuser \
      --ssh-key-values "$SSH_KEY" \
      --authentication-type ssh \
      --security-type TrustedLaunch \
      --enable-secure-boot true \
      --enable-vtpm true \
      --nsg-rule NONE \
      --custom-data infra/cloud-init.yaml \
      --only-show-errors
    # Open port 22 to the caller's public address only.
    my_ip=$(curl -s ifconfig.me)
    echo "Restricting SSH to $my_ip..."
    az network nsg rule create \
      --resource-group "$RG" \
      --nsg-name "${VM}NSG" \
      --name AllowSSHFromMyIP \
      --priority 1000 \
      --source-address-prefixes "$my_ip/32" \
      --destination-port-ranges 22 \
      --access Allow \
      --protocol Tcp \
      --output none
    echo "VM created. Get IP with: $0 ip"
    ;;
  start)
    echo "Starting VM..."
    az vm start --resource-group "$RG" --name "$VM"
    echo "Started. IP: $(vm_ip)"
    ;;
  stop)
    echo "Deallocating VM (stops billing)..."
    az vm deallocate --resource-group "$RG" --name "$VM"
    echo "Deallocated."
    ;;
  ip)
    vm_ip
    ;;
  ssh)
    ip=$(vm_ip)
    ssh -A azureuser@"$ip" "${@:2}"
    ;;
  bench)
    branch="${2:?Usage: $0 bench <branch>}"
    ip=$(vm_ip)
    ssh -A azureuser@"$ip" "bash ~/bench/bench_blackbox.sh $branch"
    ;;
  ab)
    base="${2:?Usage: $0 ab <base-branch> <opt-branch>}"
    opt="${3:?Usage: $0 ab <base-branch> <opt-branch>}"
    ip=$(vm_ip)
    ssh -A azureuser@"$ip" "bash ~/bench/bench_ab.sh $base $opt"
    ;;
  help|*)
    echo "Usage: $0 {create|start|stop|ip|ssh|bench <branch>|ab <base> <opt>|destroy}"
    echo ""
    echo "  create  - Provision VM with cloud-init"
    echo "  start   - Start deallocated VM"
    echo "  stop    - Deallocate VM (stops billing)"
    echo "  ip      - Show VM public IP"
    echo "  ssh     - SSH into VM with agent forwarding"
    echo "  bench   - Run benchmarks on a branch"
    echo "  ab      - A/B comparison between two branches"
    echo "  destroy - Delete resource group and all resources"
    ;;
esac

View file

@@ -78,15 +78,16 @@ def extract_meta(path: Path) -> SessionMeta | None: # noqa: C901, PLR0912, PLR0
teams_created = 0
try:
text = path.read_text()
text = path.read_bytes().decode("utf-8")
except OSError:
return None
_loads = json.loads
for line in text.splitlines():
if not line.strip():
if not line or line.isspace():
continue
try:
raw = json.loads(line)
raw = _loads(line)
except json.JSONDecodeError:
continue
@@ -302,8 +303,8 @@ def track_file_changes(
if not fp:
return
files.add(fp)
ext = Path(fp).suffix.lower()
lang = FILE_EXTENSIONS.get(ext)
dot = fp.rfind(".")
lang = None if dot == -1 else FILE_EXTENSIONS.get(fp[dot:].lower())
if lang:
languages[lang] += 1