mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
perf(analytics): use rfind and local json.loads (#44)
* perf(analytics): use rfind and local json.loads for hot paths
Replace Path().suffix with string rfind for extension extraction,
use local json.loads binding and bytes split for JSONL parsing.
* fix: use splitlines and preserve extensionless file behavior
split("\n") mishandles \r\n line endings. The early return on
extensionless files changed behavior vs the original Path().suffix
which returned "" and fell through. Use splitlines() and let
extensionless files fall through with lang=None.
* style: use ternary for extensionless file check per SIM108
* Add blackbox benchmark VM infra
D2s_v5 (non-burstable, 2 vCPU, 8 GB) with cloud-init provisioning,
CPU-pinned benchmarks, and A/B comparison scripts.
---------
Co-authored-by: codeflash[bot] <codeflash[bot]@users.noreply.github.com>
This commit is contained in:
parent
1e8cbbede4
commit
1ff2a76152
3 changed files with 376 additions and 5 deletions
256
.codeflash/krrt7/codeflash-ai/blackbox/infra/cloud-init.yaml
Normal file
256
.codeflash/krrt7/codeflash-ai/blackbox/infra/cloud-init.yaml
Normal file
|
|
@ -0,0 +1,256 @@
|
||||||
|
#cloud-config
|
||||||
|
#
|
||||||
|
# Benchmark VM provisioning for blackbox package (codeflash-agent monorepo)
|
||||||
|
#
|
||||||
|
# Pure Python package -- no system-level deps beyond build tools.
|
||||||
|
# Private repo: requires SSH agent forwarding for clone.
|
||||||
|
#
|
||||||
|
# Two-phase setup:
|
||||||
|
# Phase 1 (cloud-init): packages, hyperfine, uv
|
||||||
|
# Phase 2 (manual): ssh -A, clone, uv sync, baseline benchmarks
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# az vm create ... --custom-data infra/cloud-init.yaml
|
||||||
|
# bash infra/vm-manage.sh ssh
|
||||||
|
# bash ~/setup.sh
|
||||||
|
#
|
||||||
|
# VM: Azure Standard_D2s_v5 (2 vCPU, 8 GB RAM, general-purpose)
|
||||||
|
# Smallest non-burstable option -- blackbox is CPU-bound, not memory-bound.
|
||||||
|
# Use taskset -c 0 to pin benchmarks to 1 core for consistent results.
|
||||||
|
# Non-burstable ensures consistent CPU -- no credit depletion or turbo variability.
|
||||||
|
|
||||||
|
package_update: true
|
||||||
|
packages:
|
||||||
|
- git
|
||||||
|
- build-essential
|
||||||
|
- curl
|
||||||
|
|
||||||
|
write_files:
|
||||||
|
# --- Benchmark: blackbox functions (main vs branch) ---
|
||||||
|
- path: /home/azureuser/bench/bench_blackbox.sh
|
||||||
|
owner: azureuser:azureuser
|
||||||
|
permissions: "0755"
|
||||||
|
defer: true
|
||||||
|
content: |
|
||||||
|
#!/usr/bin/env bash
# Benchmark the blackbox package at a given git ref (defaults to HEAD).
# Checks out the ref, syncs dependencies, then runs the per-function
# benchmark script pinned to a single CPU core for stable timings.
set -euo pipefail
BRANCH="${1:-HEAD}"
PYTHON=.venv/bin/python
cd ~/codeflash-agent

echo "=== Benchmarking blackbox: $BRANCH ==="
git fetch origin
git checkout "$BRANCH"
uv sync

# taskset -c 0 pins the benchmark to core 0; the trailing argument is the
# repo root consumed as sys.argv[1] by bench_functions.py.
taskset -c 0 $PYTHON /home/azureuser/bench/bench_functions.py \
    ~/codeflash-agent
|
||||||
|
|
||||||
|
# --- Benchmark: A/B comparison ---
|
||||||
|
- path: /home/azureuser/bench/bench_ab.sh
|
||||||
|
owner: azureuser:azureuser
|
||||||
|
permissions: "0755"
|
||||||
|
defer: true
|
||||||
|
content: |
|
||||||
|
#!/usr/bin/env bash
# A/B comparison: run the blackbox benchmark suite on a baseline branch,
# then on an optimized branch, so per-function timings can be compared.
# Usage: bench_ab.sh <base-branch> <opt-branch>
set -euo pipefail
BASE="${1:?Usage: bench_ab.sh <base-branch> <opt-branch>}"
OPT="${2:?Usage: bench_ab.sh <base-branch> <opt-branch>}"

echo "=== A/B comparison: $BASE vs $OPT ==="
echo ""
echo "--- BASELINE: $BASE ---"
bash ~/bench/bench_blackbox.sh "$BASE"
echo ""
echo "--- OPTIMIZED: $OPT ---"
bash ~/bench/bench_blackbox.sh "$OPT"
|
||||||
|
|
||||||
|
# --- Benchmark: Python script for per-function timing ---
|
||||||
|
- path: /home/azureuser/bench/bench_functions.py
|
||||||
|
owner: azureuser:azureuser
|
||||||
|
permissions: "0644"
|
||||||
|
defer: true
|
||||||
|
content: |
|
||||||
|
"""Benchmark blackbox hot-path functions -- min-of-5 runs per function."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import inspect
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import timeit
|
||||||
|
from collections import Counter
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(sys.argv[1]) / "packages/blackbox/src"))
|
||||||
|
|
||||||
|
from blackbox.models import (
|
||||||
|
CODEFLASH_AGENT_PREFIXES,
|
||||||
|
CODEFLASH_COMMANDS,
|
||||||
|
CODEFLASH_SKILLS,
|
||||||
|
LogEntry,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_transcript(n_lines: int = 500) -> Path:
|
||||||
|
entries = []
|
||||||
|
for i in range(n_lines):
|
||||||
|
ts = f"2025-01-15T10:{i // 60:02d}:{i % 60:02d}Z"
|
||||||
|
if i % 3 == 0:
|
||||||
|
entries.append(json.dumps({
|
||||||
|
"type": "user", "timestamp": ts, "sessionId": "sess-bench",
|
||||||
|
"cwd": "/home/user/project", "gitBranch": "feature-x",
|
||||||
|
"message": {"content": f"User message {i}"},
|
||||||
|
}))
|
||||||
|
elif i % 3 == 1:
|
||||||
|
entries.append(json.dumps({
|
||||||
|
"type": "assistant", "timestamp": ts,
|
||||||
|
"message": {
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": f"Step {i}."},
|
||||||
|
{"type": "tool_use", "id": f"tool_{i}", "name": "Write",
|
||||||
|
"input": {"file_path": f"/project/mod_{i % 10}.py",
|
||||||
|
"content": "def f():\n pass\n"}},
|
||||||
|
{"type": "tool_use", "id": f"tool_{i}b", "name": "Bash",
|
||||||
|
"input": {"command": f"git commit -m 'step {i}'"}},
|
||||||
|
],
|
||||||
|
"usage": {"input_tokens": 1000, "output_tokens": 200,
|
||||||
|
"cache_read_input_tokens": 50,
|
||||||
|
"cache_creation_input_tokens": 25},
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
else:
|
||||||
|
entries.append(json.dumps({
|
||||||
|
"type": "user", "timestamp": ts,
|
||||||
|
"message": {"content": [
|
||||||
|
{"type": "tool_result", "tool_use_id": f"tool_{i - 1}",
|
||||||
|
"is_error": i % 15 == 0,
|
||||||
|
"content": "OK" if i % 15 != 0 else "Error: exit code 1"}
|
||||||
|
]},
|
||||||
|
}))
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False, mode="w")
|
||||||
|
tmp.write("\n".join(entries))
|
||||||
|
tmp.close()
|
||||||
|
return Path(tmp.name)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_log_entries(n: int = 200) -> list[LogEntry]:
    """Return *n* synthetic LogEntry objects cycling through all log levels.

    Source alternates between "claude" (assistant / tool_call levels) and
    "user"; only tool_call entries carry a non-empty data payload.
    """
    level_cycle = ("assistant", "tool_call", "tool_result", "status", "error", "info")
    claude_levels = ("assistant", "tool_call")
    out: list[LogEntry] = []
    for i in range(n):
        level = level_cycle[i % len(level_cycle)]
        payload = (
            {"tool_name": "Write", "preview": "edit file.py"}
            if level == "tool_call"
            else {}
        )
        out.append(LogEntry(
            timestamp=1700000000.0 + i,
            source="claude" if level in claude_levels else "user",
            level=level,
            message=f"Sample message {i} with /path/to/some/file.py content",
            data=payload,
        ))
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def best_of(fn, rounds: int = 5) -> float:
    """Return the minimum value produced by calling *fn* *rounds* times."""
    best = fn()
    for _ in range(rounds - 1):
        candidate = fn()
        if candidate < best:
            best = candidate
    return best
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build fixtures, benchmark each blackbox hot-path function, print timings.

    Each function is timed with timeit and reported as a min-of-5 per-call
    time in milliseconds. The transcript temp file is removed at the end.
    """
    transcript = _build_transcript(500)
    log_entries = _build_log_entries(200)

    # Imported here (after sys.path setup at module top) rather than at file top.
    from blackbox.analytics import extract_meta, track_file_changes
    from blackbox.dashboard.transcript import parse_transcript
    from blackbox.dashboard.rendering import render_log_html

    # Older revisions of track_file_changes may lack the `languages` parameter;
    # detect it so this script benchmarks both shapes.
    accepts_languages = "languages" in inspect.signature(track_file_changes).parameters

    tool_inputs = [
        ("Write", {"file_path": "/project/src/main.py", "content": "x = 1\ny = 2\n"}),
        ("Edit", {"file_path": "/project/src/utils.py", "old_string": "a", "new_string": "b"}),
        ("Write", {"file_path": "/project/README.md", "content": "hello"}),
        ("Write", {"file_path": "/project/Makefile", "content": "all:"}),
        ("Write", {"file_path": "/project/src/app.tsx", "content": "export default () => {}"}),
    ]
    test_vals = [
        "codeflash-python", "codeflash", "random-agent", "codeflash-review",
        "/optimize", "/status", "unknown-cmd", "/benchmark",
        "other-prefix", "codeflash-researcher",
    ]

    # Warmup: populate any caches before timing.
    extract_meta(transcript)
    parse_transcript(transcript)
    for entry in log_entries:
        render_log_html(entry)

    def _per_call(stmt, number: int) -> float:
        # Min-of-5 rounds of (total / number) gives a stable per-call time.
        return best_of(lambda: timeit.timeit(stmt, number=number) / number)

    t_extract = _per_call(lambda: extract_meta(transcript), 200)

    def _track() -> None:
        files: set[str] = set()
        langs: Counter[str] = Counter()
        for tool_name, tool_input in tool_inputs:
            if accepts_languages:
                track_file_changes(tool_name, tool_input, files, langs)
            else:
                track_file_changes(tool_name, tool_input, files)

    t_track = _per_call(_track, 10000)

    t_parse = _per_call(lambda: parse_transcript(transcript), 200)

    def _render() -> None:
        for entry in log_entries:
            render_log_html(entry)

    t_render = _per_call(_render, 1000)

    def _member() -> None:
        for v in test_vals:
            _ = v in CODEFLASH_AGENT_PREFIXES
            _ = v in CODEFLASH_SKILLS
            _ = v in CODEFLASH_COMMANDS

    t_member = _per_call(_member, 100000)

    print(f"extract_meta {t_extract*1000:.4f} ms")
    print(f"track_file_changes {t_track*1000:.4f} ms")
    print(f"parse_transcript {t_parse*1000:.4f} ms")
    print(f"render_log_html {t_render*1000:.4f} ms")
    print(f"membership {t_member*1000:.6f} ms")
    print(f"TOTAL {(t_extract+t_track+t_parse+t_render+t_member)*1000:.4f} ms")

    transcript.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: expects the repo root as sys.argv[1] (consumed by the
# sys.path setup at the top of this file).
if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
# --- Post-provision setup (run manually after ssh -A) ---
|
||||||
|
- path: /home/azureuser/setup.sh
|
||||||
|
owner: azureuser:azureuser
|
||||||
|
permissions: "0755"
|
||||||
|
defer: true
|
||||||
|
content: |
|
||||||
|
#!/usr/bin/env bash
# Phase-2 provisioning, run manually after `ssh -A` into the VM.
# Agent forwarding is required: the repo is private and cloned over SSH.
set -euo pipefail
# uv is installed into ~/.local/bin by the cloud-init runcmd phase.
export PATH="$HOME/.local/bin:$PATH"

echo "=== Cloning codeflash-agent ==="
git clone git@github.com:codeflash-ai/codeflash-agent.git ~/codeflash-agent
cd ~/codeflash-agent

echo "=== Installing dependencies ==="
uv sync

echo "=== Creating results directory ==="
mkdir -p ~/results

echo "=== Verifying installation ==="
# Smoke test: import from the installed package via the project venv.
.venv/bin/python -c 'from blackbox.models import LogEntry; print("OK")'

echo "=== Done ==="
|
||||||
|
|
||||||
|
runcmd:
|
||||||
|
- wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -O /tmp/hyperfine.deb
|
||||||
|
- dpkg -i /tmp/hyperfine.deb
|
||||||
|
- su - azureuser -c 'curl -LsSf https://astral.sh/uv/install.sh | sh'
|
||||||
114
.codeflash/krrt7/codeflash-ai/blackbox/infra/vm-manage.sh
Normal file
114
.codeflash/krrt7/codeflash-ai/blackbox/infra/vm-manage.sh
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
#!/usr/bin/env bash
#
# Azure benchmark VM lifecycle management for blackbox package
#
# Usage:
#   bash infra/vm-manage.sh {create|start|stop|ip|ssh|bench <branch>|ab <base> <opt>|destroy}

set -euo pipefail

# All resources live in one resource group so `destroy` is a single delete.
RG="blackbox-BENCH-RG"
VM="blackbox-bench"
REGION="westus2"
SIZE="Standard_D2s_v5"
IMAGE="Canonical:ubuntu-24_04-lts:server:latest"
# Public key used for VM login; override with SSH_KEY=/path/to/key.pub.
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519.pub}"

case "${1:-help}" in
  create)
    # Validate the public key up front rather than letting az fail late.
    if [ ! -f "$SSH_KEY" ]; then
      echo "Error: SSH public key not found at $SSH_KEY"
      echo "Generate one: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519"
      echo "Or set SSH_KEY=/path/to/key.pub"
      exit 1
    fi

    echo "Creating resource group..."
    az group create --name "$RG" --location "$REGION" --only-show-errors --output none

    # --nsg-rule NONE creates the NSG with no inbound rules; SSH access is
    # opened below, restricted to the caller's current public IP.
    echo "Creating VM (Trusted Launch, SSH-only, locked-down NSG)..."
    az vm create \
      --resource-group "$RG" \
      --name "$VM" \
      --image "$IMAGE" \
      --size "$SIZE" \
      --os-disk-size-gb 32 \
      --admin-username azureuser \
      --ssh-key-values "$SSH_KEY" \
      --authentication-type ssh \
      --security-type TrustedLaunch \
      --enable-secure-boot true \
      --enable-vtpm true \
      --nsg-rule NONE \
      --custom-data infra/cloud-init.yaml \
      --only-show-errors

    MY_IP=$(curl -s ifconfig.me)
    echo "Restricting SSH to $MY_IP..."
    az network nsg rule create \
      --resource-group "$RG" \
      --nsg-name "${VM}NSG" \
      --name AllowSSHFromMyIP \
      --priority 1000 \
      --source-address-prefixes "$MY_IP/32" \
      --destination-port-ranges 22 \
      --access Allow \
      --protocol Tcp \
      --output none

    echo "VM created. Get IP with: $0 ip"
    ;;

  start)
    echo "Starting VM..."
    az vm start --resource-group "$RG" --name "$VM"
    echo "Started. IP: $(az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv)"
    ;;

  stop)
    # Deallocate (not just power off) so compute billing actually stops.
    echo "Deallocating VM (stops billing)..."
    az vm deallocate --resource-group "$RG" --name "$VM"
    echo "Deallocated."
    ;;

  ip)
    az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv
    ;;

  ssh)
    # -A forwards the SSH agent so the private repo can be cloned on the VM.
    IP=$(az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv)
    ssh -A azureuser@"$IP" "${@:2}"
    ;;

  bench)
    BRANCH="${2:?Usage: $0 bench <branch>}"
    IP=$(az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv)
    ssh -A azureuser@"$IP" "bash ~/bench/bench_blackbox.sh $BRANCH"
    ;;

  ab)
    BASE="${2:?Usage: $0 ab <base-branch> <opt-branch>}"
    OPT="${3:?Usage: $0 ab <base-branch> <opt-branch>}"
    IP=$(az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv)
    ssh -A azureuser@"$IP" "bash ~/bench/bench_ab.sh $BASE $OPT"
    ;;

  destroy)
    echo "Destroying resource group (all resources)..."
    az group delete --name "$RG" --yes --no-wait
    echo "Deletion started."
    ;;

  help|*)
    echo "Usage: $0 {create|start|stop|ip|ssh|bench <branch>|ab <base> <opt>|destroy}"
    echo ""
    echo " create - Provision VM with cloud-init"
    echo " start - Start deallocated VM"
    echo " stop - Deallocate VM (stops billing)"
    echo " ip - Show VM public IP"
    echo " ssh - SSH into VM with agent forwarding"
    echo " bench - Run benchmarks on a branch"
    echo " ab - A/B comparison between two branches"
    echo " destroy - Delete resource group and all resources"
    ;;
esac
|
||||||
|
|
@ -78,15 +78,16 @@ def extract_meta(path: Path) -> SessionMeta | None: # noqa: C901, PLR0912, PLR0
|
||||||
teams_created = 0
|
teams_created = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
text = path.read_text()
|
text = path.read_bytes().decode("utf-8")
|
||||||
except OSError:
|
except OSError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
_loads = json.loads
|
||||||
for line in text.splitlines():
|
for line in text.splitlines():
|
||||||
if not line.strip():
|
if not line or line.isspace():
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
raw = json.loads(line)
|
raw = _loads(line)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -302,8 +303,8 @@ def track_file_changes(
|
||||||
if not fp:
|
if not fp:
|
||||||
return
|
return
|
||||||
files.add(fp)
|
files.add(fp)
|
||||||
ext = Path(fp).suffix.lower()
|
dot = fp.rfind(".")
|
||||||
lang = FILE_EXTENSIONS.get(ext)
|
lang = None if dot == -1 else FILE_EXTENSIONS.get(fp[dot:].lower())
|
||||||
if lang:
|
if lang:
|
||||||
languages[lang] += 1
|
languages[lang] += 1
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue