diff --git a/.codeflash/krrt7/codeflash-ai/blackbox/infra/cloud-init.yaml b/.codeflash/krrt7/codeflash-ai/blackbox/infra/cloud-init.yaml
new file mode 100644
index 0000000..2e0ca98
--- /dev/null
+++ b/.codeflash/krrt7/codeflash-ai/blackbox/infra/cloud-init.yaml
@@ -0,0 +1,289 @@
+#cloud-config
+#
+# Benchmark VM provisioning for blackbox package (codeflash-agent monorepo)
+#
+# Pure Python package -- no system-level deps beyond build tools.
+# Private repo: requires SSH agent forwarding for clone.
+#
+# Two-phase setup:
+#   Phase 1 (cloud-init): packages, hyperfine, uv
+#   Phase 2 (manual):     ssh -A, clone, uv sync, baseline benchmarks
+#
+# Usage:
+#   az vm create ... --custom-data infra/cloud-init.yaml
+#   bash infra/vm-manage.sh ssh
+#   bash ~/setup.sh
+#
+# VM: Azure Standard_D2s_v5 (2 vCPU, 8 GB RAM, general-purpose)
+#   Smallest non-burstable option -- blackbox is CPU-bound, not memory-bound.
+#   Use taskset -c 0 to pin benchmarks to 1 core for consistent results.
+#   Non-burstable ensures consistent CPU -- no credit depletion or turbo variability.
+
+package_update: true
+packages:
+  - git
+  - build-essential
+  - curl
+
+write_files:
+  # --- Benchmark: blackbox functions (main vs branch) ---
+  - path: /home/azureuser/bench/bench_blackbox.sh
+    owner: azureuser:azureuser
+    permissions: "0755"
+    defer: true
+    content: |
+      #!/usr/bin/env bash
+      set -euo pipefail
+      # setup.sh puts ~/.local/bin on PATH to find uv; do the same here, since
+      # this script is also started over non-interactive SSH (vm-manage.sh
+      # bench/ab), where profile-based PATH changes may not be applied.
+      export PATH="$HOME/.local/bin:$PATH"
+      BRANCH="${1:-HEAD}"
+      PYTHON=.venv/bin/python
+      cd ~/codeflash-agent
+
+      echo "=== Benchmarking blackbox: $BRANCH ==="
+      git fetch origin
+      git checkout "$BRANCH"
+      uv sync
+
+      # Pin to one core for consistent results.
+      taskset -c 0 "$PYTHON" /home/azureuser/bench/bench_functions.py \
+        ~/codeflash-agent
+
+  # --- Benchmark: A/B comparison ---
+  - path: /home/azureuser/bench/bench_ab.sh
+    owner: azureuser:azureuser
+    permissions: "0755"
+    defer: true
+    content: |
+      #!/usr/bin/env bash
+      set -euo pipefail
+      BASE="${1:?Usage: bench_ab.sh BASE_REF OPT_REF}"
+      OPT="${2:?Usage: bench_ab.sh BASE_REF OPT_REF}"
+
+      echo "=== A/B comparison: $BASE vs $OPT ==="
+      echo ""
+      echo "--- BASELINE: $BASE ---"
+      bash ~/bench/bench_blackbox.sh "$BASE"
+      echo ""
+      echo "--- OPTIMIZED: $OPT ---"
+      bash ~/bench/bench_blackbox.sh "$OPT"
+
+  # --- Benchmark: Python script for per-function timing ---
+  - path: /home/azureuser/bench/bench_functions.py
+    owner: azureuser:azureuser
+    permissions: "0644"
+    defer: true
+    content: |
+      """Benchmark blackbox hot-path functions -- min-of-5 runs per function.
+
+      Usage: python bench_functions.py CHECKOUT_DIR
+      """
+      from __future__ import annotations
+
+      import inspect
+      import json
+      import sys
+      import tempfile
+      import timeit
+      from collections import Counter
+      from pathlib import Path
+
+      if len(sys.argv) < 2:
+          sys.exit("Usage: bench_functions.py CHECKOUT_DIR")
+
+      # Put the given checkout's blackbox sources first on the import path.
+      sys.path.insert(0, str(Path(sys.argv[1]) / "packages/blackbox/src"))
+
+      from blackbox.models import (
+          CODEFLASH_AGENT_PREFIXES,
+          CODEFLASH_COMMANDS,
+          CODEFLASH_SKILLS,
+          LogEntry,
+      )
+
+
+      def _build_transcript(n_lines: int = 500) -> Path:
+          """Write a synthetic JSONL transcript and return its path.
+
+          Entries cycle user message -> assistant turn (text plus two tool
+          calls) -> tool result. The file is not auto-deleted; the caller
+          removes it.
+          """
+          entries = []
+          for i in range(n_lines):
+              ts = f"2025-01-15T10:{i // 60:02d}:{i % 60:02d}Z"
+              if i % 3 == 0:
+                  entries.append(json.dumps({
+                      "type": "user", "timestamp": ts, "sessionId": "sess-bench",
+                      "cwd": "/home/user/project", "gitBranch": "feature-x",
+                      "message": {"content": f"User message {i}"},
+                  }))
+              elif i % 3 == 1:
+                  entries.append(json.dumps({
+                      "type": "assistant", "timestamp": ts,
+                      "message": {
+                          "content": [
+                              {"type": "text", "text": f"Step {i}."},
+                              {"type": "tool_use", "id": f"tool_{i}", "name": "Write",
+                               "input": {"file_path": f"/project/mod_{i % 10}.py",
+                                         "content": "def f():\n pass\n"}},
+                              {"type": "tool_use", "id": f"tool_{i}b", "name": "Bash",
+                               "input": {"command": f"git commit -m 'step {i}'"}},
+                          ],
+                          "usage": {"input_tokens": 1000, "output_tokens": 200,
+                                    "cache_read_input_tokens": 50,
+                                    "cache_creation_input_tokens": 25},
+                      },
+                  }))
+              else:
+                  # Tool results only occur at i % 3 == 2, so i % 15 == 0 can
+                  # never match here; i % 15 == 14 flags every fifth result.
+                  failed = i % 15 == 14
+                  entries.append(json.dumps({
+                      "type": "user", "timestamp": ts,
+                      "message": {"content": [
+                          {"type": "tool_result", "tool_use_id": f"tool_{i - 1}",
+                           "is_error": failed,
+                           "content": "Error: exit code 1" if failed else "OK"}
+                      ]},
+                  }))
+          with tempfile.NamedTemporaryFile(
+              suffix=".jsonl", delete=False, mode="w", encoding="utf-8"
+          ) as tmp:
+              tmp.write("\n".join(entries))
+          return Path(tmp.name)
+
+
+      def _build_log_entries(n: int = 200) -> list[LogEntry]:
+          """Build *n* LogEntry objects cycling through the six log levels."""
+          entries = []
+          levels = ["assistant", "tool_call", "tool_result", "status", "error", "info"]
+          for i in range(n):
+              lvl = levels[i % len(levels)]
+              entries.append(LogEntry(
+                  timestamp=1700000000.0 + i,
+                  source="claude" if lvl in ("assistant", "tool_call") else "user",
+                  level=lvl,
+                  message=f"Sample message {i} with /path/to/some/file.py content",
+                  data={"tool_name": "Write", "preview": "edit file.py"}
+                  if lvl == "tool_call" else {},
+              ))
+          return entries
+
+
+      def best_of(fn, rounds: int = 5) -> float:
+          """Return the smallest value produced by *rounds* calls of *fn*."""
+          return min(fn() for _ in range(rounds))
+
+
+      def main() -> None:
+          """Time each benchmarked function and print per-call milliseconds."""
+          from blackbox.analytics import extract_meta, track_file_changes
+          from blackbox.dashboard.transcript import parse_transcript
+          from blackbox.dashboard.rendering import render_log_html
+
+          # Pass the Counter only when track_file_changes has a "languages"
+          # parameter, so the script runs against either signature.
+          sig = inspect.signature(track_file_changes)
+          has_languages = "languages" in sig.parameters
+
+          tool_inputs = [
+              ("Write", {"file_path": "/project/src/main.py", "content": "x = 1\ny = 2\n"}),
+              ("Edit", {"file_path": "/project/src/utils.py", "old_string": "a", "new_string": "b"}),
+              ("Write", {"file_path": "/project/README.md", "content": "hello"}),
+              ("Write", {"file_path": "/project/Makefile", "content": "all:"}),
+              ("Write", {"file_path": "/project/src/app.tsx", "content": "export default () => {}"}),
+          ]
+          test_vals = [
+              "codeflash-python", "codeflash", "random-agent", "codeflash-review",
+              "/optimize", "/status", "unknown-cmd", "/benchmark",
+              "other-prefix", "codeflash-researcher",
+          ]
+
+          entries = _build_log_entries(200)
+          transcript = _build_transcript(500)
+          try:
+              # Warmup
+              extract_meta(transcript)
+              parse_transcript(transcript)
+              for e in entries:
+                  render_log_html(e)
+
+              t1 = best_of(lambda: timeit.timeit(
+                  lambda: extract_meta(transcript), number=200) / 200)
+
+              def _track():
+                  f: set[str] = set()
+                  langs: Counter[str] = Counter()
+                  for tn, ti in tool_inputs:
+                      if has_languages:
+                          track_file_changes(tn, ti, f, langs)
+                      else:
+                          track_file_changes(tn, ti, f)
+              t2 = best_of(lambda: timeit.timeit(_track, number=10000) / 10000)
+
+              t3 = best_of(lambda: timeit.timeit(
+                  lambda: parse_transcript(transcript), number=200) / 200)
+
+              def _render():
+                  for e in entries:
+                      render_log_html(e)
+              t4 = best_of(lambda: timeit.timeit(_render, number=1000) / 1000)
+
+              def _member():
+                  for v in test_vals:
+                      _ = v in CODEFLASH_AGENT_PREFIXES
+                      _ = v in CODEFLASH_SKILLS
+                      _ = v in CODEFLASH_COMMANDS
+              t5 = best_of(lambda: timeit.timeit(_member, number=100000) / 100000)
+
+              print(f"extract_meta {t1*1000:.4f} ms")
+              print(f"track_file_changes {t2*1000:.4f} ms")
+              print(f"parse_transcript {t3*1000:.4f} ms")
+              print(f"render_log_html {t4*1000:.4f} ms")
+              print(f"membership {t5*1000:.6f} ms")
+              print(f"TOTAL {(t1+t2+t3+t4+t5)*1000:.4f} ms")
+          finally:
+              # Remove the temp transcript even when a benchmarked call raises.
+              transcript.unlink(missing_ok=True)
+
+
+      if __name__ == "__main__":
+          main()
+
+  # --- Post-provision setup (run manually after ssh -A) ---
+  - path: /home/azureuser/setup.sh
+    owner: azureuser:azureuser
+    permissions: "0755"
+    defer: true
+    content: |
+      #!/usr/bin/env bash
+      set -euo pipefail
+      export PATH="$HOME/.local/bin:$PATH"
+
+      echo "=== Cloning codeflash-agent ==="
+      # Skip the clone on re-runs: git clone refuses a non-empty target dir.
+      if [ ! -d ~/codeflash-agent/.git ]; then
+        git clone git@github.com:codeflash-ai/codeflash-agent.git ~/codeflash-agent
+      fi
+      cd ~/codeflash-agent
+
+      echo "=== Installing dependencies ==="
+      uv sync
+
+      echo "=== Creating results directory ==="
+      mkdir -p ~/results
+
+      echo "=== Verifying installation ==="
+      .venv/bin/python -c 'from blackbox.models import LogEntry; print("OK")'
+
+      echo "=== Done ==="
+
+runcmd:
+  # curl comes from the packages list above; -f fails on HTTP errors instead
+  # of saving an error page as the .deb.
+  - curl -fsSL https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -o /tmp/hyperfine.deb
+  - dpkg -i /tmp/hyperfine.deb
+  - su - azureuser -c 'curl -LsSf https://astral.sh/uv/install.sh | sh'
diff --git a/.codeflash/krrt7/codeflash-ai/blackbox/infra/vm-manage.sh b/.codeflash/krrt7/codeflash-ai/blackbox/infra/vm-manage.sh
new file mode 100644
index 0000000..bac3c92
--- /dev/null
+++ b/.codeflash/krrt7/codeflash-ai/blackbox/infra/vm-manage.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+#
+# Azure benchmark VM lifecycle management for blackbox package
+#
+# Usage:
+#   bash infra/vm-manage.sh {create|start|stop|ip|ssh|bench BRANCH|ab BASE OPT|destroy}
+
+set -euo pipefail
+
+RG="blackbox-BENCH-RG"
+VM="blackbox-bench"
+REGION="westus2"
+SIZE="Standard_D2s_v5"
+IMAGE="Canonical:ubuntu-24_04-lts:server:latest"
+SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519.pub}"
+# cloud-init.yaml sits next to this script; resolving it here lets create run from any cwd.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Print the VM's public IP; fail with a message when Azure reports none.
+vm_ip() {
+    local ip
+    ip=$(az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv)
+    if [ -z "$ip" ]; then
+        echo "Error: no public IP reported for $VM (is it running?)" >&2
+        return 1
+    fi
+    printf '%s\n' "$ip"
+}
+
+case "${1:-help}" in
+    create)
+        if [ ! -f "$SSH_KEY" ]; then
+            echo "Error: SSH public key not found at $SSH_KEY" >&2
+            echo "Generate one: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519" >&2
+            echo "Or set SSH_KEY=/path/to/key.pub" >&2
+            exit 1
+        fi
+
+        # Look up the caller's IP before creating anything, so a failed lookup
+        # cannot leave behind a VM whose NSG has no SSH rule.
+        MY_IP=$(curl -4 -fsS --max-time 10 https://ifconfig.me)
+        if [[ ! "$MY_IP" =~ ^[0-9]+(\.[0-9]+){3}$ ]]; then
+            echo "Error: could not determine public IPv4 address (got: $MY_IP)" >&2
+            exit 1
+        fi
+
+        echo "Creating resource group..."
+        az group create --name "$RG" --location "$REGION" --only-show-errors --output none
+
+        echo "Creating VM (Trusted Launch, SSH-only, locked-down NSG)..."
+        az vm create \
+            --resource-group "$RG" \
+            --name "$VM" \
+            --image "$IMAGE" \
+            --size "$SIZE" \
+            --os-disk-size-gb 32 \
+            --admin-username azureuser \
+            --ssh-key-values "$SSH_KEY" \
+            --authentication-type ssh \
+            --security-type TrustedLaunch \
+            --enable-secure-boot true \
+            --enable-vtpm true \
+            --nsg-rule NONE \
+            --custom-data "$SCRIPT_DIR/cloud-init.yaml" \
+            --only-show-errors
+
+        echo "Restricting SSH to $MY_IP..."
+        az network nsg rule create \
+            --resource-group "$RG" \
+            --nsg-name "${VM}NSG" \
+            --name AllowSSHFromMyIP \
+            --priority 1000 \
+            --source-address-prefixes "$MY_IP/32" \
+            --destination-port-ranges 22 \
+            --access Allow \
+            --protocol Tcp \
+            --output none
+
+        echo "VM created. Get IP with: $0 ip"
+        ;;
+
+    start)
+        echo "Starting VM..."
+        az vm start --resource-group "$RG" --name "$VM"
+        echo "Started. IP: $(vm_ip)"
+        ;;
+
+    stop)
+        echo "Deallocating VM (stops billing)..."
+        az vm deallocate --resource-group "$RG" --name "$VM"
+        echo "Deallocated."
+        ;;
+
+    ip)
+        vm_ip
+        ;;
+
+    ssh)
+        IP=$(vm_ip)
+        ssh -A azureuser@"$IP" "${@:2}"
+        ;;
+
+    bench)
+        BRANCH="${2:?Usage: $0 bench BRANCH}"
+        IP=$(vm_ip)
+        # %q-quote the ref: ssh joins its arguments into one remote shell command line.
+        ssh -A azureuser@"$IP" "bash ~/bench/bench_blackbox.sh $(printf '%q' "$BRANCH")"
+        ;;
+
+    ab)
+        BASE="${2:?Usage: $0 ab BASE OPT}"
+        OPT="${3:?Usage: $0 ab BASE OPT}"
+        IP=$(vm_ip)
+        ssh -A azureuser@"$IP" "bash ~/bench/bench_ab.sh $(printf '%q' "$BASE") $(printf '%q' "$OPT")"
+        ;;
+
+    destroy)
+        echo "Destroying resource group (all resources)..."
+        az group delete --name "$RG" --yes --no-wait
+        echo "Deletion started."
+        ;;
+
+    help|*)
+        echo "Usage: $0 {create|start|stop|ip|ssh|bench BRANCH|ab BASE OPT|destroy}"
+        echo ""
+        echo " create - Provision VM with cloud-init"
+        echo " start - Start deallocated VM"
+        echo " stop - Deallocate VM (stops billing)"
+        echo " ip - Show VM public IP"
+        echo " ssh - SSH into VM with agent forwarding"
+        echo " bench - Run benchmarks on a branch"
+        echo " ab - A/B comparison between two branches"
+        echo " destroy - Delete resource group and all resources"
+        ;;
+esac