mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
Add team member dimension to case study paths so multiple contributors can track optimization data independently. Derives member from git config user.name in session-start hooks.
- Move all case studies under .codeflash/krrt7/
- Rename pypa/pip → python/pip (org grouping)
- Update session-start hooks, docs, scripts, and references
333 lines
8.4 KiB
Bash
Executable file
333 lines
8.4 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Single source of truth for optimization project scaffolding.
#
# Creates <output-dir> containing status.md, README.md, bench/, data/ and
# infra/ (cloud-init.yaml + vm-manage.sh) for one <org>/<project>.
#
# Usage:
#   bash scripts/scaffold.sh <org> <project> <output-dir>
#
# Called by:
#   - make bootstrap ORG=roboflow PROJECTS="supervision"
#   - .claude/hooks/session-start.sh (auto-scaffolds .codeflash/{teammember}/{org}/{project}/)

set -euo pipefail

# All three positional arguments are required; ${N:?msg} aborts with the
# usage string when one is missing or empty.
ORG="${1:?Usage: $0 <org> <project> <output-dir>}"
PROJECT="${2:?Usage: $0 <org> <project> <output-dir>}"
DEST="${3:?Usage: $0 <org> <project> <output-dir>}"

# Never overwrite an existing scaffold: an existing directory is a no-op.
if [ -d "$DEST" ]; then
  echo " Skipping $ORG/$PROJECT (already exists at $DEST)"
  exit 0
fi

# Uppercase project name, used for the Azure resource group name.
# printf (not echo) so a value such as "-n" is passed through unchanged.
PROJECT_UPPER=$(printf '%s' "$PROJECT" | tr '[:lower:]' '[:upper:]')

mkdir -p "$DEST/bench" "$DEST/data" "$DEST/infra"
# --- status.md ---
# Session-state tracking file. The heredoc delimiter is unquoted, so
# $PROJECT, $ORG, $PROJECT_UPPER and $(date ...) are expanded now; the
# backticks around the repo path are escaped so they are written literally
# instead of being run as a command substitution.
cat > "$DEST/status.md" <<STATUS
# $PROJECT Status

Last updated: $(date +%Y-%m-%d)

## Current state


## Target repo

\`~/Desktop/work/${ORG}_org/$PROJECT\`

## PRs

| PR | Branch | Status | Description |
|---|---|---|---|

## Key results


## VM

- **IP**:
- **Size**: Standard_D2s_v5
- **RG**: ${PROJECT_UPPER}-BENCH-RG
- **State**: Not provisioned

## Next


## Notes

STATUS
# --- README.md ---
# Case-study README template. Only the title line contains the project name,
# so it is emitted with printf and the rest comes from a quoted (literal)
# heredoc. This replaces a post-hoc `sed -i ''` substitution, which only works
# with BSD sed (GNU sed treats the empty string as the script and fails) and
# which broke on project names containing sed metacharacters such as | or &.
{
  printf '# %s Performance Optimization\n' "$PROJECT"
  cat <<'README_BODY'

<!-- One-line description of what was optimized and why -->

## Results

**Environment**: <!-- Python X.Y -->, <!-- OS + architecture -->, hyperfine (warmup 5, min-runs 30)

| Benchmark | Before | After | Speedup |
|---|---:|---:|---:|
| | | | |

## What We Changed

### Startup / Import

-

### Architecture

-

### Micro-optimizations

-

### I/O

-

## Upstream Contributions

| PR | Status | Description |
|---|---|---|
| [org/repo#N](https://github.com/org/repo/pull/N) | <!-- Open/Merged --> | |

## Methodology

### Environment

- **VM**: <!-- e.g., Azure Standard_D2s_v5 (2 vCPU, 8 GB RAM) -->
- **OS**: Ubuntu 24.04 LTS
- **Language**: <!-- e.g., Python 3.13 -->
- **Tooling**: hyperfine (warmup 5, min-runs 30)

### Profiling approach

1. <!-- e.g., python -X importtime -->
2. <!-- e.g., cProfile / py-spy -->
3. <!-- e.g., E2E hyperfine -->

## Repo Structure

```
.
├── README.md                    # This file
├── status.md                    # Session state tracking
├── bench/                       # Benchmark scripts
├── data/                        # Raw benchmark data
│   └── results.tsv
└── infra/                       # VM provisioning
    ├── cloud-init.yaml
    └── vm-manage.sh
```
README_BODY
} > "$DEST/README.md"
# --- bench/.gitkeep ---
# Placeholder so the otherwise-empty bench/ directory can be committed.
touch "$DEST/bench/.gitkeep"

# --- data/results.tsv ---
# Header row only. Written with printf so the column separators are real
# tab characters regardless of how this script is edited or copied.
printf 'date\tcommit\ttarget\tmetric\tbefore\tafter\tspeedup\tnotes\n' \
  > "$DEST/data/results.tsv"
# --- infra/cloud-init.yaml ---
# Cloud-config passed to the VM by infra/vm-manage.sh (--custom-data).
# It installs base packages, writes two helper scripts for azureuser, then
# installs hyperfine and the GitHub CLI and runs the project setup script.
#
# The heredoc delimiter is unquoted: $ORG and $PROJECT are expanded now, at
# scaffold time, while backslash-escaped references (\$HOME, \$GH_TOKEN, ...)
# are written literally and evaluated later on the VM. The clone target must
# therefore use the expanded project name: the setup script runs with
# `set -u` and no PROJECT variable exists on the VM.
cat > "$DEST/infra/cloud-init.yaml" <<CLOUDINIT
#cloud-config
package_update: true
packages:
  - git
  - build-essential
  - curl
  - wget
  - jq
  - linux-tools-common
  - linux-tools-generic

write_files:
  - path: /home/azureuser/setup_${PROJECT}.sh
    owner: azureuser:azureuser
    permissions: "0755"
    defer: true
    content: |
      #!/usr/bin/env bash
      set -euo pipefail
      export PATH="\$HOME/.local/bin:\$PATH"

      echo "=== Installing uv ==="
      curl -LsSf https://astral.sh/uv/install.sh | sh
      export PATH="\$HOME/.local/bin:\$PATH"

      echo "=== Installing Python ==="
      uv python install 3.13

      echo "=== Cloning $PROJECT ==="
      git clone https://github.com/$ORG/$PROJECT.git ~/$PROJECT

      echo "=== Creating venv and installing ==="
      cd ~/$PROJECT
      uv venv --python 3.13
      uv pip install -e .

      echo "=== Installing profiling tools ==="
      uv pip install memray py-spy

      echo "=== Creating results directory ==="
      mkdir -p ~/results

      echo "=== Done ==="

  - path: /home/azureuser/bin/gh-auth-token.sh
    owner: azureuser:azureuser
    permissions: "0755"
    defer: true
    content: |
      #!/usr/bin/env bash
      set -euo pipefail
      if [ -z "\${GH_TOKEN:-}" ]; then
        echo "Error: GH_TOKEN not set. Pass it via:"
        echo " export GH_TOKEN=ghp_... && ssh -o SendEnv=GH_TOKEN azureuser@<ip> 'bash ~/bin/gh-auth-token.sh'"
        exit 1
      fi
      echo "\$GH_TOKEN" | gh auth login --with-token
      gh auth status

runcmd:
  - wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -O /tmp/hyperfine.deb
  - dpkg -i /tmp/hyperfine.deb
  # Install GitHub CLI
  - curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /usr/share/keyrings/githubcli-archive-keyring.gpg
  - chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
  - echo "deb [arch=amd64 signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list
  - apt-get update -qq && apt-get install -y gh
  - su - azureuser -c 'bash /home/azureuser/setup_${PROJECT}.sh'
CLOUDINIT
# --- infra/vm-manage.sh ---
# Generates the per-project Azure VM helper (create/start/stop/ip/ssh/bench/
# gh-auth/destroy). The heredoc delimiter is unquoted: $PROJECT and
# $PROJECT_UPPER are baked in now, everything written as \$... is left for
# the generated script to evaluate, and a trailing "\\" becomes a single
# line-continuation backslash in the output.
#
# Changes relative to the previous template:
#   - cloud-init.yaml is located relative to the generated script rather than
#     the caller's working directory.
#   - the caller's public IP is fetched with `curl -4 -f`, so an HTTP error
#     page or an IPv6 address is never turned into an "<addr>/32" NSG rule.
#   - the branch passed to `bench` is shell-quoted before being embedded in
#     the remote command line.
#
# NOTE(review): `gh-auth` forwards GH_TOKEN with `ssh -o SendEnv`; that only
# reaches the remote script if the VM's sshd accepts the variable (AcceptEnv).
# Nothing in cloud-init.yaml configures this - confirm before relying on it.
cat > "$DEST/infra/vm-manage.sh" <<VMMANAGE
#!/usr/bin/env bash
# Manage the $PROJECT-bench Azure VM
set -euo pipefail

RG="${PROJECT_UPPER}-BENCH-RG"
VM="$PROJECT-bench"
REGION="westus2"
SIZE="Standard_D2s_v5"
IMAGE="Canonical:ubuntu-24_04-lts:server:latest"
SSH_KEY="\${SSH_KEY:-\$HOME/.ssh/id_ed25519.pub}"
# Directory holding this script, so cloud-init.yaml is found from any cwd.
SCRIPT_DIR="\$(cd "\$(dirname "\${BASH_SOURCE[0]}")" && pwd)"

case "\${1:-help}" in
  create)
    if [ ! -f "\$SSH_KEY" ]; then
      echo "Error: SSH public key not found at \$SSH_KEY"
      echo "Generate one: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519"
      echo "Or set SSH_KEY=/path/to/key.pub"
      exit 1
    fi

    echo "Creating resource group..."
    az group create --name "\$RG" --location "\$REGION" --only-show-errors --output none

    echo "Creating VM (Trusted Launch, SSH-only, locked-down NSG)..."
    az vm create \\
      --resource-group "\$RG" \\
      --name "\$VM" \\
      --image "\$IMAGE" \\
      --size "\$SIZE" \\
      --os-disk-size-gb 64 \\
      --admin-username azureuser \\
      --ssh-key-values "\$SSH_KEY" \\
      --authentication-type ssh \\
      --security-type TrustedLaunch \\
      --enable-secure-boot true \\
      --enable-vtpm true \\
      --nsg-rule NONE \\
      --custom-data "\$SCRIPT_DIR/cloud-init.yaml" \\
      --only-show-errors

    MY_IP=\$(curl -4 -fsS ifconfig.me)
    echo "Restricting SSH to \$MY_IP..."
    az network nsg rule create \\
      --resource-group "\$RG" \\
      --nsg-name "\${VM}NSG" \\
      --name AllowSSHFromMyIP \\
      --priority 1000 \\
      --source-address-prefixes "\$MY_IP/32" \\
      --destination-port-ranges 22 \\
      --access Allow \\
      --protocol Tcp \\
      --output none

    echo "VM created. Get IP with: \$0 ip"
    ;;

  start)
    echo "Starting VM..."
    az vm start --resource-group "\$RG" --name "\$VM"
    echo "Started. IP: \$(az vm show -g "\$RG" -n "\$VM" -d --query publicIps -o tsv)"
    ;;

  stop)
    echo "Deallocating VM (stops billing)..."
    az vm deallocate --resource-group "\$RG" --name "\$VM"
    echo "Deallocated."
    ;;

  ip)
    az vm show -g "\$RG" -n "\$VM" -d --query publicIps -o tsv
    ;;

  ssh)
    IP=\$(az vm show -g "\$RG" -n "\$VM" -d --query publicIps -o tsv)
    ssh -A azureuser@"\$IP" "\${@:2}"
    ;;

  bench)
    BRANCH="\${2:?Usage: \$0 bench <branch>}"
    IP=\$(az vm show -g "\$RG" -n "\$VM" -d --query publicIps -o tsv)
    ssh -A azureuser@"\$IP" "bash ~/bench/bench_all.sh \$(printf '%q' "\$BRANCH")"
    ;;

  gh-auth)
    if [ -z "\${GH_TOKEN:-}" ]; then
      echo "Error: GH_TOKEN not set."
      echo "Usage: GH_TOKEN=ghp_... \$0 gh-auth"
      exit 1
    fi
    IP=\$(az vm show -g "\$RG" -n "\$VM" -d --query publicIps -o tsv)
    ssh -o SendEnv=GH_TOKEN azureuser@"\$IP" "bash ~/bin/gh-auth-token.sh"
    ;;

  destroy)
    echo "Destroying resource group (all resources)..."
    az group delete --name "\$RG" --yes --no-wait
    echo "Deletion started."
    ;;

  help|*)
    echo "Usage: \$0 {create|start|stop|ip|ssh|bench <branch>|gh-auth|destroy}"
    echo ""
    echo " create - Provision VM with cloud-init"
    echo " start - Start deallocated VM"
    echo " stop - Deallocate VM (stops billing)"
    echo " ip - Show VM public IP"
    echo " ssh - SSH into VM (with agent forwarding)"
    echo " bench - Run benchmarks on a branch"
    echo " gh-auth - Authenticate gh CLI on VM (requires GH_TOKEN)"
    echo " destroy - Delete resource group and all resources"
    ;;
esac
VMMANAGE
# The generated VM helper is meant to be run directly, so mark it executable.
chmod +x "$DEST/infra/vm-manage.sh"

# Final confirmation line, matching the format of the "Skipping" message.
echo " Scaffolded $ORG/$PROJECT → $DEST/"