codeflash-agent/.codeflash/netflix/metaflow/infra/vm-manage.sh
Kevin Turcios 3b59d97647 squash
2026-04-13 14:12:17 -05:00

106 lines
2.9 KiB
Bash

#!/usr/bin/env bash
#
# Azure benchmark VM lifecycle management for Netflix/metaflow
#
# Usage:
#   bash infra/vm-manage.sh {create|start|stop|ip|ssh|bench <branch>|destroy}
#
# Environment:
#   SSH_KEY  Path to the SSH public key installed on the VM
#            (default: $HOME/.ssh/id_ed25519.pub).
#
# Requires `az` (Azure CLI), `curl` and `ssh` on PATH.
set -euo pipefail

readonly RG="metaflow-BENCH-RG"
readonly VM="metaflow-bench"
readonly REGION="westus2"
readonly SIZE="Standard_D2s_v5"
readonly IMAGE="Canonical:ubuntu-24_04-lts:server:latest"
readonly ADMIN_USER="azureuser"
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519.pub}"

# Resolve cloud-init.yaml next to this script instead of relative to the
# caller's working directory, so `create` behaves the same from anywhere.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
readonly SCRIPT_DIR
readonly CLOUD_INIT="$SCRIPT_DIR/cloud-init.yaml"

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the command summary to stdout.
usage() {
  echo "Usage: $0 {create|start|stop|ip|ssh|bench <branch>|destroy}"
  echo ""
  echo " create - Provision VM with cloud-init"
  echo " start - Start deallocated VM"
  echo " stop - Deallocate VM (stops billing)"
  echo " ip - Show VM public IP"
  echo " ssh - SSH into VM"
  echo " bench - Run benchmarks on a branch"
  echo " destroy - Delete resource group and all resources"
}

# Return 0 when $1 is a dotted-quad IPv4 address with every octet <= 255.
is_ipv4() {
  local ip=$1 octet
  local -r re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'
  [[ $ip =~ $re ]] || return 1
  for octet in "${BASH_REMATCH[@]:1}"; do
    # 10# forces base 10 so an octet such as "08" is not parsed as octal.
    (( 10#$octet <= 255 )) || return 1
  done
}

# Wrap $1 in single quotes (escaping embedded single quotes) so a POSIX
# shell re-parsing the result sees exactly one word equal to $1.
sh_quote() {
  local s=$1
  local -r sq="'" esc="'\\''"
  printf "'%s'" "${s//$sq/$esc}"
}

# Print this machine's public IPv4 address as reported by ifconfig.me.
# Aborts when the lookup fails or the reply is not an IPv4 address.
detect_my_ip() {
  local ip
  # -4: the NSG rule below is written as <ip>/32, so only IPv4 is usable.
  # -f: treat HTTP errors as failures instead of capturing an error page.
  # HTTPS: the reply decides which address may reach port 22.
  ip=$(curl -4 -fsS --max-time 10 https://ifconfig.me) \
    || die "Error: could not determine public IP via ifconfig.me"
  is_ipv4 "$ip" || die "Error: ifconfig.me returned an invalid IPv4 address: $ip"
  printf '%s\n' "$ip"
}

# Print the VM's public IP as reported by Azure (may be empty).
vm_ip() {
  az vm show -g "$RG" -n "$VM" -d --query publicIps -o tsv
}

# Like vm_ip, but abort when Azure reports no public IP.
require_vm_ip() {
  local ip
  ip=$(vm_ip) || die "Error: could not query the VM's public IP"
  [[ -n "$ip" ]] || die "Error: VM has no public IP (is it deallocated? try: $0 start)"
  printf '%s\n' "$ip"
}

# Provision the resource group and VM, then allow SSH only from the
# caller's current public IP.
cmd_create() {
  local my_ip

  if [[ ! -f "$SSH_KEY" ]]; then
    {
      echo "Error: SSH public key not found at $SSH_KEY"
      echo "Generate one: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519"
      echo "Or set SSH_KEY=/path/to/key.pub"
    } >&2
    exit 1
  fi
  # az accepts either a file path or literal text for --custom-data, so a
  # wrong path is not necessarily rejected; check the file explicitly.
  [[ -f "$CLOUD_INIT" ]] || die "Error: cloud-init file not found at $CLOUD_INIT"

  # Resolve the caller's IP before creating anything billable: if the lookup
  # fails afterwards, the VM would exist with no way to SSH into it.
  my_ip=$(detect_my_ip) || exit 1

  echo "Creating resource group..."
  az group create --name "$RG" --location "$REGION" --only-show-errors --output none

  echo "Creating VM (Trusted Launch, SSH-only, locked-down NSG)..."
  az vm create \
    --resource-group "$RG" \
    --name "$VM" \
    --image "$IMAGE" \
    --size "$SIZE" \
    --os-disk-size-gb 64 \
    --admin-username "$ADMIN_USER" \
    --ssh-key-values "$SSH_KEY" \
    --authentication-type ssh \
    --security-type TrustedLaunch \
    --enable-secure-boot true \
    --enable-vtpm true \
    --nsg-rule NONE \
    --custom-data "$CLOUD_INIT" \
    --only-show-errors

  echo "Restricting SSH to $my_ip..."
  az network nsg rule create \
    --resource-group "$RG" \
    --nsg-name "${VM}NSG" \
    --name AllowSSHFromMyIP \
    --priority 1000 \
    --source-address-prefixes "$my_ip/32" \
    --destination-port-ranges 22 \
    --access Allow \
    --protocol Tcp \
    --output none

  echo "VM created. Get IP with: $0 ip"
}

# Dispatch on the first argument; with no argument, show help.
main() {
  local ip branch

  case "${1:-help}" in
    create)
      cmd_create
      ;;
    start)
      echo "Starting VM..."
      az vm start --resource-group "$RG" --name "$VM"
      echo "Started. IP: $(vm_ip)"
      ;;
    stop)
      echo "Deallocating VM (stops billing)..."
      az vm deallocate --resource-group "$RG" --name "$VM"
      echo "Deallocated."
      ;;
    ip)
      vm_ip
      ;;
    ssh)
      ip=$(require_vm_ip) || exit 1
      # Arguments after "ssh" are forwarded to the ssh client unchanged.
      ssh -A "$ADMIN_USER@$ip" "${@:2}"
      ;;
    bench)
      branch="${2:?Usage: $0 bench <branch>}"
      ip=$(require_vm_ip) || exit 1
      # The remote shell re-parses this string, so the branch name must be
      # quoted to stay a single argument there.
      ssh -A "$ADMIN_USER@$ip" "bash ~/bench/bench_compare.sh $(sh_quote "$branch")"
      ;;
    destroy)
      echo "Destroying resource group (all resources)..."
      az group delete --name "$RG" --yes --no-wait
      echo "Deletion started."
      ;;
    help)
      usage
      ;;
    *)
      # Unknown subcommand: report on stderr with a non-zero status.
      usage >&2
      exit 2
      ;;
  esac
}

main "$@"