codeflash-agent/.codeflash/coveragepy/coveragepy/infra/cloud-init.yaml
Kevin Turcios 3b59d97647 squash
2026-04-13 14:12:17 -05:00

250 lines
8.5 KiB
YAML

#cloud-config
# Provisions a VM for benchmarking coverage.py: installs APT packages, writes
# the benchmark/setup scripts listed under write_files, then runs the
# commands under runcmd.
package_update: true
# APT packages. hyperfine and the GitHub CLI are not in this list; they are
# installed by the runcmd section instead.
packages:
- git
- build-essential
- curl
- wget
- jq
# NOTE(review): presumably installed for `perf`; nothing in this file invokes
# it directly — confirm it is still needed.
- linux-tools-common
- linux-tools-generic
# Every entry below sets `defer: true`, which per the cloud-init write_files
# documentation delays the write until the final stage, after users have been
# created and packages installed (each file is owned by azureuser).
write_files:
- path: /home/azureuser/bench/bench_numbits.py
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env python3
    """Micro-benchmark for coverage.py numbits operations."""
    import json
    import random
    import sys
    import timeit

    # Put the coveragepy checkout first on sys.path so `coverage` resolves to it.
    sys.path.insert(0, "/home/azureuser/coveragepy")
    from coverage.numbits import (
        num_in_numbits,
        numbits_any_intersection,
        numbits_intersection,
        numbits_to_nums,
        numbits_union,
        nums_to_numbits,
    )


    def _draw(limit, count):
        """Return `count` distinct integers sampled from range(1, limit)."""
        return set(random.sample(range(1, limit), count))


    # Fixed seed so every run benchmarks the same inputs. The draw order below
    # (first operands small/medium/large, then second operands in the same
    # order) determines the sampled values, so it must not be rearranged.
    random.seed(42)

    # First operands: raw number sets and their numbits encoding.
    SMALL = _draw(200, 50)
    MEDIUM = _draw(2000, 500)
    LARGE = _draw(10000, 3000)
    SMALL_NB, MEDIUM_NB, LARGE_NB = (
        nums_to_numbits(nums) for nums in (SMALL, MEDIUM, LARGE)
    )

    # Second operands for the binary operations (numbits form only).
    SMALL_NB2 = nums_to_numbits(_draw(200, 50))
    MEDIUM_NB2 = nums_to_numbits(_draw(2000, 500))
    LARGE_NB2 = nums_to_numbits(_draw(10000, 3000))

    # Number of calls timed per benchmark case.
    LOOPS = 10_000

    # Label -> zero-argument callable; reported in insertion order.
    CASES = {
        "nums_to_numbits (small)": lambda: nums_to_numbits(SMALL),
        "nums_to_numbits (medium)": lambda: nums_to_numbits(MEDIUM),
        "nums_to_numbits (large)": lambda: nums_to_numbits(LARGE),
        "numbits_to_nums (small)": lambda: numbits_to_nums(SMALL_NB),
        "numbits_to_nums (medium)": lambda: numbits_to_nums(MEDIUM_NB),
        "numbits_to_nums (large)": lambda: numbits_to_nums(LARGE_NB),
        "numbits_union (small)": lambda: numbits_union(SMALL_NB, SMALL_NB2),
        "numbits_union (medium)": lambda: numbits_union(MEDIUM_NB, MEDIUM_NB2),
        "numbits_union (large)": lambda: numbits_union(LARGE_NB, LARGE_NB2),
        "numbits_intersection (small)": lambda: numbits_intersection(SMALL_NB, SMALL_NB2),
        "numbits_intersection (medium)": lambda: numbits_intersection(MEDIUM_NB, MEDIUM_NB2),
        "numbits_intersection (large)": lambda: numbits_intersection(LARGE_NB, LARGE_NB2),
        "numbits_any_intersection (small)": lambda: numbits_any_intersection(SMALL_NB, SMALL_NB2),
        "numbits_any_intersection (medium)": lambda: numbits_any_intersection(MEDIUM_NB, MEDIUM_NB2),
        "numbits_any_intersection (large)": lambda: numbits_any_intersection(LARGE_NB, LARGE_NB2),
        "num_in_numbits (small)": lambda: num_in_numbits(100, SMALL_NB),
        "num_in_numbits (medium)": lambda: num_in_numbits(1000, MEDIUM_NB),
        "num_in_numbits (large)": lambda: num_in_numbits(5000, LARGE_NB),
    }

    # Optional first argument: path of a JSON file to receive the timings.
    json_path = sys.argv[1] if sys.argv[1:] else None
    timings_us = {}

    print(f"{'Benchmark':<45} {'Time (us)':>12}")
    print("-" * 58)
    for label, case in CASES.items():
        # Mean time per call, converted from seconds to microseconds.
        per_call_us = timeit.timeit(case, number=LOOPS) / LOOPS * 1_000_000
        timings_us[label] = per_call_us
        print(f"{label:<45} {per_call_us:>10.2f}us")

    if json_path:
        with open(json_path, "w") as fh:
            json.dump(timings_us, fh, indent=2)
        print(f"\nJSON written to {json_path}")
- path: /home/azureuser/bench/bench_e2e.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # End-to-end coverage.py timings with hyperfine over a generated workload:
    # `coverage run`, `coverage json`, and an uninstrumented baseline.
    set -euo pipefail

    PYTHON="$HOME/coveragepy/.venv/bin/python"
    COVERAGE="$HOME/coveragepy/.venv/bin/coverage"

    echo "=== coverage.py E2E benchmarks ==="
    echo "Python: $("$PYTHON" --version)"
    echo "Coverage: $("$COVERAGE" --version | head -1)"
    echo ""

    # Create a synthetic workload: many-file project
    WORKLOAD="$HOME/bench/workload"
    # run_all.py is the last file the generator writes, so its presence means
    # generation finished. Testing only for the directory would skip
    # regeneration forever after a run that failed part-way through.
    if [ ! -f "$WORKLOAD/run_all.py" ]; then
      echo "--- Creating synthetic workload ---"
      mkdir -p "$WORKLOAD"
      # The target directory is passed as an argument (sys.argv[1]) rather than
      # spliced into the Python source, so unusual characters in the path
      # cannot break the generated program. The quoted delimiter disables
      # shell expansion inside the here-document.
      "$PYTHON" - "$WORKLOAD" <<'PYEOF'
    import os
    import sys

    workload = sys.argv[1]
    # 200 modules, each defining one function with 50 assignment lines.
    for i in range(200):
        with open(os.path.join(workload, f'mod_{i}.py'), 'w') as f:
            f.write(f'def func_{i}():\n')
            for j in range(50):
                f.write(f' x_{j} = {j} * {i}\n')
            f.write(f' return x_0\n\n')
    # Driver that imports every module and calls every function once.
    with open(os.path.join(workload, 'run_all.py'), 'w') as f:
        for i in range(200):
            f.write(f'from mod_{i} import func_{i}\n')
        for i in range(200):
            f.write(f'func_{i}()\n')
    PYEOF
    fi

    # --shell=none makes hyperfine run the command directly instead of through
    # a shell; the command string is therefore split on whitespace, which
    # assumes the paths contain no spaces.
    echo "--- coverage run (200 modules, 50 lines each) ---"
    hyperfine --warmup 5 --min-runs 30 --shell=none \
      --command-name 'coverage run' \
      "$COVERAGE run $WORKLOAD/run_all.py"
    echo ""

    echo "--- coverage json (report generation) ---"
    # Produce a data file first so `coverage json` has something to report on.
    "$COVERAGE" run "$WORKLOAD/run_all.py" 2>/dev/null
    hyperfine --warmup 3 --min-runs 20 --shell=none \
      --command-name 'coverage json' \
      "$COVERAGE json -o /dev/null"
    echo ""

    echo "--- baseline (no coverage) ---"
    hyperfine --warmup 5 --min-runs 30 --shell=none \
      --command-name 'no coverage' \
      "$PYTHON $WORKLOAD/run_all.py"
- path: /home/azureuser/bench/bench_all.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Check out one branch of ~/coveragepy, reinstall it into the venv, and
    # save the micro and end-to-end benchmark output under
    # ~/results/<branch>-<timestamp>/.
    #
    # Usage: bench_all.sh <branch>
    set -euo pipefail

    branch="${1:?Usage: bench_all.sh <branch>}"
    stamp="$(date +%Y%m%d-%H%M%S)"
    # Slashes in the branch name become dashes so the result directory name
    # stays a single path component.
    out_dir="$HOME/results/${branch//\//-}-${stamp}"
    mkdir -p "$out_dir"
    venv_python="$HOME/coveragepy/.venv/bin/python"

    cd "$HOME/coveragepy"
    git fetch origin
    git checkout "$branch"
    # uv is looked up via ~/.local/bin.
    export PATH="$HOME/.local/bin:$PATH"
    uv pip install -e .

    printf '%s\n' \
      "=== Benchmarking branch: $branch ===" \
      "Output: $out_dir" \
      "" \
      "--- Micro: numbits ---"
    $venv_python "$HOME/bench/bench_numbits.py" "$out_dir/numbits.json"

    printf '%s\n' "" "--- E2E ---"
    # Show the end-to-end output live and keep a copy next to the JSON.
    bash "$HOME/bench/bench_e2e.sh" 2>&1 | tee "$out_dir/e2e.txt"

    printf '%s\n' "" "Results saved to $out_dir/"
    ls -la "$out_dir/"
- path: /home/azureuser/bench/bench_compare.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Run the full benchmark suite on two branches, base first, then list the
    # result directories so they can be compared by hand.
    set -euo pipefail

    usage="Usage: bench_compare.sh <base-branch> <opt-branch>"
    base_ref="${1:?$usage}"
    opt_ref="${2:?$usage}"

    echo "=== Comparing $base_ref vs $opt_ref ==="
    for ref in "$base_ref" "$opt_ref"; do
      bash "$HOME/bench/bench_all.sh" "$ref"
    done

    printf '%s\n' "" "Compare results in ~/results/"
    ls "$HOME/results/"
- path: /home/azureuser/setup_coveragepy.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # One-time user-level setup: install uv and Python, clone coveragepy,
    # create its virtualenv, add profiling tools, and create ~/results.
    set -euo pipefail

    local_bin="$HOME/.local/bin"
    repo_dir="$HOME/coveragepy"
    py_version="3.13"

    # Print a section banner in the form "=== <title> ===".
    banner() {
      echo "=== $1 ==="
    }

    export PATH="$local_bin:$PATH"

    banner "Installing uv"
    curl -LsSf https://astral.sh/uv/install.sh | sh
    # NOTE(review): presumably the installer places uv in ~/.local/bin, hence
    # the PATH export around it — confirm.
    export PATH="$local_bin:$PATH"

    banner "Installing Python"
    uv python install "$py_version"

    banner "Cloning coveragepy"
    git clone https://github.com/nedbat/coveragepy.git "$repo_dir"

    banner "Creating venv and installing"
    cd "$repo_dir"
    uv venv --python "$py_version"
    # NOTE(review): confirm coveragepy actually declares a `dev` extra; if it
    # does not, only the base package is installed here.
    uv pip install -e ".[dev]"

    banner "Installing profiling tools"
    uv pip install memray py-spy

    banner "Creating results directory"
    mkdir -p "$HOME/results"

    banner "Done"
    # Smoke test: import coverage from the new venv and report its version.
    "$repo_dir/.venv/bin/python" -c "import coverage; print(f'coverage {coverage.__version__} installed')"
- path: /home/azureuser/bin/gh-auth-token.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Store a GitHub token in the GitHub CLI's credential store.
    #
    # Input: the token in the GH_TOKEN environment variable.
    # Exit status: 1 when GH_TOKEN is unset or empty; otherwise the script
    # stops on the first failing gh command (set -e).
    #
    # NOTE(review): the SendEnv hint printed below only works if this VM's sshd
    # accepts the variable (AcceptEnv GH_TOKEN); nothing visible in this
    # cloud-init configures that — confirm.
    set -euo pipefail

    if [ -z "${GH_TOKEN:-}" ]; then
      # Diagnostics go to stderr so stdout stays clean for callers.
      echo "Error: GH_TOKEN not set. Pass it via:" >&2
      echo " export GH_TOKEN=ghp_... && ssh -o SendEnv=GH_TOKEN azureuser@<ip> 'bash ~/bin/gh-auth-token.sh'" >&2
      exit 1
    fi

    # gh refuses to store credentials while GH_TOKEN is present in its
    # environment (it reports that the variable is already being used for
    # authentication and exits non-zero). Move the token to a shell-local
    # variable and drop it from the environment before logging in.
    token="$GH_TOKEN"
    unset GH_TOKEN

    # printf rather than echo: the token is written verbatim regardless of
    # its content.
    printf '%s\n' "$token" | gh auth login --with-token
    # With GH_TOKEN unset this reports the credentials that were just stored.
    gh auth status
# Boot-time commands: install tools that are not in the packages list, then
# hand off to the per-user setup script written by write_files.
runcmd:
# hyperfine (used by bench_e2e.sh) comes from a pinned upstream release:
# v1.19.0, amd64 .deb only.
- wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -O /tmp/hyperfine.deb
- dpkg -i /tmp/hyperfine.deb
# Install GitHub CLI
# Fetch the repository signing key, make it readable by non-root users, and
# register the apt source (amd64) that is verified against that key.
- curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /usr/share/keyrings/githubcli-archive-keyring.gpg
- chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
- echo "deb [arch=amd64 signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list
# Refresh the package index so the newly added source is visible, then install gh.
- apt-get update -qq && apt-get install -y gh
# Run the user-level setup as azureuser in a login shell (`su -`), so the
# script sees that user's HOME.
- su - azureuser -c 'bash /home/azureuser/setup_coveragepy.sh'