mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
250 lines
8.5 KiB
YAML
250 lines
8.5 KiB
YAML
#cloud-config
# Provisions a VM for benchmarking coverage.py.

# Refresh the package index before the package list below is installed.
package_update: true

# Build toolchain, download helpers, jq, and the linux-tools packages.
# NOTE(review): linux-tools-generic presumably targets the generic kernel
# flavour; confirm it matches the kernel the chosen image actually boots.
packages: [git, build-essential, curl, wget, jq, linux-tools-common, linux-tools-generic]
|
|
|
|
write_files:
# Micro-benchmark script for coverage.numbits. Written with `defer: true`
# and owned by azureuser.
- path: /home/azureuser/bench/bench_numbits.py
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env python3
    """Micro-benchmark for coverage.py numbits operations.

    Usage: bench_numbits.py [OUTFILE]

    Prints a table of per-call timings in microseconds. When OUTFILE is
    given, the same numbers are also written there as a JSON object that
    maps benchmark name to microseconds per call.
    """
    import json
    import random
    import sys
    import timeit

    # Put the coveragepy checkout first on the import path.
    sys.path.insert(0, "/home/azureuser/coveragepy")
    from coverage.numbits import (
        nums_to_numbits,
        numbits_to_nums,
        numbits_union,
        numbits_intersection,
        numbits_any_intersection,
        num_in_numbits,
    )

    # Fixed seed so the generated inputs are reproducible between runs.
    random.seed(42)

    SMALL = set(random.sample(range(1, 200), 50))
    MEDIUM = set(random.sample(range(1, 2000), 500))
    LARGE = set(random.sample(range(1, 10000), 3000))

    SMALL_NB = nums_to_numbits(SMALL)
    MEDIUM_NB = nums_to_numbits(MEDIUM)
    LARGE_NB = nums_to_numbits(LARGE)

    # Second operands for the binary operations (union / intersection).
    SMALL_NB2 = nums_to_numbits(set(random.sample(range(1, 200), 50)))
    MEDIUM_NB2 = nums_to_numbits(set(random.sample(range(1, 2000), 500)))
    LARGE_NB2 = nums_to_numbits(set(random.sample(range(1, 10000), 3000)))

    # Calls per timing round.
    N = 10_000
    # Timing rounds per benchmark; only the fastest round is reported.
    REPEAT = 5

    benchmarks = {
        "nums_to_numbits (small)": lambda: nums_to_numbits(SMALL),
        "nums_to_numbits (medium)": lambda: nums_to_numbits(MEDIUM),
        "nums_to_numbits (large)": lambda: nums_to_numbits(LARGE),
        "numbits_to_nums (small)": lambda: numbits_to_nums(SMALL_NB),
        "numbits_to_nums (medium)": lambda: numbits_to_nums(MEDIUM_NB),
        "numbits_to_nums (large)": lambda: numbits_to_nums(LARGE_NB),
        "numbits_union (small)": lambda: numbits_union(SMALL_NB, SMALL_NB2),
        "numbits_union (medium)": lambda: numbits_union(MEDIUM_NB, MEDIUM_NB2),
        "numbits_union (large)": lambda: numbits_union(LARGE_NB, LARGE_NB2),
        "numbits_intersection (small)": lambda: numbits_intersection(SMALL_NB, SMALL_NB2),
        "numbits_intersection (medium)": lambda: numbits_intersection(MEDIUM_NB, MEDIUM_NB2),
        "numbits_intersection (large)": lambda: numbits_intersection(LARGE_NB, LARGE_NB2),
        "numbits_any_intersection (small)": lambda: numbits_any_intersection(SMALL_NB, SMALL_NB2),
        "numbits_any_intersection (medium)": lambda: numbits_any_intersection(MEDIUM_NB, MEDIUM_NB2),
        "numbits_any_intersection (large)": lambda: numbits_any_intersection(LARGE_NB, LARGE_NB2),
        "num_in_numbits (small)": lambda: num_in_numbits(100, SMALL_NB),
        "num_in_numbits (medium)": lambda: num_in_numbits(1000, MEDIUM_NB),
        "num_in_numbits (large)": lambda: num_in_numbits(5000, LARGE_NB),
    }


    def main():
        """Time every benchmark, print the table, optionally dump JSON."""
        outfile = sys.argv[1] if len(sys.argv) > 1 else None
        results = {}

        print(f"{'Benchmark':<45} {'Time (us)':>12}")
        print("-" * 58)
        for name, func in benchmarks.items():
            # Take the minimum over several rounds: slower rounds mostly
            # reflect interference from other processes, not the code.
            best = min(timeit.repeat(func, number=N, repeat=REPEAT))
            us = best / N * 1_000_000
            results[name] = us
            print(f"{name:<45} {us:>10.2f}us")

        if outfile:
            with open(outfile, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)
            print(f"\nJSON written to {outfile}")


    if __name__ == "__main__":
        main()
|
|
|
|
# End-to-end hyperfine benchmarks: `coverage run`, `coverage json`, and a
# no-coverage baseline over a generated 200-module workload.
- path: /home/azureuser/bench/bench_e2e.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # End-to-end coverage.py benchmarks driven by hyperfine.
    set -euo pipefail
    PYTHON="$HOME/coveragepy/.venv/bin/python"
    COVERAGE="$HOME/coveragepy/.venv/bin/coverage"

    echo "=== coverage.py E2E benchmarks ==="
    echo "Python: $($PYTHON --version)"
    echo "Coverage: $($COVERAGE --version | head -1)"
    echo ""

    # Create a synthetic workload: many-file project
    WORKLOAD="$HOME/bench/workload"
    # run_all.py is written last by the generator, so its presence marks a
    # complete workload. Testing only for the directory would keep reusing
    # a half-written workload after an interrupted first run.
    if [ ! -f "$WORKLOAD/run_all.py" ]; then
      echo "--- Creating synthetic workload ---"
      mkdir -p "$WORKLOAD"
      # The Python source must start at column 0 of the script: it is passed
      # verbatim to `python -c`.
      $PYTHON -c "
    import os
    for i in range(200):
        with open(os.path.join('$WORKLOAD', f'mod_{i}.py'), 'w') as f:
            f.write(f'def func_{i}():\n')
            for j in range(50):
                f.write(f' x_{j} = {j} * {i}\n')
            f.write(f' return x_0\n\n')
    with open(os.path.join('$WORKLOAD', 'run_all.py'), 'w') as f:
        for i in range(200):
            f.write(f'from mod_{i} import func_{i}\n')
        for i in range(200):
            f.write(f'func_{i}()\n')
    "
    fi

    # coverage resolves its data file (.coverage) and its configuration
    # files against the current directory. Pin the directory so the
    # measurements do not depend on where this script was launched from.
    cd "$WORKLOAD"

    echo "--- coverage run (200 modules, 50 lines each) ---"
    hyperfine --warmup 5 --min-runs 30 --shell=none \
      --command-name 'coverage run' \
      "$COVERAGE run $WORKLOAD/run_all.py"

    echo ""
    echo "--- coverage json (report generation) ---"
    # Produce a fresh data file for the report benchmark to read.
    $COVERAGE run "$WORKLOAD/run_all.py" 2>/dev/null
    hyperfine --warmup 3 --min-runs 20 --shell=none \
      --command-name 'coverage json' \
      "$COVERAGE json -o /dev/null"

    echo ""
    echo "--- baseline (no coverage) ---"
    hyperfine --warmup 5 --min-runs 30 --shell=none \
      --command-name 'no coverage' \
      "$PYTHON $WORKLOAD/run_all.py"
|
|
|
|
# Checks out one branch of ~/coveragepy, reinstalls it, and runs the micro
# and end-to-end benchmarks into a timestamped results directory.
- path: /home/azureuser/bench/bench_all.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Usage: bench_all.sh <branch>
    #
    # Results land in ~/results/<branch>-<timestamp>/ as numbits.json,
    # e2e.txt and commit.txt (the commit that was measured).
    set -euo pipefail
    BRANCH="${1:?Usage: bench_all.sh <branch>}"
    TS=$(date +%Y%m%d-%H%M%S)
    # Slashes in the branch name would create nested directories.
    OUTDIR="$HOME/results/${BRANCH//\//-}-${TS}"
    mkdir -p "$OUTDIR"
    PYTHON="$HOME/coveragepy/.venv/bin/python"

    cd ~/coveragepy
    git fetch origin
    git checkout "$BRANCH"
    # Checking out an existing local branch does not move it to what was
    # just fetched, so a re-run would measure stale code. Fast-forward when
    # HEAD is a branch with a same-named remote branch. Tags, SHAs and
    # local-only branches are left alone, and a branch that cannot be
    # fast-forwarded is benchmarked as-is with a warning.
    if git symbolic-ref -q HEAD >/dev/null \
        && git show-ref --verify --quiet "refs/remotes/origin/$BRANCH"; then
      git merge --ff-only "origin/$BRANCH" \
        || echo "WARNING: could not fast-forward $BRANCH to origin/$BRANCH; benchmarking the local branch as-is" >&2
    fi
    # Record exactly which commit the numbers belong to.
    git rev-parse HEAD > "$OUTDIR/commit.txt"
    export PATH="$HOME/.local/bin:$PATH"
    uv pip install -e .

    echo "=== Benchmarking branch: $BRANCH ==="
    echo "Output: $OUTDIR"
    echo ""

    echo "--- Micro: numbits ---"
    $PYTHON ~/bench/bench_numbits.py "$OUTDIR/numbits.json"

    echo ""
    echo "--- E2E ---"
    bash ~/bench/bench_e2e.sh 2>&1 | tee "$OUTDIR/e2e.txt"

    echo ""
    echo "Results saved to $OUTDIR/"
    ls -la "$OUTDIR/"
|
|
|
|
# Runs bench_all.sh on a base branch and then on an optimized branch.
- path: /home/azureuser/bench/bench_compare.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Benchmark two branches back to back, then list the results directory.
    set -euo pipefail
    usage="Usage: bench_compare.sh <base-branch> <opt-branch>"
    base_branch="${1:?$usage}"
    opt_branch="${2:?$usage}"

    echo "=== Comparing $base_branch vs $opt_branch ==="
    # Base first, then the optimized branch; set -e stops at the first failure.
    for branch in "$base_branch" "$opt_branch"; do
      bash ~/bench/bench_all.sh "$branch"
    done

    echo
    echo "Compare results in ~/results/"
    ls ~/results/
|
|
|
|
# Per-user setup, run as azureuser from runcmd: installs uv and Python 3.13,
# clones coveragepy, and creates its virtualenv.
- path: /home/azureuser/setup_coveragepy.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Install uv + Python 3.13, clone coveragepy into ~/coveragepy, create
    # its virtualenv with profiling tools, and create ~/results.
    set -euo pipefail
    # Set once, up front; this puts ~/.local/bin on PATH for the uv calls below.
    export PATH="$HOME/.local/bin:$PATH"

    echo "=== Installing uv ==="
    curl -LsSf https://astral.sh/uv/install.sh | sh

    echo "=== Installing Python ==="
    uv python install 3.13

    echo "=== Cloning coveragepy ==="
    # `git clone` into an existing directory fails, which under `set -e`
    # would abort any re-run of this script; reuse the checkout instead.
    if [ -d "$HOME/coveragepy/.git" ]; then
      echo "$HOME/coveragepy already cloned; skipping"
    else
      git clone https://github.com/nedbat/coveragepy.git ~/coveragepy
    fi

    echo "=== Creating venv and installing ==="
    cd ~/coveragepy
    uv venv --python 3.13
    uv pip install -e ".[dev]"

    echo "=== Installing profiling tools ==="
    uv pip install memray py-spy

    echo "=== Creating results directory ==="
    mkdir -p ~/results

    echo "=== Done ==="
    ~/coveragepy/.venv/bin/python -c "import coverage; print(f'coverage {coverage.__version__} installed')"
|
|
|
|
# Logs the GitHub CLI in with a token taken from the GH_TOKEN environment
# variable.
- path: /home/azureuser/bin/gh-auth-token.sh
  owner: azureuser:azureuser
  permissions: "0755"
  defer: true
  content: |
    #!/usr/bin/env bash
    # Store a GitHub token in gh's credential store, then show auth status.
    # Exits 1 with a usage hint when GH_TOKEN is unset or empty.
    set -euo pipefail
    if [ -z "${GH_TOKEN:-}" ]; then
      echo "Error: GH_TOKEN not set. Pass it via:"
      echo " export GH_TOKEN=ghp_... && ssh -o SendEnv=GH_TOKEN azureuser@<ip> 'bash ~/bin/gh-auth-token.sh'"
      exit 1
    fi
    # `gh auth login` refuses to store credentials while GH_TOKEN is present
    # in its environment (it reports that the variable is already being used
    # for authentication). Move the token to a shell-local variable and feed
    # it on stdin only.
    token="$GH_TOKEN"
    unset GH_TOKEN
    printf '%s\n' "$token" | gh auth login --with-token
    gh auth status
|
|
|
|
# Commands that install hyperfine and the GitHub CLI, then hand over to the
# per-user setup script.
runcmd:
# hyperfine is installed from the upstream v1.19.0 amd64 .deb release.
- wget -q https://github.com/sharkdp/hyperfine/releases/download/v1.19.0/hyperfine_1.19.0_amd64.deb -O /tmp/hyperfine.deb
- dpkg -i /tmp/hyperfine.deb
# Install GitHub CLI
- curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /usr/share/keyrings/githubcli-archive-keyring.gpg
- chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
- echo "deb [arch=amd64 signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list
- apt-get update -qq && apt-get install -y gh
# write_files sets `owner` on the files themselves.
# NOTE(review): depending on the cloud-init release, the parent directories
# it creates (~/bench, ~/bin) may be left owned by root; confirm on the
# target image. bench_e2e.sh later runs `mkdir -p ~/bench/workload` as
# azureuser, so hand the directories over explicitly.
- chown -R azureuser:azureuser /home/azureuser/bench /home/azureuser/bin
- su - azureuser -c 'bash /home/azureuser/setup_coveragepy.sh'
|