mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
bench: add CLI startup benchmark for codeflash compare --script
Measures median wall-clock time for --version, --help, auth status,
and compare --help across 30 runs with 3 warmups.
Usage:
codeflash compare main codeflash/optimize \
--script "python benchmarks/bench_cli_startup.py" \
--script-output benchmarks/results.json
This commit is contained in:
parent
b533f50bdc
commit
2208e8ca77
2 changed files with 72 additions and 0 deletions
0
benchmarks/__init__.py
Normal file
0
benchmarks/__init__.py
Normal file
72
benchmarks/bench_cli_startup.py
Normal file
72
benchmarks/bench_cli_startup.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""Benchmark CLI startup latency for codeflash compare --script mode.
|
||||
|
||||
Run from a worktree root. Installs deps via uv sync, then times several
|
||||
CLI entry points and writes a JSON file mapping command names to median
|
||||
wall-clock seconds.
|
||||
|
||||
Usage:
|
||||
codeflash compare main codeflash/optimize \
|
||||
--script "python benchmarks/bench_cli_startup.py" \
|
||||
--script-output benchmarks/results.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
WARMUP = 3
|
||||
RUNS = 30
|
||||
OUTPUT = os.environ.get("BENCH_OUTPUT", "benchmarks/results.json")
|
||||
|
||||
COMMANDS: dict[str, list[str]] = {
|
||||
"version": ["uv", "run", "codeflash", "--version"],
|
||||
"help": ["uv", "run", "codeflash", "--help"],
|
||||
"auth_status": ["uv", "run", "codeflash", "auth", "status"],
|
||||
"compare_help": ["uv", "run", "codeflash", "compare", "--help"],
|
||||
}
|
||||
|
||||
|
||||
def measure(cmd: list[str], warmup: int = WARMUP, runs: int = RUNS) -> float:
|
||||
"""Return median wall-clock seconds for *cmd* over *runs* iterations."""
|
||||
env = {**os.environ, "CODEFLASH_API_KEY": "bench_dummy_key"}
|
||||
for _ in range(warmup):
|
||||
subprocess.run(cmd, capture_output=True, check=False, env=env)
|
||||
|
||||
times: list[float] = []
|
||||
for _ in range(runs):
|
||||
t0 = time.perf_counter()
|
||||
subprocess.run(cmd, capture_output=True, check=False, env=env)
|
||||
times.append(time.perf_counter() - t0)
|
||||
|
||||
times.sort()
|
||||
mid = len(times) // 2
|
||||
return times[mid] if len(times) % 2 else (times[mid - 1] + times[mid]) / 2
|
||||
|
||||
|
||||
def main() -> None:
|
||||
# Ensure deps are installed in the worktree
|
||||
subprocess.run(["uv", "sync"], check=True, capture_output=True)
|
||||
|
||||
results: dict[str, float] = {}
|
||||
for name, cmd in COMMANDS.items():
|
||||
print(f" {name}: ", end="", flush=True)
|
||||
median = measure(cmd)
|
||||
results[name] = round(median, 4)
|
||||
print(f"{median * 1000:.0f} ms")
|
||||
|
||||
# Total = sum of medians (useful for a single summary number)
|
||||
results["__total__"] = round(sum(results.values()), 4)
|
||||
|
||||
output_path = Path(OUTPUT)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with output_path.open("w") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
print(f"\nResults written to {OUTPUT}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Reference in a new issue