mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
Measures median wall-clock time for --version, --help, auth status,
and compare --help across 30 runs with 3 warmups.
Usage:
codeflash compare main codeflash/optimize \
--script "python benchmarks/bench_cli_startup.py" \
--script-output benchmarks/results.json
72 lines
2.2 KiB
Python
72 lines
2.2 KiB
Python
"""Benchmark CLI startup latency for codeflash compare --script mode.
|
|
|
|
Run from a worktree root. Installs deps via uv sync, then times several
|
|
CLI entry points and writes a JSON file mapping command names to median
|
|
wall-clock seconds.
|
|
|
|
Usage:
|
|
codeflash compare main codeflash/optimize \
|
|
--script "python benchmarks/bench_cli_startup.py" \
|
|
--script-output benchmarks/results.json
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import time
|
|
from pathlib import Path
|
|
|
|
WARMUP = 3
|
|
RUNS = 30
|
|
OUTPUT = os.environ.get("BENCH_OUTPUT", "benchmarks/results.json")
|
|
|
|
COMMANDS: dict[str, list[str]] = {
|
|
"version": ["uv", "run", "codeflash", "--version"],
|
|
"help": ["uv", "run", "codeflash", "--help"],
|
|
"auth_status": ["uv", "run", "codeflash", "auth", "status"],
|
|
"compare_help": ["uv", "run", "codeflash", "compare", "--help"],
|
|
}
|
|
|
|
|
|
def measure(cmd: list[str], warmup: int = WARMUP, runs: int = RUNS) -> float:
|
|
"""Return median wall-clock seconds for *cmd* over *runs* iterations."""
|
|
env = {**os.environ, "CODEFLASH_API_KEY": "bench_dummy_key"}
|
|
for _ in range(warmup):
|
|
subprocess.run(cmd, capture_output=True, check=False, env=env)
|
|
|
|
times: list[float] = []
|
|
for _ in range(runs):
|
|
t0 = time.perf_counter()
|
|
subprocess.run(cmd, capture_output=True, check=False, env=env)
|
|
times.append(time.perf_counter() - t0)
|
|
|
|
times.sort()
|
|
mid = len(times) // 2
|
|
return times[mid] if len(times) % 2 else (times[mid - 1] + times[mid]) / 2
|
|
|
|
|
|
def main() -> None:
|
|
# Ensure deps are installed in the worktree
|
|
subprocess.run(["uv", "sync"], check=True, capture_output=True)
|
|
|
|
results: dict[str, float] = {}
|
|
for name, cmd in COMMANDS.items():
|
|
print(f" {name}: ", end="", flush=True)
|
|
median = measure(cmd)
|
|
results[name] = round(median, 4)
|
|
print(f"{median * 1000:.0f} ms")
|
|
|
|
# Total = sum of medians (useful for a single summary number)
|
|
results["__total__"] = round(sum(results.values()), 4)
|
|
|
|
output_path = Path(OUTPUT)
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with output_path.open("w") as f:
|
|
json.dump(results, f, indent=2)
|
|
print(f"\nResults written to {OUTPUT}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|