From 2208e8ca77b40a7703edec65ccaed99cc7271601 Mon Sep 17 00:00:00 2001
From: Kevin Turcios
Date: Thu, 9 Apr 2026 23:59:26 -0500
Subject: [PATCH] bench: add CLI startup benchmark for codeflash compare --script

Measures median wall-clock time for --version, --help, auth status,
and compare --help across 30 runs with 3 warmups.

Usage:
  codeflash compare main codeflash/optimize \
    --script "python benchmarks/bench_cli_startup.py" \
    --script-output benchmarks/results.json
---
 benchmarks/__init__.py          |  0
 benchmarks/bench_cli_startup.py | 97 +++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/bench_cli_startup.py

diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/benchmarks/bench_cli_startup.py b/benchmarks/bench_cli_startup.py
new file mode 100644
index 000000000..e6b8e0ad0
--- /dev/null
+++ b/benchmarks/bench_cli_startup.py
@@ -0,0 +1,97 @@
+"""Benchmark CLI startup latency for codeflash compare --script mode.
+
+Run from a worktree root. Installs deps via uv sync, then times several
+CLI entry points and writes a JSON file mapping command names to median
+wall-clock seconds. Set BENCH_OUTPUT to override the output path.
+
+Usage:
+    codeflash compare main codeflash/optimize \
+        --script "python benchmarks/bench_cli_startup.py" \
+        --script-output benchmarks/results.json
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import statistics
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+# Untimed runs executed per command before measurement starts.
+WARMUP = 3
+# Timed runs per command; the median of these is reported.
+RUNS = 30
+# Destination of the JSON report; override with the BENCH_OUTPUT env var.
+OUTPUT = os.environ.get("BENCH_OUTPUT", "benchmarks/results.json")
+
+# Maps each result key to the argv of the CLI invocation to time.
+COMMANDS: dict[str, list[str]] = {
+    "version": ["uv", "run", "codeflash", "--version"],
+    "help": ["uv", "run", "codeflash", "--help"],
+    "auth_status": ["uv", "run", "codeflash", "auth", "status"],
+    "compare_help": ["uv", "run", "codeflash", "compare", "--help"],
+}
+
+
+def measure(cmd: list[str], warmup: int = WARMUP, runs: int = RUNS) -> float:
+    """Return median wall-clock seconds for *cmd* over *runs* iterations.
+
+    Args:
+        cmd: Argument vector of the command to time.
+        warmup: Number of untimed runs executed first.
+        runs: Number of timed runs; must be at least 1.
+
+    Raises:
+        ValueError: If *runs* is less than 1 (no median exists).
+    """
+    if runs < 1:
+        # Fail before spawning anything rather than with an IndexError later.
+        raise ValueError(f"runs must be at least 1, got {runs}")
+
+    # Every run gets a placeholder API key layered over the current env.
+    env = {**os.environ, "CODEFLASH_API_KEY": "bench_dummy_key"}
+    for _ in range(warmup):
+        # Exit status is ignored on purpose: only elapsed time is recorded.
+        subprocess.run(cmd, capture_output=True, check=False, env=env)
+
+    times: list[float] = []
+    for _ in range(runs):
+        t0 = time.perf_counter()
+        subprocess.run(cmd, capture_output=True, check=False, env=env)
+        times.append(time.perf_counter() - t0)
+
+    return statistics.median(times)
+
+
+def main() -> None:
+    """Sync deps, time every entry in COMMANDS, and write the JSON report."""
+    # Ensure deps are installed in the worktree
+    try:
+        subprocess.run(["uv", "sync"], check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as exc:
+        # Captured stderr is otherwise never shown; surface it before failing.
+        sys.stderr.write(exc.stderr or "")
+        raise
+
+    results: dict[str, float] = {}
+    for name, cmd in COMMANDS.items():
+        print(f" {name}: ", end="", flush=True)
+        median = measure(cmd)
+        results[name] = round(median, 4)
+        print(f"{median * 1000:.0f} ms")
+
+    # Total = sum of medians (useful for a single summary number)
+    results["__total__"] = round(sum(results.values()), 4)
+
+    output_path = Path(OUTPUT)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w", encoding="utf-8") as f:
+        json.dump(results, f, indent=2)
+    print(f"\nResults written to {OUTPUT}")
+
+
+if __name__ == "__main__":
+    main()