codeflash-internal/cli/tests/test_bayesian_analysis.py
2025-01-17 17:44:24 -08:00

79 lines
2.7 KiB
Python

import numpy as np
from codeflash.verification.bayesian_analysis import (
analyse_function_runtime_data,
compare_function_runtime_distributions,
)
def test_bayesian_analysis() -> None:
functions = ["orig", "opt1", "opt2"] # original + 2 optimization candidates
inputs = ["inpA", "inpB", "inpC"] # 3 benchmarks
n = 5000 # repeated measurements per (function, input)
# Let's simulate some data in a dictionary:
# data[(fn, inp)] = array of shape (N,) with raw runtimes
rng = np.random.default_rng(42)
data = {}
for fn in functions:
for inp in inputs:
# We'll do random times with some big outliers for demonstration
base_time = 1.0
if fn == "orig":
factor = 1.0
elif fn == "opt1":
factor = 0.85 # 15% faster
else: # opt2
factor = 0.95 # 5% faster
# We'll also vary by input
if inp == "inpA":
factor_inp = 1.0
elif inp == "inpB":
factor_inp = 1.2
else: # inpC
factor_inp = 0.9
# final mean time = base_time * factor * factor_inp
mu = base_time * factor * factor_inp
# add noise, outliers
times = mu + rng.normal(0, 0.2 * mu, size=n)
times = np.clip(times, 0, None) # no negative times
# add some random big spikes
if rng.random() < 0.1:
times[rng.integers(0, n)] *= 5.0
data[(fn, inp)] = times
orig = [list(data[("orig", i)]) for i in inputs]
opt1 = [list(data[("opt1", i)]) for i in inputs]
opt2 = [list(data[("opt2", i)]) for i in inputs]
original_distribution, original_stats = analyse_function_runtime_data(orig, 10000)
optimized_distribution1, optimized_stats1 = analyse_function_runtime_data(opt1, 10000)
optimized_distribution2, optimized_stats2 = analyse_function_runtime_data(opt2, 10000)
speedup_stats1, faster_prob1 = compare_function_runtime_distributions(
original_distribution, optimized_distribution1
)
assert (
1.162
< speedup_stats1["credible_interval_lower_bound"]
< 1.165
< speedup_stats1["median"]
< 1.171
< speedup_stats1["credible_interval_upper_bound"]
< 1.174
)
assert faster_prob1 == 1.0
speedup_stats2, faster_prob2 = compare_function_runtime_distributions(
original_distribution, optimized_distribution2
)
assert (
1.046
< speedup_stats2["credible_interval_lower_bound"]
< 1.051
< speedup_stats2["median"]
< 1.054
< speedup_stats2["credible_interval_upper_bound"]
< 1.057
)
assert faster_prob1 == 1.0