Bayesian analysis implementation

RD 2025-01-17 17:44:24 -08:00
parent 85f3f9a471
commit 1e0956e60f
8 changed files with 341 additions and 64 deletions

View file

@@ -3,7 +3,7 @@
<option name="myName" value="Project Default" />
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<Languages>
<language minSize="47" name="Python" />
<language minSize="63" name="Python" />
</Languages>
</inspection_tool>
<inspection_tool class="Eslint" enabled="true" level="SERVER PROBLEM" enabled_by_default="true" editorAttributes="GENERIC_SERVER_ERROR_OR_WARNING">
@@ -20,5 +20,10 @@
</inspection_tool>
<inspection_tool class="PyTestUnpassedFixtureInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="PyTypeCheckerInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>

View file

@@ -0,0 +1,112 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numba as nb
import numpy as np

if TYPE_CHECKING:
    import numpy.typing as npt


@nb.njit(parallel=True, fastmath=True, cache=True)
def bayesian_bootstrap_runtime_means(
    runtimes: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.float64]:
    """Bayesian bootstrap for the mean of the runtimes list.

    Returns an array of shape (bootstrap_size,) with draws from the posterior of the mean.
    We draw random weights from Dirichlet(1,1,...,1) using the rngs random generators
    (one per computation thread), and compute the weighted mean.
    """
    num_timings = len(runtimes)
    np_runtimes = np.array(runtimes).astype(np.float64)
    draws = np.empty(bootstrap_size, dtype=np.float64)
    num_threads = len(rngs)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
    for thread_id in nb.prange(num_threads):
        thread_draws = draws[thread_idx[thread_id] : thread_idx[thread_id + 1]]
        for bootstrap_id in range(num_bootstraps_per_thread[thread_id]):
            # Dirichlet(1,...,1) is the normalized Gamma(1,1) distribution
            weights = rngs[thread_id].gamma(1.0, 1.0, size=num_timings)
            thread_draws[bootstrap_id] = np.dot(np_runtimes, weights / np.sum(weights))
    return draws
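For intuition, here is a minimal single-threaded NumPy sketch of the same Dirichlet-weight trick (the toy runtime values are invented for illustration):

# Single-threaded sketch of the Dirichlet-weight bootstrap used above.
# Dirichlet(1,...,1) weights are Gamma(1,1) draws normalized to sum to 1.
import numpy as np

rng = np.random.default_rng(0)
runtimes = np.array([120.0, 118.0, 131.0, 119.0, 125.0])  # hypothetical timings
draws = np.empty(10_000)
for b in range(draws.size):
    weights = rng.gamma(1.0, 1.0, size=runtimes.size)
    draws[b] = runtimes @ (weights / weights.sum())
# draws now approximates the posterior distribution of the mean runtime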
def compute_function_runtime_posterior_means(
    function_runtime_data: list[list[int]], bootstrap_size: int
) -> list[npt.NDArray[np.float64]]:
    """For each list of runtimes associated with a function input, run a Bayesian bootstrap to get a posterior of the mean.

    Returns an array of shape (bootstrap_size,) for each function input.
    """
    rng = np.random.default_rng()
    return [
        bayesian_bootstrap_runtime_means(input_runtime_data, tuple(rng.spawn(nb.get_num_threads())), bootstrap_size)
        for input_runtime_data in function_runtime_data
    ]


@nb.njit(parallel=True, fastmath=True, cache=True)
def bootstrap_combined_function_input_runtime_means(
    posterior_means: list[npt.NDArray[np.float64]], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.float64]:
    """Given posterior draws of the mean runtime for each of a function's inputs, estimate the overall expected runtime across inputs.

    We make random draws from each input's distribution using the rngs random generators (one per computation thread),
    and compute their arithmetic mean.
    Returns an array of shape (bootstrap_size,).
    """
    num_inputs = len(posterior_means)
    draws = np.empty(bootstrap_size, dtype=np.float64)
    num_threads = len(rngs)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
    for thread_id in nb.prange(num_threads):
        thread_draws = draws[thread_idx[thread_id] : thread_idx[thread_id + 1]]
        for bootstrap_id in range(num_bootstraps_per_thread[thread_id]):
            # Sample one draw per input, indexing each posterior array by its own length.
            thread_draws[bootstrap_id] = (
                sum([input_means[rngs[thread_id].integers(0, len(input_means))] for input_means in posterior_means])
                / num_inputs
            )
    return draws
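The combination step above is just mixing: average one randomly chosen posterior draw per input. A short NumPy sketch, assuming two inputs with equally sized (hypothetical) posterior arrays:

# Sketch: combine per-input posterior draws into one overall mean-runtime posterior.
import numpy as np

rng = np.random.default_rng(1)
posterior_a = rng.normal(1.00, 0.02, size=10_000)  # hypothetical per-input posteriors
posterior_b = rng.normal(1.20, 0.03, size=10_000)
idx_a = rng.integers(0, posterior_a.size, size=10_000)
idx_b = rng.integers(0, posterior_b.size, size=10_000)
combined = (posterior_a[idx_a] + posterior_b[idx_b]) / 2  # arithmetic mean across inputs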
def compute_statistics(distribution: npt.NDArray[np.float64], gamma: float = 0.95) -> dict[str, np.float64]:
    """Summarize a posterior sample with its median and a central credible interval of mass gamma."""
    lower_p = (1.0 - gamma) / 2 * 100
    return {
        "median": np.median(distribution),
        "credible_interval_lower_bound": np.percentile(distribution, lower_p),
        "credible_interval_upper_bound": np.percentile(distribution, 100 - lower_p),
    }


def analyse_function_runtime_data(
    function_runtime_data: list[list[int]], bootstrap_size: int
) -> tuple[npt.NDArray[np.float64], dict[str, np.float64]]:
    rng = np.random.default_rng()
    function_runtime_distribution = bootstrap_combined_function_input_runtime_means(
        compute_function_runtime_posterior_means(function_runtime_data, bootstrap_size),
        tuple(rng.spawn(nb.get_num_threads())),
        bootstrap_size,
    )
    return function_runtime_distribution, compute_statistics(function_runtime_distribution)


def compare_function_runtime_distributions(
    function1_runtime_distribution: npt.NDArray[np.float64], function2_runtime_distribution: npt.NDArray[np.float64]
) -> tuple[dict[str, np.float64], np.float64]:
    # Element-wise ratio gives a posterior over the speedup; mean of (ratio > 1) is P(function2 is faster).
    speedup_distribution = function1_runtime_distribution / function2_runtime_distribution
    return compute_statistics(speedup_distribution), np.mean(speedup_distribution > 1)
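Putting the module together, a hypothetical end-to-end call (the runtime numbers are invented; the test file below exercises the same flow with simulated data):

# Hypothetical usage: one runtime list per benchmarked input (values invented).
from codeflash.verification.bayesian_analysis import (
    analyse_function_runtime_data,
    compare_function_runtime_distributions,
)

orig_runtimes = [[105, 98, 110, 102], [210, 205, 199, 215]]
opt_runtimes = [[90, 88, 95, 91], [180, 178, 185, 176]]
orig_dist, orig_stats = analyse_function_runtime_data(orig_runtimes, 10_000)
opt_dist, opt_stats = analyse_function_runtime_data(opt_runtimes, 10_000)
speedup_stats, p_faster = compare_function_runtime_distributions(orig_dist, opt_dist)
print(speedup_stats["median"], p_faster)  # median speedup and P(optimized is faster)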

View file

@@ -1,59 +0,0 @@
from __future__ import annotations

import math
from typing import TYPE_CHECKING

import numpy as np
from numba import get_num_threads, njit, prange

if TYPE_CHECKING:
    import numpy.typing as npt

TWO_SIGMA = 2


@njit(parallel=True, fastmath=True, cache=True)
def bootstrap_minima(
    series: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.int64]:
    num_threads = len(rngs)
    series_size = len(series)
    npseries = np.array(series)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    minima = np.empty(bootstrap_size)
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
    for i in prange(num_threads):
        thread_minima = minima[thread_idx[i] : thread_idx[i + 1]]
        for k in range(num_bootstraps_per_thread[i]):
            thread_minima[k] = min(npseries[rngs[i].integers(0, series_size, size=series_size)])
    return minima


def bootstrap_noise_floor(series: list[int], bootstrap_size: int) -> np.float64:
    rng = np.random.default_rng()
    return np.std(bootstrap_minima(series, tuple(rng.spawn(get_num_threads())), bootstrap_size))


def combined_series_noise_floor(series1: list[int], series2: list[int], bootstrap_size: int) -> np.float64:
    noise_floor1 = bootstrap_noise_floor(series1, bootstrap_size)
    noise_floor2 = bootstrap_noise_floor(series2, bootstrap_size)
    return math.sqrt(noise_floor1 * noise_floor1 + noise_floor2 * noise_floor2)


def series2_faster_95_confidence(
    series1: list[int], series2: list[int], bootstrap_size: int
) -> tuple[float, float] | None:
    min1 = min(series1)
    min_diff = min1 - min(series2)
    if min_diff <= 0:
        return None
    combined_noise_floor = combined_series_noise_floor(series1, series2, bootstrap_size)
    percent_diff = 100 * min_diff / min1
    uncertainty = TWO_SIGMA * 100 * combined_noise_floor / min1
    if combined_noise_floor == 0 or min_diff / combined_noise_floor > TWO_SIGMA:
        return percent_diff, uncertainty
    return None

View file

@@ -0,0 +1,131 @@
from __future__ import annotations

import math
from typing import TYPE_CHECKING

import numpy as np
from numba import get_num_threads, njit, prange

if TYPE_CHECKING:
    import numpy.typing as npt

TWO_SIGMA = 2


@njit(parallel=True, fastmath=True, cache=True)
def bootstrap_minima(
    series: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.float64]:
    num_threads = len(rngs)
    series_size = len(series)
    npseries = np.array(series)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    minima = np.empty(bootstrap_size, dtype=np.float64)
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
    for i in prange(num_threads):
        thread_minima = minima[thread_idx[i] : thread_idx[i + 1]]
        for k in range(num_bootstraps_per_thread[i]):
            # Resample the series with replacement and keep the minimum of the resample.
            thread_minima[k] = min(npseries[rngs[i].integers(0, series_size, size=series_size)])
    return minima


@njit(parallel=True, fastmath=True, cache=True)
def bootstrap_minima_ratios(
    series1: list[int], series2: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.float64]:
    num_threads = len(rngs)
    series1_size = len(series1)
    series2_size = len(series2)
    npseries1 = np.array(series1)
    npseries2 = np.array(series2)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    minima_ratios = np.empty(bootstrap_size, dtype=np.float64)
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
    for i in prange(num_threads):
        thread_minima_ratios = minima_ratios[thread_idx[i] : thread_idx[i + 1]]
        for k in range(num_bootstraps_per_thread[i]):
            min2 = min(npseries2[rngs[i].integers(0, series2_size, size=series2_size)])
            if min2 == 0:
                thread_minima_ratios[k] = np.inf
            else:
                thread_minima_ratios[k] = min(npseries1[rngs[i].integers(0, series1_size, size=series1_size)]) / min2
    return minima_ratios


@njit(parallel=True, fastmath=True, cache=True)
def bootstrap_ratios_geomean(
    ratio_series: list[npt.NDArray[np.float64]], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.float64]:
    num_series = len(ratio_series)
    num_threads = len(rngs)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    combined_ratios = np.empty(bootstrap_size, dtype=np.float64)
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
    for i in prange(num_threads):
        thread_combined_ratios = combined_ratios[thread_idx[i] : thread_idx[i + 1]]
        for k in range(num_bootstraps_per_thread[i]):
            # Geometric mean in log space; clamp non-positive ratios to avoid log of zero.
            sum_log = 0.0
            for series in ratio_series:
                ratio = series[rngs[i].integers(0, len(series))]
                if ratio <= 0:
                    ratio = 1e-12
                sum_log += np.log(ratio)
            thread_combined_ratios[k] = np.exp(sum_log / num_series)
    return combined_ratios
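The log-space averaging above computes a geometric mean, which treats speedups symmetrically (a 2x gain on one benchmark cancels a 2x loss on another). A plain-NumPy sketch with invented per-benchmark ratio draws:

# Sketch: geometric mean of resampled per-benchmark speedup-ratio draws.
import numpy as np

rng = np.random.default_rng(2)
ratios_bench1 = rng.lognormal(np.log(1.15), 0.02, size=10_000)  # hypothetical draws
ratios_bench2 = rng.lognormal(np.log(0.95), 0.02, size=10_000)
resampled1 = rng.choice(ratios_bench1, size=10_000)
resampled2 = rng.choice(ratios_bench2, size=10_000)
geomean = np.exp((np.log(resampled1) + np.log(resampled2)) / 2)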
def bootstrap_noise_floor(series: list[int], bootstrap_size: int) -> np.float64:
    rng = np.random.default_rng()
    return np.std(bootstrap_minima(series, tuple(rng.spawn(get_num_threads())), bootstrap_size))


def combined_series_noise_floor(series1: list[int], series2: list[int], bootstrap_size: int) -> np.float64:
    # Independent noise floors add in quadrature.
    noise_floor1 = bootstrap_noise_floor(series1, bootstrap_size)
    noise_floor2 = bootstrap_noise_floor(series2, bootstrap_size)
    return math.sqrt(noise_floor1 * noise_floor1 + noise_floor2 * noise_floor2)


def series2_faster_95_confidence(
    series1: list[int], series2: list[int], bootstrap_size: int
) -> tuple[float, float] | None:
    min1 = min(series1)
    min_diff = min1 - min(series2)
    if min_diff <= 0:
        return None
    combined_noise_floor = combined_series_noise_floor(series1, series2, bootstrap_size)
    percent_diff = 100 * min_diff / min1
    uncertainty = TWO_SIGMA * 100 * combined_noise_floor / min1
    # Report the improvement only if it clears the two-sigma noise floor.
    if combined_noise_floor == 0 or min_diff / combined_noise_floor > TWO_SIGMA:
        return percent_diff, uncertainty
    return None
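As a worked example of the criterion above (numbers invented): with min(series1) = 1000 ns, min(series2) = 900 ns, and a combined noise floor of 20 ns, min_diff / combined_noise_floor = 100 / 20 = 5 > TWO_SIGMA, so the function returns (10.0, 4.0), a 10% improvement with a ±4% two-sigma uncertainty.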
def analyze_series_speedup(
    multi_series1: list[list[int]], multi_series2: list[list[int]], bootstrap_size: int
) -> tuple[np.float64, np.float64, np.float64, np.float64]:
    rng = np.random.default_rng()
    combined_ratios = bootstrap_ratios_geomean(
        [
            bootstrap_minima_ratios(series1, series2, tuple(rng.spawn(get_num_threads())), bootstrap_size)
            for series1, series2 in zip(multi_series1, multi_series2)
        ],
        tuple(rng.spawn(get_num_threads())),
        bootstrap_size,
    )
    lower_bound_95_confidence = np.percentile(combined_ratios, 2.5)
    upper_bound_95_confidence = np.percentile(combined_ratios, 97.5)
    mean = np.mean(combined_ratios)
    probability_faster = np.mean(combined_ratios > 1.0)
    return lower_bound_95_confidence, upper_bound_95_confidence, mean, probability_faster
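A hypothetical call with two benchmarks' timing series (all values invented):

# Hypothetical usage of analyze_series_speedup across two benchmarks.
lo, hi, mean, p_faster = analyze_series_speedup(
    [[1000, 1010, 995, 1005], [2000, 2040, 1990, 2010]],  # original timings
    [[900, 905, 910, 898], [1800, 1790, 1820, 1805]],     # optimized timings
    10_000,
)
print(f"geomean speedup {mean:.3f}, 95% CI [{lo:.3f}, {hi:.3f}], P(faster) = {p_faster:.2f}")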

View file

@@ -14,5 +14,5 @@ def test_compare_timing_series() -> None:
    optimized_timing_series = create_timing_series(10000, 1800, 48)
    result = series2_faster_95_confidence(original_timing_series, optimized_timing_series, 10000)
    assert result is not None
-    assert 5 < result[0] < 15
-    assert result[1] < 3
+    assert 4 < result[0] < 16
+    assert result[1] < 4

View file

@@ -139,7 +139,7 @@ install_types = true
plugins = ["pydantic.mypy"]
[[tool.mypy.overrides]]
module = ["jedi", "jedi.api.classes", "inquirer", "inquirer.themes"]
module = ["jedi", "jedi.api.classes", "inquirer", "inquirer.themes", "numba"]
ignore_missing_imports = true
[tool.pydantic-mypy]

View file

@@ -0,0 +1,79 @@
import numpy as np

from codeflash.verification.bayesian_analysis import (
    analyse_function_runtime_data,
    compare_function_runtime_distributions,
)


def test_bayesian_analysis() -> None:
    functions = ["orig", "opt1", "opt2"]  # original + 2 optimization candidates
    inputs = ["inpA", "inpB", "inpC"]  # 3 benchmarks
    n = 5000  # repeated measurements per (function, input)
    # Simulate some data in a dictionary:
    # data[(fn, inp)] = array of shape (N,) with raw runtimes
    rng = np.random.default_rng(42)
    data = {}
    for fn in functions:
        for inp in inputs:
            # Random times with some big outliers for demonstration
            base_time = 1.0
            if fn == "orig":
                factor = 1.0
            elif fn == "opt1":
                factor = 0.85  # 15% faster
            else:  # opt2
                factor = 0.95  # 5% faster
            # Also vary by input
            if inp == "inpA":
                factor_inp = 1.0
            elif inp == "inpB":
                factor_inp = 1.2
            else:  # inpC
                factor_inp = 0.9
            # final mean time = base_time * factor * factor_inp
            mu = base_time * factor * factor_inp
            # add noise and outliers
            times = mu + rng.normal(0, 0.2 * mu, size=n)
            times = np.clip(times, 0, None)  # no negative times
            # add some random big spikes
            if rng.random() < 0.1:
                times[rng.integers(0, n)] *= 5.0
            data[(fn, inp)] = times
    orig = [list(data[("orig", i)]) for i in inputs]
    opt1 = [list(data[("opt1", i)]) for i in inputs]
    opt2 = [list(data[("opt2", i)]) for i in inputs]
    original_distribution, original_stats = analyse_function_runtime_data(orig, 10000)
    optimized_distribution1, optimized_stats1 = analyse_function_runtime_data(opt1, 10000)
    optimized_distribution2, optimized_stats2 = analyse_function_runtime_data(opt2, 10000)
    speedup_stats1, faster_prob1 = compare_function_runtime_distributions(
        original_distribution, optimized_distribution1
    )
    assert (
        1.162
        < speedup_stats1["credible_interval_lower_bound"]
        < 1.165
        < speedup_stats1["median"]
        < 1.171
        < speedup_stats1["credible_interval_upper_bound"]
        < 1.174
    )
    assert faster_prob1 == 1.0
    speedup_stats2, faster_prob2 = compare_function_runtime_distributions(
        original_distribution, optimized_distribution2
    )
    assert (
        1.046
        < speedup_stats2["credible_interval_lower_bound"]
        < 1.051
        < speedup_stats2["median"]
        < 1.054
        < speedup_stats2["credible_interval_upper_bound"]
        < 1.057
    )
    assert faster_prob2 == 1.0

View file

@@ -16,4 +16,13 @@ install_types = True
init_forbid_extra = True
init_typed = True
warn_required_dynamic_aliases = True
warn_untyped_fields = True
[mypy-jedi.*]
ignore_missing_imports = True
[mypy-inquirer.*]
ignore_missing_imports = True
[mypy-numba.*]
ignore_missing_imports = True