Byesian analysis implementation
This commit is contained in:
parent
85f3f9a471
commit
1e0956e60f
8 changed files with 341 additions and 64 deletions
|
|
@ -3,7 +3,7 @@
|
|||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<Languages>
|
||||
<language minSize="47" name="Python" />
|
||||
<language minSize="63" name="Python" />
|
||||
</Languages>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="Eslint" enabled="true" level="SERVER PROBLEM" enabled_by_default="true" editorAttributes="GENERIC_SERVER_ERROR_OR_WARNING">
|
||||
|
|
@ -20,5 +20,10 @@
|
|||
</inspection_tool>
|
||||
<inspection_tool class="PyTestUnpassedFixtureInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="PyTypeCheckerInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||
<option name="processCode" value="true" />
|
||||
<option name="processLiterals" value="true" />
|
||||
<option name="processComments" value="true" />
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
||||
112
cli/codeflash/verification/bayesian_analysis.py
Normal file
112
cli/codeflash/verification/bayesian_analysis.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numba as nb
|
||||
import numpy as np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy.typing as npt
|
||||
|
||||
|
||||
@nb.njit(parallel=True, fastmath=True, cache=True)
|
||||
def bayesian_bootstrap_runtime_means(
|
||||
runtimes: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
|
||||
) -> npt.NDArray[np.float64]:
|
||||
"""Bayesian bootstrap for the mean of the runtimes list.
|
||||
|
||||
Returns an array of shape (bootstrap_size,) with draws from the posterior of the mean.
|
||||
We draw random weights from Dirichlet(1,1,...,1) using the rngs random generators
|
||||
(one per computation thread), and compute the weighted mean.
|
||||
"""
|
||||
num_timings = len(runtimes)
|
||||
np_runtimes = np.array(runtimes).astype(np.float64)
|
||||
draws = np.empty(bootstrap_size, dtype=np.float64)
|
||||
|
||||
num_threads = len(rngs)
|
||||
thread_remainder = bootstrap_size % num_threads
|
||||
num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
|
||||
[1] * thread_remainder + [0] * (num_threads - thread_remainder)
|
||||
)
|
||||
thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
|
||||
|
||||
for thread_id in nb.prange(num_threads):
|
||||
thread_draws = draws[thread_idx[thread_id] : thread_idx[thread_id + 1]]
|
||||
for bootstrap_id in range(num_bootstraps_per_thread[thread_id]):
|
||||
# Dirichlet(1,...,1) is the normalized Gamma(1,1) distribution
|
||||
weights = rngs[thread_id].gamma(1.0, 1.0, size=num_timings)
|
||||
thread_draws[bootstrap_id] = np.dot(np_runtimes, weights / np.sum(weights))
|
||||
return draws
|
||||
|
||||
|
||||
def compute_function_runtime_posterior_means(
|
||||
function_runtime_data: list[list[int]], bootstrap_size: int
|
||||
) -> list[npt.NDArray[np.float64]]:
|
||||
"""For each list of runtimes associated to a function input, do a Bayesian bootstrap to get a posterior of the mean.
|
||||
|
||||
Returns an array of shape (bootstrap_size,) for each function input.
|
||||
"""
|
||||
rng = np.random.default_rng()
|
||||
return [
|
||||
bayesian_bootstrap_runtime_means(input_runtime_data, tuple(rng.spawn(nb.get_num_threads())), bootstrap_size)
|
||||
for input_runtime_data in function_runtime_data
|
||||
]
|
||||
|
||||
|
||||
@nb.njit(parallel=True, fastmath=True, cache=True)
|
||||
def bootstrap_combined_function_input_runtime_means(
|
||||
posterior_means: list[npt.NDArray[np.float64]], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
|
||||
) -> npt.NDArray[np.float64]:
|
||||
"""Given a function, we have posterior draws for each input, and get an overall expected time across these inputs.
|
||||
|
||||
We make random draws from each input's distribution using the rngs random generators (one per computation thread),
|
||||
and compute their arithmetic mean.
|
||||
Returns an array of shape (bootstrap_size,).
|
||||
"""
|
||||
num_inputs = len(posterior_means)
|
||||
num_input_means = max([len(posterior_mean) for posterior_mean in posterior_means])
|
||||
draws = np.empty(bootstrap_size, dtype=np.float64)
|
||||
|
||||
num_threads = len(rngs)
|
||||
thread_remainder = bootstrap_size % num_threads
|
||||
num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
|
||||
[1] * thread_remainder + [0] * (num_threads - thread_remainder)
|
||||
)
|
||||
thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
|
||||
|
||||
for thread_id in nb.prange(num_threads):
|
||||
thread_draws = draws[thread_idx[thread_id] : thread_idx[thread_id + 1]]
|
||||
for bootstrap_id in range(num_bootstraps_per_thread[thread_id]):
|
||||
thread_draws[bootstrap_id] = (
|
||||
sum([input_means[rngs[thread_id].integers(0, num_input_means)] for input_means in posterior_means])
|
||||
/ num_inputs
|
||||
)
|
||||
return draws
|
||||
|
||||
|
||||
def compute_statistics(distribution: npt.NDArray[np.float64], gamma: float = 0.95) -> dict[str, np.float64]:
|
||||
lower_p = (1.0 - gamma) / 2 * 100
|
||||
return {
|
||||
"median": np.median(distribution),
|
||||
"credible_interval_lower_bound": np.percentile(distribution, lower_p),
|
||||
"credible_interval_upper_bound": np.percentile(distribution, 100 - lower_p),
|
||||
}
|
||||
|
||||
|
||||
def analyse_function_runtime_data(
|
||||
function_runtime_data: list[list[int]], bootstrap_size: int
|
||||
) -> tuple[npt.NDArray[np.float64], dict[str, np.float64]]:
|
||||
rng = np.random.default_rng()
|
||||
function_runtime_distribution = bootstrap_combined_function_input_runtime_means(
|
||||
compute_function_runtime_posterior_means(function_runtime_data, bootstrap_size),
|
||||
tuple(rng.spawn(nb.get_num_threads())),
|
||||
bootstrap_size,
|
||||
)
|
||||
return function_runtime_distribution, compute_statistics(function_runtime_distribution)
|
||||
|
||||
|
||||
def compare_function_runtime_distributions(
|
||||
function1_runtime_distribution: npt.NDArray[np.float64], function2_runtime_distribution: npt.NDArray[np.float64]
|
||||
) -> tuple[dict[str, np.float64], np.float64]:
|
||||
speedup_distribution = function1_runtime_distribution / function2_runtime_distribution
|
||||
return compute_statistics(speedup_distribution), np.mean(speedup_distribution > 1)
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
from numba import get_num_threads, njit, prange
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy.typing as npt
|
||||
|
||||
TWO_SIGMA = 2
|
||||
|
||||
|
||||
@njit(parallel=True, fastmath=True, cache=True)
|
||||
def bootstrap_minima(
|
||||
series: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
|
||||
) -> npt.NDArray[np.int64]:
|
||||
num_threads = len(rngs)
|
||||
series_size = len(series)
|
||||
npseries = np.array(series)
|
||||
thread_remainder = bootstrap_size % num_threads
|
||||
num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
|
||||
[1] * thread_remainder + [0] * (num_threads - thread_remainder)
|
||||
)
|
||||
minima = np.empty(bootstrap_size)
|
||||
thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
|
||||
|
||||
for i in prange(num_threads):
|
||||
thread_minima = minima[thread_idx[i] : thread_idx[i + 1]]
|
||||
for k in range(num_bootstraps_per_thread[i]):
|
||||
thread_minima[k] = min(npseries[rngs[i].integers(0, series_size, size=series_size)])
|
||||
return minima
|
||||
|
||||
|
||||
def bootstrap_noise_floor(series: list[int], bootstrap_size: int) -> np.float64:
|
||||
rng = np.random.default_rng()
|
||||
return np.std(bootstrap_minima(series, tuple(rng.spawn(get_num_threads())), bootstrap_size))
|
||||
|
||||
|
||||
def combined_series_noise_floor(series1: list[int], series2: list[int], bootstrap_size: int) -> np.float64:
|
||||
noise_floor1 = bootstrap_noise_floor(series1, bootstrap_size)
|
||||
noise_floor2 = bootstrap_noise_floor(series2, bootstrap_size)
|
||||
return math.sqrt(noise_floor1 * noise_floor1 + noise_floor2 * noise_floor2)
|
||||
|
||||
|
||||
def series2_faster_95_confidence(
|
||||
series1: list[int], series2: list[int], bootstrap_size: int
|
||||
) -> tuple[float, float] | None:
|
||||
min1 = min(series1)
|
||||
min_diff = min1 - min(series2)
|
||||
if min_diff <= 0:
|
||||
return None
|
||||
combined_noise_floor = combined_series_noise_floor(series1, series2, bootstrap_size)
|
||||
percent_diff = 100 * min_diff / min1
|
||||
uncertainty = TWO_SIGMA * 100 * combined_noise_floor / min1
|
||||
if combined_noise_floor == 0 or min_diff / combined_noise_floor > TWO_SIGMA:
|
||||
return percent_diff, uncertainty
|
||||
return None
|
||||
131
cli/experiments/statistical_analysis.py
Normal file
131
cli/experiments/statistical_analysis.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
from numba import get_num_threads, njit, prange
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy.typing as npt
|
||||
|
||||
TWO_SIGMA = 2
|
||||
|
||||
|
||||
@njit(parallel=True, fastmath=True, cache=True)
|
||||
def bootstrap_minima(
|
||||
series: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
|
||||
) -> npt.NDArray[np.int64]:
|
||||
num_threads = len(rngs)
|
||||
series_size = len(series)
|
||||
npseries = np.array(series)
|
||||
thread_remainder = bootstrap_size % num_threads
|
||||
num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
|
||||
[1] * thread_remainder + [0] * (num_threads - thread_remainder)
|
||||
)
|
||||
minima = np.empty(bootstrap_size)
|
||||
thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
|
||||
|
||||
for i in prange(num_threads):
|
||||
thread_minima = minima[thread_idx[i] : thread_idx[i + 1]]
|
||||
for k in range(num_bootstraps_per_thread[i]):
|
||||
thread_minima[k] = min(npseries[rngs[i].integers(0, series_size, size=series_size)])
|
||||
return minima
|
||||
|
||||
|
||||
@njit(parallel=True, fastmath=True, cache=True)
|
||||
def bootstrap_minima_ratios(
|
||||
series1: list[int], series2: list[int], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
|
||||
) -> npt.NDArray[np.float64]:
|
||||
num_threads = len(rngs)
|
||||
series1_size = len(series1)
|
||||
series2_size = len(series2)
|
||||
npseries1 = np.array(series1)
|
||||
npseries2 = np.array(series2)
|
||||
thread_remainder = bootstrap_size % num_threads
|
||||
num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
|
||||
[1] * thread_remainder + [0] * (num_threads - thread_remainder)
|
||||
)
|
||||
minima_ratios = np.empty(bootstrap_size, dtype=np.float64)
|
||||
thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
|
||||
|
||||
for i in prange(num_threads):
|
||||
thread_minima_ratios = minima_ratios[thread_idx[i] : thread_idx[i + 1]]
|
||||
for k in range(num_bootstraps_per_thread[i]):
|
||||
min2 = min(npseries2[rngs[i].integers(0, series2_size, size=series2_size)])
|
||||
if min2 == 0:
|
||||
thread_minima_ratios[k] = np.inf
|
||||
else:
|
||||
thread_minima_ratios[k] = min(npseries1[rngs[i].integers(0, series1_size, size=series1_size)]) / min2
|
||||
return minima_ratios
|
||||
|
||||
|
||||
@njit(parallel=True, fastmath=True, cache=True)
|
||||
def bootstrap_ratios_geomean(
|
||||
ratio_series: list[npt.NDArray[np.float64]], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
|
||||
) -> npt.NDArray[np.float64]:
|
||||
num_series = len(ratio_series)
|
||||
num_threads = len(rngs)
|
||||
thread_remainder = bootstrap_size % num_threads
|
||||
num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
|
||||
[1] * thread_remainder + [0] * (num_threads - thread_remainder)
|
||||
)
|
||||
combined_ratios = np.empty(bootstrap_size, dtype=np.float64)
|
||||
thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]
|
||||
|
||||
for i in prange(num_threads):
|
||||
thread_combined_ratios = combined_ratios[thread_idx[i] : thread_idx[i + 1]]
|
||||
for k in range(num_bootstraps_per_thread[i]):
|
||||
sum_log = 0.0
|
||||
for series in ratio_series:
|
||||
ratio = series[rngs[i].integers(0, len(series))]
|
||||
if ratio <= 0:
|
||||
ratio = 1e-12
|
||||
sum_log += np.log(ratio)
|
||||
thread_combined_ratios[k] = np.exp(sum_log / num_series)
|
||||
return combined_ratios
|
||||
|
||||
|
||||
def bootstrap_noise_floor(series: list[int], bootstrap_size: int) -> np.float64:
|
||||
rng = np.random.default_rng()
|
||||
return np.std(bootstrap_minima(series, tuple(rng.spawn(get_num_threads())), bootstrap_size))
|
||||
|
||||
|
||||
def combined_series_noise_floor(series1: list[int], series2: list[int], bootstrap_size: int) -> np.float64:
|
||||
noise_floor1 = bootstrap_noise_floor(series1, bootstrap_size)
|
||||
noise_floor2 = bootstrap_noise_floor(series2, bootstrap_size)
|
||||
return math.sqrt(noise_floor1 * noise_floor1 + noise_floor2 * noise_floor2)
|
||||
|
||||
|
||||
def series2_faster_95_confidence(
|
||||
series1: list[int], series2: list[int], bootstrap_size: int
|
||||
) -> tuple[float, float] | None:
|
||||
min1 = min(series1)
|
||||
min_diff = min1 - min(series2)
|
||||
if min_diff <= 0:
|
||||
return None
|
||||
combined_noise_floor = combined_series_noise_floor(series1, series2, bootstrap_size)
|
||||
percent_diff = 100 * min_diff / min1
|
||||
uncertainty = TWO_SIGMA * 100 * combined_noise_floor / min1
|
||||
if combined_noise_floor == 0 or min_diff / combined_noise_floor > TWO_SIGMA:
|
||||
return percent_diff, uncertainty
|
||||
return None
|
||||
|
||||
|
||||
def analyze_series_speedup(
|
||||
multi_series1: list[list[int]], multi_series2: list[list[int]], bootstrap_size: int
|
||||
) -> tuple[np.float64, np.float64, np.float64, np.float64, np.float64]:
|
||||
rng = np.random.default_rng()
|
||||
combined_ratios = bootstrap_ratios_geomean(
|
||||
[
|
||||
bootstrap_minima_ratios(series1, series2, tuple(rng.spawn(get_num_threads())), bootstrap_size)
|
||||
for series1, series2 in zip(multi_series1, multi_series2)
|
||||
],
|
||||
tuple(rng.spawn(get_num_threads())),
|
||||
bootstrap_size,
|
||||
)
|
||||
lower_bound_95_confidence = np.percentile(combined_ratios, 2.5)
|
||||
upper_bound_95_confidence = np.percentile(combined_ratios, 97.5)
|
||||
mean = np.mean(combined_ratios)
|
||||
probablility_faster = np.mean(combined_ratios > 1.0)
|
||||
return lower_bound_95_confidence, upper_bound_95_confidence, mean, probablility_faster
|
||||
|
|
@ -14,5 +14,5 @@ def test_compare_timing_series() -> None:
|
|||
optimized_timing_series = create_timing_series(10000, 1800, 48)
|
||||
result = series2_faster_95_confidence(original_timing_series, optimized_timing_series, 10000)
|
||||
assert result is not None
|
||||
assert 5 < result[0] < 15
|
||||
assert result[1] < 3
|
||||
assert 4 < result[0] < 16
|
||||
assert result[1] < 4
|
||||
|
|
@ -139,7 +139,7 @@ install_types = true
|
|||
plugins = ["pydantic.mypy"]
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = ["jedi", "jedi.api.classes", "inquirer", "inquirer.themes"]
|
||||
module = ["jedi", "jedi.api.classes", "inquirer", "inquirer.themes", "numba"]
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.pydantic-mypy]
|
||||
|
|
|
|||
79
cli/tests/test_bayesian_analysis.py
Normal file
79
cli/tests/test_bayesian_analysis.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import numpy as np
|
||||
from codeflash.verification.bayesian_analysis import (
|
||||
analyse_function_runtime_data,
|
||||
compare_function_runtime_distributions,
|
||||
)
|
||||
|
||||
|
||||
def test_bayesian_analysis() -> None:
|
||||
functions = ["orig", "opt1", "opt2"] # original + 2 optimization candidates
|
||||
inputs = ["inpA", "inpB", "inpC"] # 3 benchmarks
|
||||
n = 5000 # repeated measurements per (function, input)
|
||||
|
||||
# Let's simulate some data in a dictionary:
|
||||
# data[(fn, inp)] = array of shape (N,) with raw runtimes
|
||||
rng = np.random.default_rng(42)
|
||||
data = {}
|
||||
for fn in functions:
|
||||
for inp in inputs:
|
||||
# We'll do random times with some big outliers for demonstration
|
||||
base_time = 1.0
|
||||
if fn == "orig":
|
||||
factor = 1.0
|
||||
elif fn == "opt1":
|
||||
factor = 0.85 # 15% faster
|
||||
else: # opt2
|
||||
factor = 0.95 # 5% faster
|
||||
# We'll also vary by input
|
||||
if inp == "inpA":
|
||||
factor_inp = 1.0
|
||||
elif inp == "inpB":
|
||||
factor_inp = 1.2
|
||||
else: # inpC
|
||||
factor_inp = 0.9
|
||||
|
||||
# final mean time = base_time * factor * factor_inp
|
||||
mu = base_time * factor * factor_inp
|
||||
# add noise, outliers
|
||||
times = mu + rng.normal(0, 0.2 * mu, size=n)
|
||||
times = np.clip(times, 0, None) # no negative times
|
||||
# add some random big spikes
|
||||
if rng.random() < 0.1:
|
||||
times[rng.integers(0, n)] *= 5.0
|
||||
data[(fn, inp)] = times
|
||||
|
||||
orig = [list(data[("orig", i)]) for i in inputs]
|
||||
opt1 = [list(data[("opt1", i)]) for i in inputs]
|
||||
opt2 = [list(data[("opt2", i)]) for i in inputs]
|
||||
|
||||
original_distribution, original_stats = analyse_function_runtime_data(orig, 10000)
|
||||
optimized_distribution1, optimized_stats1 = analyse_function_runtime_data(opt1, 10000)
|
||||
optimized_distribution2, optimized_stats2 = analyse_function_runtime_data(opt2, 10000)
|
||||
|
||||
speedup_stats1, faster_prob1 = compare_function_runtime_distributions(
|
||||
original_distribution, optimized_distribution1
|
||||
)
|
||||
assert (
|
||||
1.162
|
||||
< speedup_stats1["credible_interval_lower_bound"]
|
||||
< 1.165
|
||||
< speedup_stats1["median"]
|
||||
< 1.171
|
||||
< speedup_stats1["credible_interval_upper_bound"]
|
||||
< 1.174
|
||||
)
|
||||
assert faster_prob1 == 1.0
|
||||
|
||||
speedup_stats2, faster_prob2 = compare_function_runtime_distributions(
|
||||
original_distribution, optimized_distribution2
|
||||
)
|
||||
assert (
|
||||
1.046
|
||||
< speedup_stats2["credible_interval_lower_bound"]
|
||||
< 1.051
|
||||
< speedup_stats2["median"]
|
||||
< 1.054
|
||||
< speedup_stats2["credible_interval_upper_bound"]
|
||||
< 1.057
|
||||
)
|
||||
assert faster_prob1 == 1.0
|
||||
11
mypy.ini
11
mypy.ini
|
|
@ -16,4 +16,13 @@ install_types = True
|
|||
init_forbid_extra = True
|
||||
init_typed = True
|
||||
warn_required_dynamic_aliases = True
|
||||
warn_untyped_fields = True
|
||||
warn_untyped_fields = True
|
||||
|
||||
[mypy-jedi.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-inquirer.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-numba.*]
|
||||
ignore_missing_imports = True
|
||||
Loading…
Reference in a new issue