mirror of
https://github.com/codeflash-ai/codeflash-internal.git
synced 2026-05-04 18:25:18 +00:00
Bayesian analysis implementation
This commit is contained in:
parent
fe91ece425
commit
593bdc8e88
6 changed files with 102 additions and 32 deletions
|
|
@ -9,9 +9,10 @@
|
|||
<inspection_tool class="Eslint" enabled="true" level="SERVER PROBLEM" enabled_by_default="true" editorAttributes="GENERIC_SERVER_ERROR_OR_WARNING">
|
||||
<option name="useSeverityFromConfigFile" value="false" />
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyDataclassInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="PyMissingTypeHintsInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
|
||||
<inspection_tool class="PyNestedDecoratorsInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<inspection_tool class="PyPep8Inspection" enabled="false" level="WEAK WARNING" enabled_by_default="false">
|
||||
<option name="ignoredErrors">
|
||||
<list>
|
||||
<option value="E203" />
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
</ENTRIES>
|
||||
</EXTENSION>
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/cli/codeflash/main.py" />
|
||||
<option name="PARAMETERS" value="--file code_to_optimize/bubble_sort.py --verbose --module-root $PROJECT_DIR$/cli --function sorter --test-framework pytest --tests-root code_to_optimize/tests/pytest" />
|
||||
<option name="PARAMETERS" value="--file code_to_optimize/bubble_sort.py --module-root $PROJECT_DIR$/cli --function sorter --test-framework pytest --tests-root code_to_optimize/tests/pytest" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="true" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ from codeflash.result.create_pr import check_create_pr, existing_tests_source_fo
|
|||
from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic
|
||||
from codeflash.result.explanation import Explanation
|
||||
from codeflash.telemetry.posthog_cf import ph
|
||||
from codeflash.verification.bayesian_analysis import compare_function_runtime_distributions
|
||||
from codeflash.verification.concolic_testing import generate_concolic_tests
|
||||
from codeflash.verification.equivalence import compare_test_results
|
||||
from codeflash.verification.parse_test_output import parse_test_results
|
||||
|
|
@ -77,6 +78,9 @@ from codeflash.verification.verifier import generate_tests
|
|||
if TYPE_CHECKING:
|
||||
from argparse import Namespace
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
from codeflash.either import Result
|
||||
from codeflash.models.models import CoverageData, FunctionSource, OptimizedCandidate
|
||||
|
||||
|
|
@ -352,7 +356,12 @@ class Optimizer:
|
|||
cleanup_paths(paths_to_cleanup)
|
||||
return Failure(baseline_result.failure())
|
||||
|
||||
original_code_baseline, test_functions_to_remove = baseline_result.unwrap()
|
||||
(
|
||||
original_code_baseline,
|
||||
original_code_runtime_distribution,
|
||||
original_code_runtime_statistics,
|
||||
test_functions_to_remove,
|
||||
) = baseline_result.unwrap()
|
||||
if isinstance(original_code_baseline, OriginalCodeBaseline) and not coverage_critic(
|
||||
original_code_baseline.coverage_results, self.args.test_framework
|
||||
):
|
||||
|
|
@ -371,6 +380,7 @@ class Optimizer:
|
|||
function_to_optimize=function_to_optimize,
|
||||
original_code=validated_original_code[function_to_optimize.file_path].source_code,
|
||||
original_code_baseline=original_code_baseline,
|
||||
original_code_runtime_distribution=original_code_runtime_distribution,
|
||||
original_helper_code=original_helper_code,
|
||||
function_trace_id=function_trace_id[:-4] + f"EXP{u}" if should_run_experiment else function_trace_id,
|
||||
)
|
||||
|
|
@ -480,11 +490,13 @@ class Optimizer:
|
|||
function_to_optimize: FunctionToOptimize,
|
||||
original_code: str,
|
||||
original_code_baseline: OriginalCodeBaseline,
|
||||
original_code_runtime_distribution: npt.NDArray[np.float64],
|
||||
original_helper_code: dict[Path, str],
|
||||
function_trace_id: str,
|
||||
) -> BestOptimization | None:
|
||||
best_optimization: BestOptimization | None = None
|
||||
best_runtime_until_now = original_code_baseline.runtime
|
||||
best_speedup_ratio_until_now = 1.0
|
||||
|
||||
speedup_ratios: dict[str, float | None] = {}
|
||||
optimized_runtimes: dict[str, float | None] = {}
|
||||
|
|
@ -528,7 +540,9 @@ class Optimizer:
|
|||
is_correct[candidate.optimization_id] = False
|
||||
speedup_ratios[candidate.optimization_id] = None
|
||||
else:
|
||||
candidate_result: OptimizedCandidateResult = run_results.unwrap()
|
||||
candidate_result, candidate_runtime_distribution, candidate_runtime_statistics = (
|
||||
run_results.unwrap()
|
||||
)
|
||||
best_test_runtime = candidate_result.best_test_runtime
|
||||
optimized_runtimes[candidate.optimization_id] = best_test_runtime
|
||||
is_correct[candidate.optimization_id] = True
|
||||
|
|
@ -537,6 +551,10 @@ class Optimizer:
|
|||
)
|
||||
speedup_ratios[candidate.optimization_id] = perf_gain
|
||||
|
||||
speedup_stats = compare_function_runtime_distributions(
|
||||
original_code_runtime_distribution, candidate_runtime_distribution
|
||||
)
|
||||
|
||||
tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
|
||||
if speedup_critic(
|
||||
candidate_result, original_code_baseline.runtime, best_runtime_until_now
|
||||
|
|
@ -568,6 +586,24 @@ class Optimizer:
|
|||
console.print(tree)
|
||||
console.rule()
|
||||
|
||||
logger.info(
|
||||
f"Overall candidate time (95% Credible Interval) = ["
|
||||
f"{humanize_runtime(candidate_runtime_statistics['credible_interval_lower_bound'])}, "
|
||||
f"{humanize_runtime(candidate_runtime_statistics['credible_interval_upper_bound'])}, "
|
||||
f"median={humanize_runtime(candidate_runtime_statistics['median'])}"
|
||||
f"\nSpeedup of candidate vs original:"
|
||||
f"\n95% CI = [{speedup_stats['credible_interval_lower_bound']:.3f}, "
|
||||
f"{speedup_stats['credible_interval_upper_bound']:.3f}]"
|
||||
f"\nmedian = {speedup_stats['median']:.3f}"
|
||||
)
|
||||
console.rule()
|
||||
if speedup_stats["credible_interval_lower_bound"] > 1.0:
|
||||
logger.info("The candidate is faster than the original code with a 95% probability.")
|
||||
if speedup_stats["median"] > best_speedup_ratio_until_now:
|
||||
best_speedup_ratio_until_now = speedup_stats["median"]
|
||||
logger.info("This candidate is the best candidate so far.")
|
||||
else:
|
||||
logger.info("This candidate is not faster than the current fastest candidate.")
|
||||
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
|
||||
except KeyboardInterrupt as e:
|
||||
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
|
||||
|
|
@ -941,7 +977,7 @@ class Optimizer:
|
|||
|
||||
def establish_original_code_baseline(
|
||||
self, function_name: str, function_file_path: Path, code_context: CodeOptimizationContext
|
||||
) -> Result[tuple[OriginalCodeBaseline, list[str]], str]:
|
||||
) -> Result[tuple[OriginalCodeBaseline, npt.NDArray[np.float64], dict[str, np.float64], list[str]], str]:
|
||||
# For the original function - run the tests and get the runtime, plus coverage
|
||||
with progress_bar(f"Establishing original code baseline for {function_name}"):
|
||||
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
|
||||
|
|
@ -1011,7 +1047,9 @@ class Optimizer:
|
|||
console.rule()
|
||||
|
||||
total_timing = benchmarking_results.total_passed_runtime() # caution: doesn't handle the loop index
|
||||
|
||||
runtime_distribution, runtime_statistics = benchmarking_results.bayesian_nonparametric_bootstrap_analysis(
|
||||
100_000
|
||||
)
|
||||
functions_to_remove = [
|
||||
result.id.test_function_name
|
||||
for result in behavioral_results
|
||||
|
|
@ -1042,6 +1080,14 @@ class Optimizer:
|
|||
)
|
||||
console.rule()
|
||||
logger.debug(f"Total original code runtime (ns): {total_timing}")
|
||||
console.rule()
|
||||
logger.info(
|
||||
f"Overall code runtime (95% Credible Interval) = ["
|
||||
f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
|
||||
f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], median: "
|
||||
f"{humanize_runtime(round(runtime_statistics['median']))}"
|
||||
)
|
||||
|
||||
return Success(
|
||||
(
|
||||
OriginalCodeBaseline(
|
||||
|
|
@ -1050,13 +1096,15 @@ class Optimizer:
|
|||
runtime=total_timing,
|
||||
coverage_results=coverage_results,
|
||||
),
|
||||
runtime_distribution,
|
||||
runtime_statistics,
|
||||
functions_to_remove,
|
||||
)
|
||||
)
|
||||
|
||||
def run_optimized_candidate(
|
||||
self, *, optimization_candidate_index: int, baseline_results: OriginalCodeBaseline
|
||||
) -> Result[OptimizedCandidateResult, str]:
|
||||
) -> Result[tuple[OptimizedCandidateResult, npt.NDArray[np.float64], dict[str, np.float64]], str]:
|
||||
assert (test_framework := self.args.test_framework) in ["pytest", "unittest"]
|
||||
|
||||
with progress_bar("Testing optimization candidate"):
|
||||
|
|
@ -1138,9 +1186,20 @@ class Optimizer:
|
|||
if (total_candidate_timing := candidate_benchmarking_results.total_passed_runtime()) == 0:
|
||||
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
|
||||
console.rule()
|
||||
runtime_distribution, runtime_statistics = (
|
||||
candidate_benchmarking_results.bayesian_nonparametric_bootstrap_analysis(100_000)
|
||||
)
|
||||
|
||||
logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}")
|
||||
console.rule()
|
||||
logger.debug(
|
||||
f"Overall code runtime (95% Credible Interval) = ["
|
||||
f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
|
||||
f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], median: "
|
||||
f"{humanize_runtime(round(runtime_statistics['median']))}"
|
||||
)
|
||||
return Success(
|
||||
(
|
||||
OptimizedCandidateResult(
|
||||
max_loop_count=loop_count,
|
||||
best_test_runtime=total_candidate_timing,
|
||||
|
|
@ -1148,6 +1207,9 @@ class Optimizer:
|
|||
benchmarking_test_results=candidate_benchmarking_results,
|
||||
optimization_candidate_index=optimization_candidate_index,
|
||||
total_candidate_timing=total_candidate_timing,
|
||||
),
|
||||
runtime_distribution,
|
||||
runtime_statistics,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ def compute_statistics(distribution: npt.NDArray[np.float64], gamma: float = 0.9
|
|||
}
|
||||
|
||||
|
||||
def analyse_function_runtime_data(
|
||||
def analyze_function_runtime_data(
|
||||
function_runtime_data: list[list[int]], bootstrap_size: int
|
||||
) -> tuple[npt.NDArray[np.float64], dict[str, np.float64]]:
|
||||
rng = np.random.default_rng()
|
||||
|
|
@ -107,6 +107,6 @@ def analyse_function_runtime_data(
|
|||
|
||||
def compare_function_runtime_distributions(
|
||||
function1_runtime_distribution: npt.NDArray[np.float64], function2_runtime_distribution: npt.NDArray[np.float64]
|
||||
) -> tuple[dict[str, np.float64], np.float64]:
|
||||
) -> dict[str, np.float64]:
|
||||
speedup_distribution = function1_runtime_distribution / function2_runtime_distribution
|
||||
return compute_statistics(speedup_distribution), np.mean(speedup_distribution > 1)
|
||||
return compute_statistics(speedup_distribution)
|
||||
|
|
|
|||
|
|
@ -1,9 +1,16 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import Iterator
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterator
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
import sys
|
||||
from enum import Enum
|
||||
from typing import Optional, cast
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -11,6 +18,7 @@ from pydantic.dataclasses import dataclass
|
|||
from rich.tree import Tree
|
||||
|
||||
from codeflash.cli_cmds.console import DEBUG_MODE, logger
|
||||
from codeflash.verification.bayesian_analysis import analyze_function_runtime_data
|
||||
from codeflash.verification.comparator import comparator
|
||||
|
||||
|
||||
|
|
@ -179,6 +187,11 @@ class TestResults(BaseModel):
|
|||
]
|
||||
)
|
||||
|
||||
def bayesian_nonparametric_bootstrap_analysis(
|
||||
self, bootstrap_size: int
|
||||
) -> tuple[npt.NDArray[np.float64], dict[str, np.float64]]:
|
||||
return analyze_function_runtime_data(list(self.usable_runtime_data_by_test_case().values()), bootstrap_size)
|
||||
|
||||
def __iter__(self) -> Iterator[FunctionTestInvocation]:
|
||||
return iter(self.test_results)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import numpy as np
|
||||
from codeflash.verification.bayesian_analysis import (
|
||||
analyse_function_runtime_data,
|
||||
analyze_function_runtime_data,
|
||||
compare_function_runtime_distributions,
|
||||
)
|
||||
|
||||
|
|
@ -46,13 +46,11 @@ def test_bayesian_analysis() -> None:
|
|||
opt1 = [list(data[("opt1", i)]) for i in inputs]
|
||||
opt2 = [list(data[("opt2", i)]) for i in inputs]
|
||||
|
||||
original_distribution, original_stats = analyse_function_runtime_data(orig, 10000)
|
||||
optimized_distribution1, optimized_stats1 = analyse_function_runtime_data(opt1, 10000)
|
||||
optimized_distribution2, optimized_stats2 = analyse_function_runtime_data(opt2, 10000)
|
||||
original_distribution, original_stats = analyze_function_runtime_data(orig, 10000)
|
||||
optimized_distribution1, optimized_stats1 = analyze_function_runtime_data(opt1, 10000)
|
||||
optimized_distribution2, optimized_stats2 = analyze_function_runtime_data(opt2, 10000)
|
||||
|
||||
speedup_stats1, faster_prob1 = compare_function_runtime_distributions(
|
||||
original_distribution, optimized_distribution1
|
||||
)
|
||||
speedup_stats1 = compare_function_runtime_distributions(original_distribution, optimized_distribution1)
|
||||
assert (
|
||||
1.162
|
||||
< speedup_stats1["credible_interval_lower_bound"]
|
||||
|
|
@ -62,11 +60,8 @@ def test_bayesian_analysis() -> None:
|
|||
< speedup_stats1["credible_interval_upper_bound"]
|
||||
< 1.174
|
||||
)
|
||||
assert faster_prob1 == 1.0
|
||||
|
||||
speedup_stats2, faster_prob2 = compare_function_runtime_distributions(
|
||||
original_distribution, optimized_distribution2
|
||||
)
|
||||
speedup_stats2 = compare_function_runtime_distributions(original_distribution, optimized_distribution2)
|
||||
assert (
|
||||
1.046
|
||||
< speedup_stats2["credible_interval_lower_bound"]
|
||||
|
|
@ -76,4 +71,3 @@ def test_bayesian_analysis() -> None:
|
|||
< speedup_stats2["credible_interval_upper_bound"]
|
||||
< 1.057
|
||||
)
|
||||
assert faster_prob1 == 1.0
|
||||
|
|
|
|||
Loading…
Reference in a new issue