mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
fix: raise JS/TS noise floor to 3x Python to reduce false positive speedups
Separate V8 processes have significant JIT/GC variance (15%+) that causes false positive speedups at the current 5% threshold. This raises the JS/TS noise floor to 15% (45% for <10μs functions) via a 3x multiplier. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4b8effa07b
commit
8474c36fa7
2 changed files with 75 additions and 6 deletions
|
|
@ -11,6 +11,7 @@ from codeflash.code_utils.config_consts import (
|
|||
MIN_TESTCASE_PASSED_THRESHOLD,
|
||||
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD,
|
||||
)
|
||||
from codeflash.languages.current import is_javascript
|
||||
from codeflash.models.test_type import TestType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
|
@ -24,6 +25,23 @@ class AcceptanceReason(Enum):
|
|||
NONE = "none"
|
||||
|
||||
|
||||
# Factor by which compute_noise_floor() scales the noise floor when
# is_javascript() reports a JavaScript/TypeScript run: separate V8 processes
# show significant JIT/GC variance, which otherwise yields false positive speedups.
JS_NOISE_MULTIPLIER = 3
|
||||
|
||||
|
||||
def compute_noise_floor(original_code_runtime: int, *, disable_gh_action_noise: bool = False) -> float:
    """Return the minimum relative gain a candidate must exceed to count as a speedup.

    The floor starts at MIN_IMPROVEMENT_THRESHOLD and is widened in three
    independent steps:

    * runtimes below 10000 (callers pass nanoseconds, i.e. <10μs) use three
      times the base threshold;
    * JavaScript/TypeScript runs are multiplied by JS_NOISE_MULTIPLIER, because
      separate V8 processes have significant JIT/GC variance that creates
      false positive speedups;
    * CI runs are doubled, unless the caller opts out.

    Args:
        original_code_runtime: Runtime of the original code.
        disable_gh_action_noise: When True, skip the CI doubling (and the CI check).

    Returns:
        The noise floor as a fraction (e.g. 0.05 for 5%).
    """
    if original_code_runtime < 10000:
        threshold = 3 * MIN_IMPROVEMENT_THRESHOLD
    else:
        threshold = MIN_IMPROVEMENT_THRESHOLD

    # Language adjustment is applied before the CI adjustment so the
    # multiplication order (and thus the float result) stays fixed.
    if is_javascript():
        threshold = threshold * JS_NOISE_MULTIPLIER

    ci_noise_applies = not disable_gh_action_noise and env_utils.is_ci()
    if ci_noise_applies:
        threshold = threshold * 2

    return threshold
|
||||
|
||||
|
||||
def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
|
||||
"""Calculate the performance gain of an optimized code over the original code.
|
||||
|
||||
|
|
@ -91,9 +109,7 @@ def speedup_critic(
|
|||
- Concurrency improvements detect when blocking calls are replaced with non-blocking equivalents
|
||||
"""
|
||||
# Runtime performance evaluation
|
||||
noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
|
||||
if not disable_gh_action_noise and env_utils.is_ci():
|
||||
noise_floor = noise_floor * 2 # Increase the noise floor in GitHub Actions mode
|
||||
noise_floor = compute_noise_floor(original_code_runtime, disable_gh_action_noise=disable_gh_action_noise)
|
||||
|
||||
perf_gain = performance_gain(
|
||||
original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
|
||||
|
|
@ -151,9 +167,7 @@ def get_acceptance_reason(
|
|||
Returns the primary reason for acceptance, with priority:
|
||||
concurrency > throughput > runtime (for async code).
|
||||
"""
|
||||
noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_runtime_ns < 10000 else MIN_IMPROVEMENT_THRESHOLD
|
||||
if env_utils.is_ci():
|
||||
noise_floor = noise_floor * 2
|
||||
noise_floor = compute_noise_floor(original_runtime_ns)
|
||||
|
||||
perf_gain = performance_gain(original_runtime_ns=original_runtime_ns, optimized_runtime_ns=optimized_runtime_ns)
|
||||
runtime_improved = perf_gain > noise_floor
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ import os
|
|||
from pathlib import Path
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from codeflash.code_utils.env_utils import get_pr_number
|
||||
from codeflash.models.models import (
|
||||
CodeOptimizationContext,
|
||||
|
|
@ -15,7 +17,9 @@ from codeflash.models.models import (
|
|||
TestResults,
|
||||
TestType,
|
||||
)
|
||||
from codeflash.languages.current import reset_current_language, set_current_language
|
||||
from codeflash.result.critic import (
|
||||
compute_noise_floor,
|
||||
concurrency_gain,
|
||||
coverage_critic,
|
||||
performance_gain,
|
||||
|
|
@ -799,3 +803,54 @@ def test_parse_concurrency_metrics() -> None:
|
|||
metrics_no_class = parse_concurrency_metrics(test_results_no_class, "my_function")
|
||||
assert metrics_no_class is not None
|
||||
assert metrics_no_class.concurrency_ratio == 2.0 # 5000000 / 2500000
|
||||
|
||||
|
||||
def test_compute_noise_floor_python() -> None:
    """With the default language, expect 5% at >=10μs and 15% below 10μs."""
    reset_current_language()
    # (runtime in ns, expected noise floor) — CI doubling is disabled for determinism.
    expectations = ((100_000, 0.05), (9_999, 0.15))
    for runtime_ns, expected_floor in expectations:
        assert compute_noise_floor(runtime_ns, disable_gh_action_noise=True) == pytest.approx(expected_floor)
|
||||
|
||||
|
||||
def test_compute_noise_floor_javascript() -> None:
    """With JavaScript selected, expect 3x the Python floor: 15% at >=10μs, 45% below."""
    # (runtime in ns, expected noise floor) — CI doubling is disabled for determinism.
    expectations = ((100_000, 0.15), (9_999, 0.45))
    set_current_language("javascript")
    try:
        for runtime_ns, expected_floor in expectations:
            assert compute_noise_floor(runtime_ns, disable_gh_action_noise=True) == pytest.approx(expected_floor)
    finally:
        # Always restore the language so later tests are not affected.
        reset_current_language()
|
||||
|
||||
|
||||
def test_compute_noise_floor_typescript() -> None:
    """With TypeScript selected, the floor at >=10μs matches the JavaScript value (15%)."""
    set_current_language("typescript")
    try:
        floor = compute_noise_floor(100_000, disable_gh_action_noise=True)
        assert floor == pytest.approx(0.15)
    finally:
        # Always restore the language so later tests are not affected.
        reset_current_language()
|
||||
|
||||
|
||||
def test_speedup_critic_rejects_js_false_positive() -> None:
    """A ~10.5% speedup that passes for Python should be rejected for JS (noise floor 15%).

    The same candidate (100_000 ns -> 90_500 ns) is judged twice: once with the
    default language, where it clears the 5% floor, and once with JavaScript
    selected, where it falls short of the 15% floor. CI noise doubling is
    disabled so the thresholds are deterministic.
    """
    original_code_runtime = 100_000  # 100μs — above the 10μs fast-function threshold

    candidate_result = OptimizedCandidateResult(
        max_loop_count=5,
        best_test_runtime=90_500,  # ~10.5% improvement
        behavior_test_results=TestResults(),
        benchmarking_test_results=TestResults(),
        optimization_candidate_index=0,
        total_candidate_timing=12,
    )

    # Python: 10.5% > 5% noise floor → accepted
    reset_current_language()
    assert speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True)

    # JavaScript: 10.5% < 15% noise floor → rejected
    set_current_language("javascript")
    try:
        assert not speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True)
    finally:
        # Always restore the language so later tests are not affected.
        reset_current_language()
|
||||
|
|
|
|||
Loading…
Reference in a new issue