fix: raise JS/TS noise floor to 3x Python to reduce false positive speedups

Separate V8 processes have significant JIT/GC variance (15%+) that causes
false-positive speedups at the current 5% threshold. This raises the JS/TS
noise floor to 15% (45% for <10μs functions) via a 3x multiplier; the
existing 2x CI doubling still applies on top of that.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ali 2026-03-12 17:04:28 +02:00
parent 4b8effa07b
commit 8474c36fa7
No known key found for this signature in database
GPG key ID: 44F9B42770617B9B
2 changed files with 75 additions and 6 deletions

View file

@ -11,6 +11,7 @@ from codeflash.code_utils.config_consts import (
MIN_TESTCASE_PASSED_THRESHOLD,
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD,
)
from codeflash.languages.current import is_javascript
from codeflash.models.test_type import TestType
if TYPE_CHECKING:
@ -24,6 +25,23 @@ class AcceptanceReason(Enum):
NONE = "none"
# Factor by which the speedup noise floor is scaled when the current language
# is JavaScript/TypeScript (see compute_noise_floor).
JS_NOISE_MULTIPLIER = 3
def compute_noise_floor(original_code_runtime: int, *, disable_gh_action_noise: bool = False) -> float:
    """Return the minimum performance gain that counts as a real speedup.

    The floor starts at ``MIN_IMPROVEMENT_THRESHOLD`` and is scaled up by
    independent factors:

    * 3x when the original runtime is below 10000 (callers pass nanoseconds,
      i.e. functions faster than 10μs).
    * ``JS_NOISE_MULTIPLIER`` when the current language is JavaScript or
      TypeScript, because separate V8 processes have significant JIT/GC
      variance that creates false-positive speedups.
    * 2x when running in CI, unless ``disable_gh_action_noise`` is set.

    Args:
        original_code_runtime: Runtime of the original code.
        disable_gh_action_noise: When true, skip the CI doubling.

    Returns:
        The noise floor as a fractional gain, to be compared against the
        measured performance gain.
    """
    # Very fast functions are measured less reliably, so they start higher.
    if original_code_runtime < 10000:
        floor = 3 * MIN_IMPROVEMENT_THRESHOLD
    else:
        floor = MIN_IMPROVEMENT_THRESHOLD

    if is_javascript():
        floor = floor * JS_NOISE_MULTIPLIER

    # The CI check is only consulted when the caller has not opted out.
    if not disable_gh_action_noise and env_utils.is_ci():
        floor = floor * 2

    return floor
def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
"""Calculate the performance gain of an optimized code over the original code.
@ -91,9 +109,7 @@ def speedup_critic(
- Concurrency improvements detect when blocking calls are replaced with non-blocking equivalents
"""
# Runtime performance evaluation
noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
if not disable_gh_action_noise and env_utils.is_ci():
noise_floor = noise_floor * 2 # Increase the noise floor in GitHub Actions mode
noise_floor = compute_noise_floor(original_code_runtime, disable_gh_action_noise=disable_gh_action_noise)
perf_gain = performance_gain(
original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
@ -151,9 +167,7 @@ def get_acceptance_reason(
Returns the primary reason for acceptance, with priority:
concurrency > throughput > runtime (for async code).
"""
noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_runtime_ns < 10000 else MIN_IMPROVEMENT_THRESHOLD
if env_utils.is_ci():
noise_floor = noise_floor * 2
noise_floor = compute_noise_floor(original_runtime_ns)
perf_gain = performance_gain(original_runtime_ns=original_runtime_ns, optimized_runtime_ns=optimized_runtime_ns)
runtime_improved = perf_gain > noise_floor

View file

@ -2,6 +2,8 @@ import os
from pathlib import Path
from unittest.mock import Mock
import pytest
from codeflash.code_utils.env_utils import get_pr_number
from codeflash.models.models import (
CodeOptimizationContext,
@ -15,7 +17,9 @@ from codeflash.models.models import (
TestResults,
TestType,
)
from codeflash.languages.current import reset_current_language, set_current_language
from codeflash.result.critic import (
compute_noise_floor,
concurrency_gain,
coverage_critic,
performance_gain,
@ -799,3 +803,54 @@ def test_parse_concurrency_metrics() -> None:
metrics_no_class = parse_concurrency_metrics(test_results_no_class, "my_function")
assert metrics_no_class is not None
assert metrics_no_class.concurrency_ratio == 2.0 # 5000000 / 2500000
def test_compute_noise_floor_python() -> None:
    """With no JS/TS language set, the floor is 5% at >=10μs and 15% below 10μs."""
    reset_current_language()
    # (runtime in ns, expected noise floor) with the CI doubling disabled.
    expected_floors = [(100_000, 0.05), (9_999, 0.15)]
    for runtime_ns, expected in expected_floors:
        floor = compute_noise_floor(runtime_ns, disable_gh_action_noise=True)
        assert floor == pytest.approx(expected)
def test_compute_noise_floor_javascript() -> None:
    """For JavaScript the floor is tripled: 15% at >=10μs and 45% below 10μs."""
    # (runtime in ns, expected noise floor) with the CI doubling disabled.
    expected_floors = [(100_000, 0.15), (9_999, 0.45)]
    set_current_language("javascript")
    try:
        for runtime_ns, expected in expected_floors:
            floor = compute_noise_floor(runtime_ns, disable_gh_action_noise=True)
            assert floor == pytest.approx(expected)
    finally:
        # Always restore the language so later tests are unaffected.
        reset_current_language()
def test_compute_noise_floor_typescript() -> None:
    """TypeScript receives the same tripled noise floor as JavaScript."""
    set_current_language("typescript")
    try:
        floor = compute_noise_floor(100_000, disable_gh_action_noise=True)
        assert floor == pytest.approx(0.15)
    finally:
        # Always restore the language so later tests are unaffected.
        reset_current_language()
def test_speedup_critic_rejects_js_false_positive() -> None:
    """A ~10.5% speedup that passes for Python should be rejected for JS (noise floor 15%).

    The same candidate result is judged twice with the CI doubling disabled:
    once with no JS/TS language set and once with the language set to
    JavaScript, where the higher noise floor applies.
    """
    original_code_runtime = 100_000  # 100μs — above the 10μs fast-function threshold
    candidate_result = OptimizedCandidateResult(
        max_loop_count=5,
        best_test_runtime=90_500,  # ~10.5% improvement
        behavior_test_results=TestResults(),
        benchmarking_test_results=TestResults(),
        optimization_candidate_index=0,
        total_candidate_timing=12,
    )

    # Python: 10.5% > 5% noise floor → accepted
    reset_current_language()
    assert speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True)

    # JavaScript: 10.5% < 15% noise floor → rejected
    set_current_language("javascript")
    try:
        assert not speedup_critic(candidate_result, original_code_runtime, None, disable_gh_action_noise=True)
    finally:
        # Always restore the language so later tests are unaffected.
        reset_current_language()