fix: prevent optimized code from one file being applied to another file
The bug was introduced in commit 06353ea1, which added a fallback that
applied a single code block to ANY file being processed. This caused
issues such as the one in PR #1309, where normalize_java_code was
duplicated in support.py because optimized code for formatter.py was
incorrectly applied to it.
The fix restricts the single-code-block fallback to non-Python languages
only, where flexible path matching is needed (Java/JS/TS). For Python,
exact path matching is now required.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
2fd26ba8b7
commit
7b72a7e6ad
2 changed files with 86 additions and 3 deletions
|
|
@ -966,8 +966,12 @@ def get_optimized_code_for_module(relative_path: Path, optimized_code: CodeStrin
|
|||
|
||||
|
||||
if module_optimized_code is None:
|
||||
# Also try matching if there's only one code file
|
||||
if len(file_to_code_context) == 1:
|
||||
# Also try matching if there's only one code file, but ONLY for non-Python
|
||||
# languages where path matching is less strict. For Python, we require
|
||||
# exact path matching to avoid applying code meant for one file to another.
|
||||
# This prevents bugs like PR #1309 where a function was duplicated because
|
||||
# optimized code for formatter.py was incorrectly applied to support.py.
|
||||
if len(file_to_code_context) == 1 and not is_python():
|
||||
only_key = next(iter(file_to_code_context.keys()))
|
||||
module_optimized_code = file_to_code_context[only_key]
|
||||
logger.debug(f"Using only code block {only_key} for {relative_path}")
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
|
||||
from codeflash.models.models import CodeOptimizationContext, CodeStringsMarkdown
|
||||
from codeflash.models.models import CodeOptimizationContext, CodeStringsMarkdown, FunctionParent
|
||||
from codeflash.optimization.function_optimizer import FunctionOptimizer
|
||||
from codeflash.verification.verification_utils import TestConfig
|
||||
|
||||
|
|
@ -165,3 +165,82 @@ def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
|
|||
|
||||
assert new_code.rstrip() == original_main.rstrip() # No Change
|
||||
assert new_helper_code.rstrip() == expected_helper.rstrip()
|
||||
|
||||
|
||||
def test_optimized_code_for_different_file_not_applied_to_current_file() -> None:
    """Test that optimized code for one file is not incorrectly applied to a different file.

    This reproduces the bug from PR #1309 where optimized code for `formatter.py`
    was incorrectly applied to `support.py`, causing `normalize_java_code` to be
    duplicated. The bug was in `get_optimized_code_for_module` which had a fallback
    that applied a single code block to ANY file being processed.

    The scenario:
    1. `support.py` imports `normalize_java_code` from `formatter.py`
    2. AI returns optimized code with a single code block for `formatter.py`
    3. BUG: When processing `support.py`, the fallback applies `formatter.py`'s code
    4. EXPECTED: No code should be applied to `support.py` since the paths don't match

    The temporary support file is written under `code_to_optimize/` and is removed
    in a `finally` clause, so it does not leak into the repository tree when the
    code under test raises.
    """
    from codeflash.code_utils.code_extractor import find_preexisting_objects
    from codeflash.code_utils.code_replacer import replace_function_definitions_in_module
    from codeflash.models.models import CodeStringsMarkdown

    root_dir = Path(__file__).parent.parent.resolve()

    # Create support.py - the file that imports the helper
    support_file = (root_dir / "code_to_optimize/temp_pr1309_support.py").resolve()
    original_support = '''from temp_pr1309_formatter import normalize_java_code


class JavaSupport:
    """Support class for Java operations."""

    def normalize_code(self, source: str) -> str:
        """Normalize code for deduplication."""
        return normalize_java_code(source)
'''

    # AI returns optimized code for formatter.py ONLY (with explicit path)
    # This simulates what happens when the AI optimizes the helper function
    optimized_markdown = '''```python:code_to_optimize/temp_pr1309_formatter.py
def normalize_java_code(source: str) -> str:
    """Optimized version with fast-path."""
    if not source:
        return ""
    return "\\n".join(line.strip() for line in source.splitlines() if line.strip())
```
'''

    try:
        support_file.write_text(original_support, encoding="utf-8")

        preexisting_objects = find_preexisting_objects(original_support)

        # Process support.py with the optimized code that's meant for formatter.py
        replace_function_definitions_in_module(
            function_names=["JavaSupport.normalize_code"],
            optimized_code=CodeStringsMarkdown.parse_markdown_code(optimized_markdown),
            module_abspath=support_file,
            preexisting_objects=preexisting_objects,
            project_root_path=root_dir,
        )

        # Read the result before cleanup removes the file.
        new_support_code = support_file.read_text(encoding="utf-8")
    finally:
        # Cleanup runs even when the code under test raises, so a failing run
        # does not leave the temporary module behind.
        support_file.unlink(missing_ok=True)

    # CRITICAL: support.py should NOT have normalize_java_code defined!
    # The optimized code was for formatter.py, not support.py.
    def_count = new_support_code.count("def normalize_java_code")
    assert def_count == 0, (
        "Bug: normalize_java_code was incorrectly added to support.py!\n"
        f"Found {def_count} definition(s) when there should be 0.\n"
        "The optimized code was for formatter.py, not support.py.\n"
        f"Resulting code:\n{new_support_code}"
    )

    # The file should remain unchanged since no code matched its path
    assert new_support_code.strip() == original_support.strip(), (
        "support.py was modified when it shouldn't have been.\n"
        f"Original:\n{original_support}\n"
        f"New:\n{new_support_code}"
    )
|
||||
|
|
|
|||
Loading…
Reference in a new issue