Eliminate the use of flat code for parsing

This commit is contained in:
mohammed 2025-08-06 22:48:03 +03:00
parent 6fc89260bc
commit a7ff701309
No known key found for this signature in database
GPG key ID: 44F9B42770617B9B
9 changed files with 40 additions and 33 deletions

View file

@ -73,9 +73,6 @@ class AiServiceClient:
url = f"{self.base_url}/ai{endpoint}"
if method.upper() == "POST":
json_payload = json.dumps(payload, indent=None, default=pydantic_encoder)
logger.debug(f"========JSON PAYLOAD FOR {url}==============")
logger.debug(json_payload)
logger.debug("======================")
headers = {**self.headers, "Content-Type": "application/json"}
response = requests.post(url, data=json_payload, headers=headers, timeout=timeout)
else:

View file

@ -85,14 +85,14 @@ def get_code_optimization_context(
)
# Handle token limits
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.flat)
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.markdown)
if final_read_writable_tokens > optim_token_limit:
raise ValueError("Read-writable code has exceeded token limit, cannot proceed")
# Setup preexisting objects for code replacer
preexisting_objects = set(
chain(
find_preexisting_objects(final_read_writable_code.flat),
*(find_preexisting_objects(codestring.code) for codestring in final_read_writable_code.code_strings),
*(find_preexisting_objects(codestring.code) for codestring in read_only_code_markdown.code_strings),
)
)

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import ast
from collections import defaultdict
from dataclasses import dataclass, field
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Optional
@ -611,7 +612,9 @@ def _analyze_imports_in_optimized_code(
def detect_unused_helper_functions(
function_to_optimize: FunctionToOptimize, code_context: CodeOptimizationContext, optimized_code: str
function_to_optimize: FunctionToOptimize,
code_context: CodeOptimizationContext,
optimized_code: str | CodeStringsMarkdown,
) -> list[FunctionSource]:
"""Detect helper functions that are no longer called by the optimized entrypoint function.
@ -624,6 +627,14 @@ def detect_unused_helper_functions(
List of FunctionSource objects representing unused helper functions
"""
if isinstance(optimized_code, CodeStringsMarkdown) and len(optimized_code.code_strings) > 0:
return list(
chain.from_iterable(
detect_unused_helper_functions(function_to_optimize, code_context, code.code)
for code in optimized_code.code_strings
)
)
try:
# Parse the optimized code to analyze function calls and imports
optimized_ast = ast.parse(optimized_code)

View file

@ -222,7 +222,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization
generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests
]
optimizations_dict = {
candidate.optimization_id: {"source_code": candidate.source_code.flat, "explanation": candidate.explanation}
candidate.optimization_id: {"source_code": candidate.source_code.markdown, "explanation": candidate.explanation}
for candidate in optimizations_set.control + optimizations_set.experiment
}
@ -330,7 +330,7 @@ def perform_function_optimization( # noqa: PLR0911
"message": f"No best optimizations found for function {function_to_optimize_qualified_name}",
}
optimized_source = best_optimization.candidate.source_code.flat
optimized_source = best_optimization.candidate.source_code.markdown
speedup = original_code_baseline.runtime / best_optimization.runtime
server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")

View file

@ -157,12 +157,8 @@ class CodeString(BaseModel):
file_path: Optional[Path] = None
# Used to split files by adding a marker at the start of each file followed by the file path.
LINE_SPLITTER_MARKER_PREFIX = "# --codeflash:file--"
def get_code_block_splitter(file_path: Path) -> str:
return f"{LINE_SPLITTER_MARKER_PREFIX}{file_path}"
return f"# file: {file_path}"
markdown_pattern = re.compile(r"```python:([^\n]+)\n(.*?)\n```", re.DOTALL)
@ -182,6 +178,11 @@ class CodeStringsMarkdown(BaseModel):
Returns:
str: The concatenated code of all blocks with file path annotations.
!! Important !!:
Avoid parsing the flat code when it contains multiple files;
doing so may result in unexpected behavior.
"""
if self._cache.get("flat") is not None:
return self._cache["flat"]

View file

@ -62,7 +62,6 @@ from codeflash.discovery.functions_to_optimize import was_function_previously_op
from codeflash.either import Failure, Success, is_successful
from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import (
LINE_SPLITTER_MARKER_PREFIX,
BestOptimization,
CodeOptimizationContext,
GeneratedTests,
@ -171,7 +170,10 @@ class FunctionOptimizer:
helper_code = f.read()
original_helper_code[helper_function_path] = helper_code
if has_any_async_functions(code_context.read_writable_code.flat):
async_code = any(
has_any_async_functions(code_string.code) for code_string in code_context.read_writable_code.code_strings
)
if async_code:
return Failure("Codeflash does not support async functions in the code to optimize.")
# Random here means that we still attempt optimization with a fractional chance to see if
# last time we could not find an optimization, maybe this time we do.
@ -731,7 +733,7 @@ class FunctionOptimizer:
preexisting_objects=code_context.preexisting_objects,
project_root_path=self.project_root,
)
unused_helpers = detect_unused_helper_functions(self.function_to_optimize, code_context, optimized_code.flat)
unused_helpers = detect_unused_helper_functions(self.function_to_optimize, code_context, optimized_code)
# Revert unused helper functions to their original definitions
if unused_helpers:
@ -1165,15 +1167,10 @@ class FunctionOptimizer:
optimized_runtimes_all=optimized_runtime_by_test,
)
new_explanation_raw_str = self.aiservice_client.get_new_explanation(
source_code=code_context.read_writable_code.flat.replace(
LINE_SPLITTER_MARKER_PREFIX,
"# file: ", # for better readability
),
source_code=code_context.read_writable_code.flat,
dependency_code=code_context.read_only_context_code,
trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
optimized_code=best_optimization.candidate.source_code.flat.replace(
LINE_SPLITTER_MARKER_PREFIX, "# file: "
),
optimized_code=best_optimization.candidate.source_code.flat,
original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
original_code_runtime=humanize_runtime(original_code_baseline.runtime),

View file

@ -9,7 +9,7 @@ from pathlib import Path
import pytest
from codeflash.context.code_context_extractor import get_code_optimization_context
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.models.models import FunctionParent, get_code_block_splitter
from codeflash.models.models import FunctionParent
from codeflash.optimization.optimizer import Optimizer
from codeflash.code_utils.code_replacer import replace_functions_and_add_imports
from codeflash.code_utils.code_extractor import add_global_assignments

View file

@ -123,6 +123,7 @@ print("Hello world")
function_name: str = "NewClass.new_function"
preexisting_objects: set[tuple[str, tuple[FunctionParent, ...]]] = find_preexisting_objects(original_code)
print(f"Preexisting objects: {preexisting_objects}")
new_code: str = replace_functions_and_add_imports(
source_code=original_code,
function_names=[function_name],

View file

@ -92,7 +92,7 @@ def helper_function_2(x):
code_context = ctx_result.unwrap()
# Test unused helper detection
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
# Should detect helper_function_2 as unused
unused_names = {uh.qualified_name for uh in unused_helpers}
@ -267,7 +267,7 @@ def helper_function_2(x):
original_helper_code = {main_file: main_file.read_text()}
# Test detection - should find no unused helpers
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
assert len(unused_helpers) == 0, "No helpers should be detected as unused"
# Apply optimization
@ -350,7 +350,7 @@ def entrypoint_function(n):
code_context = ctx_result.unwrap()
# Test unused helper detection
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
# Should detect helper_function_2 as unused
unused_names = {uh.qualified_name for uh in unused_helpers}
@ -538,7 +538,7 @@ class Calculator:
code_context = ctx_result.unwrap()
# Test unused helper detection
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
# Should detect Calculator.helper_method_2 as unused
unused_names = {uh.qualified_name for uh in unused_helpers}
@ -683,7 +683,7 @@ class Processor:
code_context = ctx_result.unwrap()
# Test unused helper detection
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
# Should detect external_helper_2 as unused
unused_names = {uh.qualified_name for uh in unused_helpers}
@ -889,7 +889,7 @@ class OuterClass:
]
},
)(),
CodeStringsMarkdown.parse_markdown_code(optimized_code).flat,
CodeStringsMarkdown.parse_markdown_code(optimized_code),
)
# Should detect global_helper_2 as unused
@ -1018,7 +1018,7 @@ def entrypoint_function(n):
code_context = ctx_result.unwrap()
# Test unused helper detection
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
# Should detect multiply, process_data as unused (at minimum)
unused_names = {uh.qualified_name for uh in unused_helpers}
@ -1178,7 +1178,7 @@ def entrypoint_function(n):
code_context = ctx_result.unwrap()
# Test unused helper detection
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
# Should detect multiply_numbers and divide_numbers as unused
unused_names = {uh.qualified_name for uh in unused_helpers}
@ -1400,7 +1400,7 @@ class MathUtils:
# Test unused helper detection for static method
unused_helpers = detect_unused_helper_functions(
optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_static_code).flat
optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_static_code)
)
# Should detect utility_function_2 as unused