mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
eliminate the use of flat code for parsing
This commit is contained in:
parent
6fc89260bc
commit
a7ff701309
9 changed files with 40 additions and 33 deletions
|
|
@ -73,9 +73,6 @@ class AiServiceClient:
|
|||
url = f"{self.base_url}/ai{endpoint}"
|
||||
if method.upper() == "POST":
|
||||
json_payload = json.dumps(payload, indent=None, default=pydantic_encoder)
|
||||
logger.debug(f"========JSON PAYLOAD FOR {url}==============")
|
||||
logger.debug(json_payload)
|
||||
logger.debug("======================")
|
||||
headers = {**self.headers, "Content-Type": "application/json"}
|
||||
response = requests.post(url, data=json_payload, headers=headers, timeout=timeout)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -85,14 +85,14 @@ def get_code_optimization_context(
|
|||
)
|
||||
|
||||
# Handle token limits
|
||||
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.flat)
|
||||
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.markdown)
|
||||
if final_read_writable_tokens > optim_token_limit:
|
||||
raise ValueError("Read-writable code has exceeded token limit, cannot proceed")
|
||||
|
||||
# Setup preexisting objects for code replacer
|
||||
preexisting_objects = set(
|
||||
chain(
|
||||
find_preexisting_objects(final_read_writable_code.flat),
|
||||
*(find_preexisting_objects(codestring.code) for codestring in final_read_writable_code.code_strings),
|
||||
*(find_preexisting_objects(codestring.code) for codestring in read_only_code_markdown.code_strings),
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
import ast
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
|
|
@ -611,7 +612,9 @@ def _analyze_imports_in_optimized_code(
|
|||
|
||||
|
||||
def detect_unused_helper_functions(
|
||||
function_to_optimize: FunctionToOptimize, code_context: CodeOptimizationContext, optimized_code: str
|
||||
function_to_optimize: FunctionToOptimize,
|
||||
code_context: CodeOptimizationContext,
|
||||
optimized_code: str | CodeStringsMarkdown,
|
||||
) -> list[FunctionSource]:
|
||||
"""Detect helper functions that are no longer called by the optimized entrypoint function.
|
||||
|
||||
|
|
@ -624,6 +627,14 @@ def detect_unused_helper_functions(
|
|||
List of FunctionSource objects representing unused helper functions
|
||||
|
||||
"""
|
||||
if isinstance(optimized_code, CodeStringsMarkdown) and len(optimized_code.code_strings) > 0:
|
||||
return list(
|
||||
chain.from_iterable(
|
||||
detect_unused_helper_functions(function_to_optimize, code_context, code.code)
|
||||
for code in optimized_code.code_strings
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
# Parse the optimized code to analyze function calls and imports
|
||||
optimized_ast = ast.parse(optimized_code)
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization
|
|||
generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests
|
||||
]
|
||||
optimizations_dict = {
|
||||
candidate.optimization_id: {"source_code": candidate.source_code.flat, "explanation": candidate.explanation}
|
||||
candidate.optimization_id: {"source_code": candidate.source_code.markdown, "explanation": candidate.explanation}
|
||||
for candidate in optimizations_set.control + optimizations_set.experiment
|
||||
}
|
||||
|
||||
|
|
@ -330,7 +330,7 @@ def perform_function_optimization( # noqa: PLR0911
|
|||
"message": f"No best optimizations found for function {function_to_optimize_qualified_name}",
|
||||
}
|
||||
|
||||
optimized_source = best_optimization.candidate.source_code.flat
|
||||
optimized_source = best_optimization.candidate.source_code.markdown
|
||||
speedup = original_code_baseline.runtime / best_optimization.runtime
|
||||
|
||||
server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")
|
||||
|
|
|
|||
|
|
@ -157,12 +157,8 @@ class CodeString(BaseModel):
|
|||
file_path: Optional[Path] = None
|
||||
|
||||
|
||||
# Used to split files by adding a marker at the start of each file followed by the file path.
|
||||
LINE_SPLITTER_MARKER_PREFIX = "# --codeflash:file--"
|
||||
|
||||
|
||||
def get_code_block_splitter(file_path: Path) -> str:
|
||||
return f"{LINE_SPLITTER_MARKER_PREFIX}{file_path}"
|
||||
return f"# file: {file_path}"
|
||||
|
||||
|
||||
markdown_pattern = re.compile(r"```python:([^\n]+)\n(.*?)\n```", re.DOTALL)
|
||||
|
|
@ -182,6 +178,11 @@ class CodeStringsMarkdown(BaseModel):
|
|||
Returns:
|
||||
str: The concatenated code of all blocks with file path annotations.
|
||||
|
||||
!! Important !!:
|
||||
Avoid parsing the flat code with multiple files,
|
||||
parsing may result in unexpected behavior.
|
||||
|
||||
|
||||
"""
|
||||
if self._cache.get("flat") is not None:
|
||||
return self._cache["flat"]
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ from codeflash.discovery.functions_to_optimize import was_function_previously_op
|
|||
from codeflash.either import Failure, Success, is_successful
|
||||
from codeflash.models.ExperimentMetadata import ExperimentMetadata
|
||||
from codeflash.models.models import (
|
||||
LINE_SPLITTER_MARKER_PREFIX,
|
||||
BestOptimization,
|
||||
CodeOptimizationContext,
|
||||
GeneratedTests,
|
||||
|
|
@ -171,7 +170,10 @@ class FunctionOptimizer:
|
|||
helper_code = f.read()
|
||||
original_helper_code[helper_function_path] = helper_code
|
||||
|
||||
if has_any_async_functions(code_context.read_writable_code.flat):
|
||||
async_code = any(
|
||||
has_any_async_functions(code_string.code) for code_string in code_context.read_writable_code.code_strings
|
||||
)
|
||||
if async_code:
|
||||
return Failure("Codeflash does not support async functions in the code to optimize.")
|
||||
# Random here means that we still attempt optimization with a fractional chance to see if
|
||||
# last time we could not find an optimization, maybe this time we do.
|
||||
|
|
@ -731,7 +733,7 @@ class FunctionOptimizer:
|
|||
preexisting_objects=code_context.preexisting_objects,
|
||||
project_root_path=self.project_root,
|
||||
)
|
||||
unused_helpers = detect_unused_helper_functions(self.function_to_optimize, code_context, optimized_code.flat)
|
||||
unused_helpers = detect_unused_helper_functions(self.function_to_optimize, code_context, optimized_code)
|
||||
|
||||
# Revert unused helper functions to their original definitions
|
||||
if unused_helpers:
|
||||
|
|
@ -1165,15 +1167,10 @@ class FunctionOptimizer:
|
|||
optimized_runtimes_all=optimized_runtime_by_test,
|
||||
)
|
||||
new_explanation_raw_str = self.aiservice_client.get_new_explanation(
|
||||
source_code=code_context.read_writable_code.flat.replace(
|
||||
LINE_SPLITTER_MARKER_PREFIX,
|
||||
"# file: ", # for better readability
|
||||
),
|
||||
source_code=code_context.read_writable_code.flat,
|
||||
dependency_code=code_context.read_only_context_code,
|
||||
trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
|
||||
optimized_code=best_optimization.candidate.source_code.flat.replace(
|
||||
LINE_SPLITTER_MARKER_PREFIX, "# file: "
|
||||
),
|
||||
optimized_code=best_optimization.candidate.source_code.flat,
|
||||
original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
|
||||
optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
|
||||
original_code_runtime=humanize_runtime(original_code_baseline.runtime),
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from pathlib import Path
|
|||
import pytest
|
||||
from codeflash.context.code_context_extractor import get_code_optimization_context
|
||||
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
|
||||
from codeflash.models.models import FunctionParent, get_code_block_splitter
|
||||
from codeflash.models.models import FunctionParent
|
||||
from codeflash.optimization.optimizer import Optimizer
|
||||
from codeflash.code_utils.code_replacer import replace_functions_and_add_imports
|
||||
from codeflash.code_utils.code_extractor import add_global_assignments
|
||||
|
|
|
|||
|
|
@ -123,6 +123,7 @@ print("Hello world")
|
|||
|
||||
function_name: str = "NewClass.new_function"
|
||||
preexisting_objects: set[tuple[str, tuple[FunctionParent, ...]]] = find_preexisting_objects(original_code)
|
||||
print(f"Preexisting objects: {preexisting_objects}")
|
||||
new_code: str = replace_functions_and_add_imports(
|
||||
source_code=original_code,
|
||||
function_names=[function_name],
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ def helper_function_2(x):
|
|||
code_context = ctx_result.unwrap()
|
||||
|
||||
# Test unused helper detection
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
|
||||
# Should detect helper_function_2 as unused
|
||||
unused_names = {uh.qualified_name for uh in unused_helpers}
|
||||
|
|
@ -267,7 +267,7 @@ def helper_function_2(x):
|
|||
original_helper_code = {main_file: main_file.read_text()}
|
||||
|
||||
# Test detection - should find no unused helpers
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
assert len(unused_helpers) == 0, "No helpers should be detected as unused"
|
||||
|
||||
# Apply optimization
|
||||
|
|
@ -350,7 +350,7 @@ def entrypoint_function(n):
|
|||
code_context = ctx_result.unwrap()
|
||||
|
||||
# Test unused helper detection
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
|
||||
# Should detect helper_function_2 as unused
|
||||
unused_names = {uh.qualified_name for uh in unused_helpers}
|
||||
|
|
@ -538,7 +538,7 @@ class Calculator:
|
|||
code_context = ctx_result.unwrap()
|
||||
|
||||
# Test unused helper detection
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
|
||||
# Should detect Calculator.helper_method_2 as unused
|
||||
unused_names = {uh.qualified_name for uh in unused_helpers}
|
||||
|
|
@ -683,7 +683,7 @@ class Processor:
|
|||
code_context = ctx_result.unwrap()
|
||||
|
||||
# Test unused helper detection
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
|
||||
# Should detect external_helper_2 as unused
|
||||
unused_names = {uh.qualified_name for uh in unused_helpers}
|
||||
|
|
@ -889,7 +889,7 @@ class OuterClass:
|
|||
]
|
||||
},
|
||||
)(),
|
||||
CodeStringsMarkdown.parse_markdown_code(optimized_code).flat,
|
||||
CodeStringsMarkdown.parse_markdown_code(optimized_code),
|
||||
)
|
||||
|
||||
# Should detect global_helper_2 as unused
|
||||
|
|
@ -1018,7 +1018,7 @@ def entrypoint_function(n):
|
|||
code_context = ctx_result.unwrap()
|
||||
|
||||
# Test unused helper detection
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
|
||||
# Should detect multiply, process_data as unused (at minimum)
|
||||
unused_names = {uh.qualified_name for uh in unused_helpers}
|
||||
|
|
@ -1178,7 +1178,7 @@ def entrypoint_function(n):
|
|||
code_context = ctx_result.unwrap()
|
||||
|
||||
# Test unused helper detection
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code).flat)
|
||||
unused_helpers = detect_unused_helper_functions(optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_code))
|
||||
|
||||
# Should detect multiply_numbers and divide_numbers as unused
|
||||
unused_names = {uh.qualified_name for uh in unused_helpers}
|
||||
|
|
@ -1400,7 +1400,7 @@ class MathUtils:
|
|||
|
||||
# Test unused helper detection for static method
|
||||
unused_helpers = detect_unused_helper_functions(
|
||||
optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_static_code).flat
|
||||
optimizer.function_to_optimize, code_context, CodeStringsMarkdown.parse_markdown_code(optimized_static_code)
|
||||
)
|
||||
|
||||
# Should detect utility_function_2 as unused
|
||||
|
|
|
|||
Loading…
Reference in a new issue