mirror of
https://github.com/codeflash-ai/codeflash-internal.git
synced 2026-05-04 18:25:18 +00:00
165 lines
7.3 KiB
Python
165 lines
7.3 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
import subprocess
|
|
from enum import IntEnum
|
|
from typing import TYPE_CHECKING
|
|
|
|
from codeflash.cli_cmds.console import code_print, console, logger
|
|
|
|
if TYPE_CHECKING:
|
|
from pathlib import Path
|
|
|
|
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
|
|
from codeflash.models.models import OptimizedCandidate
|
|
|
|
|
|
class DiffbehaviorReturnCode(IntEnum):
|
|
DID_NOT_RUN = -1
|
|
NO_DIFFERENCES = 0
|
|
COUNTER_EXAMPLES = 1
|
|
ERROR = 2
|
|
|
|
|
|
def run_concolic_correctness(
|
|
function_to_optimize: FunctionToOptimize,
|
|
function_to_optimize_original_worktree_fqn: str,
|
|
candidates: list[OptimizedCandidate],
|
|
worktrees: list[Path],
|
|
worktree_root: Path,
|
|
git_root: Path,
|
|
) -> None:
|
|
logger.info("Generating concolic coverage tests for the original code…")
|
|
original_code_coverage_tests = subprocess.run(
|
|
[
|
|
"crosshair",
|
|
"cover",
|
|
"--example_output_format=pytest",
|
|
"--max_uninteresting_iterations=256",
|
|
function_to_optimize_original_worktree_fqn,
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
cwd=worktree_root,
|
|
check=False,
|
|
)
|
|
logger.info(f"Tests generated through concolic coverage:\n{original_code_coverage_tests.stdout}")
|
|
console.rule()
|
|
logger.info("Running concolic behavior correctness checking and coverage generation on optimized code…")
|
|
console.rule()
|
|
diffbehavior_results: dict[str, DiffbehaviorReturnCode] = {}
|
|
for candidate_index, candidate in enumerate(candidates, start=1):
|
|
logger.info(f"Optimization candidate {candidate_index}/{len(candidates)}:")
|
|
code_print(candidate.source_code)
|
|
function_to_optimize_optimized_worktree_fqn = (
|
|
str(
|
|
worktrees[candidate_index].name / function_to_optimize.file_path.relative_to(git_root).with_suffix("")
|
|
).replace("/", ".")
|
|
+ "."
|
|
+ function_to_optimize.qualified_name
|
|
)
|
|
result = subprocess.run(
|
|
[
|
|
"crosshair",
|
|
"diffbehavior",
|
|
"--max_uninteresting_iterations=256",
|
|
function_to_optimize_optimized_worktree_fqn,
|
|
function_to_optimize_original_worktree_fqn,
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
cwd=worktree_root,
|
|
check=False,
|
|
)
|
|
optimized_code_tests = subprocess.run(
|
|
[
|
|
"crosshair",
|
|
"cover",
|
|
"--example_output_format=pytest",
|
|
"--max_uninteresting_iterations=256",
|
|
function_to_optimize_optimized_worktree_fqn,
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
cwd=worktree_root,
|
|
check=False,
|
|
)
|
|
|
|
if result.returncode == DiffbehaviorReturnCode.ERROR:
|
|
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.ERROR
|
|
logger.info("Inconclusive results from concolic behavior correctness checking.")
|
|
logger.warning(f"Error running crosshair diffbehavior{': ' + result.stderr if result.stderr else '.'}")
|
|
elif result.returncode == DiffbehaviorReturnCode.COUNTER_EXAMPLES:
|
|
split_counter_examples = re.split("(Given: )", result.stdout)[1:]
|
|
joined_counter_examples = [
|
|
"".join(map(str, split_counter_examples[i : i + 2])) for i in range(0, len(split_counter_examples), 2)
|
|
]
|
|
concrete_counter_examples = "".join(
|
|
[elt for elt in joined_counter_examples if not re.search(r" object at 0x[0-9a-fA-F]+", elt)]
|
|
)
|
|
if concrete_counter_examples:
|
|
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.COUNTER_EXAMPLES
|
|
logger.info(
|
|
f"Optimization candidate failed concolic behavior correctness "
|
|
f"checking:\n{concrete_counter_examples}"
|
|
)
|
|
if result.stdout != concrete_counter_examples:
|
|
object_id_counter_examples = "".join(
|
|
[elt for elt in joined_counter_examples if re.search(r" object at 0x[0-9a-fA-F]+", elt)]
|
|
)
|
|
logger.warning(f"Counter-examples with object ID found:\n{object_id_counter_examples}")
|
|
else:
|
|
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.ERROR
|
|
logger.info("Inconclusive results from concolic behavior correctness checking.")
|
|
console.rule()
|
|
logger.warning(f"Counter-examples with object ID found:\n{result.stdout}")
|
|
elif result.returncode == DiffbehaviorReturnCode.NO_DIFFERENCES:
|
|
diffbehavior_results[candidate.optimization_id] = DiffbehaviorReturnCode.NO_DIFFERENCES
|
|
first_line = "".join([": ", chr(10), result.stdout.split(chr(10), 1)[0]])
|
|
logger.info(
|
|
f"Optimization candidate passed concolic behavior correctness checking"
|
|
f"{first_line if chr(10) in result.stdout else '.'}"
|
|
)
|
|
paths_exhausted = "All paths exhausted, functions are likely the same!\n"
|
|
if result.stdout.endswith(paths_exhausted):
|
|
logger.info(paths_exhausted)
|
|
else:
|
|
logger.warning("Consider increasing the --max_uninteresting_iterations option.")
|
|
else:
|
|
logger.info("Inconclusive results from concolic behavior correctness checking.")
|
|
logger.error("Unknown return code running crosshair diffbehavior.")
|
|
console.rule()
|
|
logger.info(f"Tests generated through concolic coverage:\n{optimized_code_tests.stdout}")
|
|
console.rule()
|
|
|
|
def report_concolic_results(
|
|
equal_results: bool, diffbehavior_result: DiffbehaviorReturnCode, success: bool
|
|
) -> tuple[bool, bool]:
|
|
if diffbehavior_result == DiffbehaviorReturnCode.NO_DIFFERENCES:
|
|
logger.info("Concolic behavior correctness check successful!")
|
|
console.rule()
|
|
if equal_results:
|
|
logger.info("True negative: Concolic behavior correctness check successful and test results matched.")
|
|
else:
|
|
logger.warning(
|
|
"False negative for concolic testing: Concolic behavior correctness check successful but test "
|
|
"results did not match."
|
|
)
|
|
console.rule()
|
|
elif diffbehavior_result == DiffbehaviorReturnCode.COUNTER_EXAMPLES:
|
|
logger.warning("Concolic behavior correctness check failed.")
|
|
console.rule()
|
|
if equal_results:
|
|
logger.warning(
|
|
"False negative for regression testing: Concolic behavior correctness check failed but test "
|
|
"results matched."
|
|
)
|
|
success = False
|
|
equal_results = False
|
|
else:
|
|
logger.info("True positive: Concolic behavior correctness check failed and test results did not match.")
|
|
console.rule()
|
|
elif diffbehavior_result == DiffbehaviorReturnCode.ERROR:
|
|
logger.warning("Concolic behavior correctness check inconclusive.")
|
|
console.rule()
|
|
return equal_results, success
|