mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
Add project overlay infrastructure for isolated candidate evaluation
Introduces symlink-based temporary directories that mirror the project root, replacing only the target module file with candidate code. This allows test subprocesses to run against candidate code without mutating the original source on disk, enabling safe parallel evaluation.
This commit is contained in:
parent
1d48b7792d
commit
b455c1e69f
3 changed files with 330 additions and 3 deletions
|
|
@ -135,16 +135,21 @@ def run_tests_and_benchmark( # noqa: PLR0913
|
|||
candidate_bench_results: dict[str, TestResults],
|
||||
*,
|
||||
evaluate_async_fn: _EvalAsyncFn | None = None,
|
||||
cwd: Path | None = None,
|
||||
) -> float | None:
|
||||
"""Run behavioral tests and benchmarks for a candidate.
|
||||
|
||||
Expects the updated source to already be written to disk.
|
||||
When *cwd* is given, test subprocesses use that directory
|
||||
instead of ``ctx.project_root``.
|
||||
"""
|
||||
project_cwd = cwd or ctx.project_root
|
||||
|
||||
# 3. Behavioral tests.
|
||||
xml_path, run_result, _, _ = run_behavioral_tests(
|
||||
test_files=test_files,
|
||||
test_env=test_env,
|
||||
cwd=ctx.project_root,
|
||||
cwd=project_cwd,
|
||||
pytest_cmd=ctx.test_cfg.pytest_cmd,
|
||||
)
|
||||
candidate_results = parse_test_results(
|
||||
|
|
@ -182,13 +187,13 @@ def run_tests_and_benchmark( # noqa: PLR0913
|
|||
func = fn_input.function
|
||||
originals = add_async_perf_decorator(
|
||||
func if func.is_async else None,
|
||||
ctx.project_root,
|
||||
project_cwd,
|
||||
)
|
||||
try:
|
||||
bench_xml, bench_result = run_benchmarking_tests(
|
||||
test_files=test_files,
|
||||
test_env=test_env,
|
||||
cwd=ctx.project_root,
|
||||
cwd=project_cwd,
|
||||
pytest_cmd=ctx.test_cfg.pytest_cmd,
|
||||
)
|
||||
bench_results = parse_test_results(
|
||||
|
|
@ -247,6 +252,78 @@ def run_tests_and_benchmark( # noqa: PLR0913
|
|||
return speedup
|
||||
|
||||
|
||||
def evaluate_candidate_isolated(  # noqa: PLR0913
    candidate: Candidate,
    fn_input: FunctionInput,
    baseline: OriginalCodeBaseline,
    eval_ctx: EvaluationContext,
    test_files: TestFiles,
    test_env: dict[str, str],
    ctx: OptimizationContext,
    failed_candidate_code: dict[str, str],
    failed_candidate_diffs: dict[str, list[Any]],
    candidate_bench_results: dict[str, TestResults],
    *,
    evaluate_async_fn: _EvalAsyncFn | None = None,
) -> float | None:
    """Evaluate one candidate inside a throwaway project overlay.

    The candidate's code is spliced into ``fn_input.source_code`` and
    the result is handed to ``create_project_overlay`` instead of being
    written over the module in ``ctx.project_root``.  Tests and
    benchmarks then run with the overlay as their ``cwd``.

    Returns whatever ``run_tests_and_benchmark`` returns, or ``None``
    when the function replacement itself fails (the candidate is then
    recorded via ``eval_ctx.record_failed``).

    Side effects on the arguments: a ``None`` result stores the
    candidate code in *failed_candidate_code*; any other result stores
    it in ``eval_ctx.optimizations_post``.  The overlay is removed on
    every exit path once it has been created.
    """
    from ._eval_worktree import (  # noqa: PLC0415
        cleanup_overlay,
        create_project_overlay,
    )

    candidate_id = candidate.candidate_id

    # Step 1: build the patched module text in memory only.
    try:
        patched_source = replace_functions_in_file(
            source_code=fn_input.source_code,
            original_function_names=[fn_input.function.function_name],
            optimized_code=candidate.code,
            preexisting_objects=set(),
        )
    except Exception:  # noqa: BLE001
        log.info(
            "Replacement failed for candidate %s",
            candidate_id,
            exc_info=True,
        )
        eval_ctx.record_failed(candidate_id)
        return None

    # Step 2: materialise the overlay and evaluate against it.
    overlay_root = create_project_overlay(
        fn_input.module_path,
        ctx.project_root,
        patched_source,
    )
    try:
        speedup = run_tests_and_benchmark(
            cid=candidate_id,
            fn_input=fn_input,
            baseline=baseline,
            eval_ctx=eval_ctx,
            test_files=test_files,
            test_env=test_env,
            ctx=ctx,
            failed_candidate_diffs=failed_candidate_diffs,
            candidate_bench_results=candidate_bench_results,
            evaluate_async_fn=evaluate_async_fn,
            cwd=overlay_root,
        )
        if speedup is not None:
            eval_ctx.optimizations_post[candidate_id] = candidate.code
        else:
            failed_candidate_code[candidate_id] = candidate.code
        return speedup
    finally:
        # Always drop the overlay, whether evaluation passed or raised.
        cleanup_overlay(overlay_root)
|
||||
|
||||
|
||||
def rank_candidates( # noqa: PLR0913
|
||||
ai_client: AIClient,
|
||||
function_trace_id: str,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
"""Isolated project overlays for parallel candidate evaluation.
|
||||
|
||||
Creates lightweight temporary directories that mirror the
|
||||
project root using symlinks. Only the target module file is a
|
||||
real file containing candidate code; everything else resolves
|
||||
through symlinks to the original project.
|
||||
|
||||
Using the overlay as the subprocess ``cwd`` ensures the
|
||||
candidate code is found on ``sys.path[0]`` while all other
|
||||
imports resolve to the originals.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def create_project_overlay(
    module_path: Path,
    project_root: Path,
    candidate_code: str,
) -> Path:
    """Create a project-root overlay with one module replaced.

    Mirrors the directory structure from *project_root* down to
    *module_path*.  Directories on that path are real directories in
    the overlay; every sibling at each level is a symlink to the
    original entry.  The target module is a real file containing
    *candidate_code* (written as UTF-8).

    Returns the overlay root (to use as subprocess ``cwd``).
    The caller must clean up via :func:`cleanup_overlay`.

    Raises:
        ValueError: *module_path* is not located under *project_root*,
            or is *project_root* itself.
        OSError: a filesystem operation failed while building the
            overlay.  The partially built overlay is removed before
            the error propagates.
    """
    # Validate first: nothing has been created yet, so a bad path
    # cannot leave a temporary directory behind.
    parts = module_path.relative_to(project_root).parts
    if not parts:
        msg = f"{str(module_path)!r} is the project root, not a module file"
        raise ValueError(msg)

    overlay = Path(tempfile.mkdtemp(prefix="codeflash_eval_"))
    try:
        # A relative symlink target is resolved against the link's own
        # directory, so targets must be absolute to reach the project.
        current_orig = project_root.absolute()
        current_over = overlay
        *package_dirs, module_name = parts

        for part in package_dirs:
            _symlink_siblings(current_orig, current_over, skip=part)
            current_orig = current_orig / part
            current_over = current_over / part
            current_over.mkdir()

        _symlink_siblings(current_orig, current_over, skip=module_name)
        (current_over / module_name).write_text(
            candidate_code,
            encoding="utf-8",
        )
    except BaseException:
        # Do not leak a half-built overlay when anything goes wrong.
        shutil.rmtree(overlay, ignore_errors=True)
        raise

    return overlay


def _symlink_siblings(orig_dir: Path, over_dir: Path, *, skip: str) -> None:
    """Symlink every entry of *orig_dir* into *over_dir* except *skip*."""
    for item in orig_dir.iterdir():
        if item.name == skip:
            continue
        link = over_dir / item.name
        # ``exists()`` is false for a dangling symlink, hence both checks.
        if not link.exists() and not link.is_symlink():
            link.symlink_to(item)
|
||||
|
||||
|
||||
def cleanup_overlay(overlay: Path) -> None:
    """Delete the directory tree at *overlay* on a best-effort basis.

    Intended for directories returned by :func:`create_project_overlay`.
    Errors raised during removal (including a missing directory) are
    ignored.
    """
    shutil.rmtree(path=overlay, ignore_errors=True)
|
||||
167
packages/codeflash-python/tests/test_eval_worktree.py
Normal file
167
packages/codeflash-python/tests/test_eval_worktree.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""Tests for project overlay infrastructure."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from codeflash_python.pipeline._eval_worktree import (
|
||||
cleanup_overlay,
|
||||
create_project_overlay,
|
||||
)
|
||||
|
||||
|
||||
def _make_project(tmp_path: Path) -> tuple[Path, Path]:
    """Create a small src-layout project under *tmp_path*.

    Layout: ``project/pyproject.toml`` plus ``project/src/mypkg/``
    holding ``__init__.py``, ``util.py`` and ``core.py``.

    Returns ``(project_root, module_path)`` where *module_path* is the
    ``core.py`` file.
    """
    project_root = tmp_path / "project"
    project_root.mkdir()
    (project_root / "pyproject.toml").write_text("[project]\nname='demo'\n")

    package_dir = project_root / "src" / "mypkg"
    package_dir.mkdir(parents=True)

    # File name -> contents, written in this order.
    sources = {
        "__init__.py": "# init\n",
        "util.py": "def helper(): ...\n",
        "core.py": "def original(): ...\n",
    }
    for filename, text in sources.items():
        (package_dir / filename).write_text(text)

    return project_root, package_dir / "core.py"
|
||||
|
||||
|
||||
class TestCreateProjectOverlay:
    """Directory-structure tests for ``create_project_overlay``.

    Each test builds a small project under pytest's ``tmp_path``,
    creates an overlay, inspects it, and removes it in ``finally`` so a
    failing assertion does not leave the overlay behind.
    """

    def test_overlay_contains_candidate_code(
        self, tmp_path: Path
    ) -> None:
        """The target module file has the candidate code."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
            target = overlay / "src" / "mypkg" / "core.py"
            assert target.exists()
            assert target.read_text("utf-8") == "def fast(): ..."
        finally:
            cleanup_overlay(overlay)

    def test_siblings_are_symlinked(self, tmp_path: Path) -> None:
        """Other files in the same package are symlinks."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
            init_link = overlay / "src" / "mypkg" / "__init__.py"
            util_link = overlay / "src" / "mypkg" / "util.py"
            assert init_link.is_symlink()
            assert util_link.is_symlink()
            assert init_link.resolve() == (
                root / "src" / "mypkg" / "__init__.py"
            )
        finally:
            cleanup_overlay(overlay)

    def test_project_root_siblings_are_symlinked(
        self, tmp_path: Path
    ) -> None:
        """Files at project root level are symlinked."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "code")
        try:
            pyproject = overlay / "pyproject.toml"
            assert pyproject.is_symlink()
            assert pyproject.resolve() == root / "pyproject.toml"
        finally:
            cleanup_overlay(overlay)

    def test_intermediate_dir_siblings_are_symlinked(
        self, tmp_path: Path
    ) -> None:
        """Path directories are real; their siblings are symlinks."""
        root, mod = _make_project(tmp_path)
        # ``_make_project`` leaves ``src`` with a single child, so add
        # a second package to give that level a sibling to symlink.
        sibling = root / "src" / "otherpkg"
        sibling.mkdir()
        overlay = create_project_overlay(mod, root, "code")
        try:
            src_over = overlay / "src"
            assert src_over.is_dir()
            assert not src_over.is_symlink()
            mypkg_over = src_over / "mypkg"
            assert mypkg_over.is_dir()
            assert not mypkg_over.is_symlink()
            sibling_link = src_over / "otherpkg"
            assert sibling_link.is_symlink()
            assert sibling_link.resolve() == sibling.resolve()
        finally:
            cleanup_overlay(overlay)

    def test_target_is_not_symlink(self, tmp_path: Path) -> None:
        """The candidate file is a real file, not a symlink."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
            assert not (
                overlay / "src" / "mypkg" / "core.py"
            ).is_symlink()
        finally:
            cleanup_overlay(overlay)

    def test_cleanup_removes_overlay(self, tmp_path: Path) -> None:
        """cleanup_overlay removes the entire directory."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "code")
        cleanup_overlay(overlay)
        assert not overlay.exists()

    def test_module_not_under_project_root_raises(
        self, tmp_path: Path
    ) -> None:
        """Raises ValueError when module_path is outside project_root."""
        other = tmp_path / "other" / "mod.py"
        other.parent.mkdir(parents=True)
        other.write_text("x = 1\n")
        project_root = tmp_path / "project"
        project_root.mkdir()
        with pytest.raises(ValueError, match="is not in the subpath of"):
            create_project_overlay(other, project_root, "x = 2")

    def test_flat_layout_module(self, tmp_path: Path) -> None:
        """Works for flat-layout projects where module is at root."""
        root = tmp_path / "project"
        root.mkdir()
        mod = root / "mymodule.py"
        mod.write_text("def old(): ...\n")
        (root / "setup.py").write_text("# setup\n")

        overlay = create_project_overlay(mod, root, "def new(): ...")
        try:
            target = overlay / "mymodule.py"
            assert target.exists()
            assert not target.is_symlink()
            assert target.read_text("utf-8") == "def new(): ..."
            setup_link = overlay / "setup.py"
            assert setup_link.is_symlink()
        finally:
            cleanup_overlay(overlay)

    def test_overlay_usable_as_cwd(self, tmp_path: Path) -> None:
        """A subprocess with cwd=overlay can import the candidate."""
        import subprocess
        import sys

        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(
            mod, root, "VALUE = 42\n"
        )
        try:
            # ``src`` is inserted relative to the subprocess cwd, so
            # the import resolves inside the overlay.
            result = subprocess.run(
                [
                    sys.executable,
                    "-c",
                    "import sys; sys.path.insert(0, 'src'); "
                    "from mypkg.core import VALUE; "
                    "print(VALUE)",
                ],
                cwd=overlay,
                capture_output=True,
                text=True,
                check=True,
            )
            assert result.stdout.strip() == "42"
        finally:
            cleanup_overlay(overlay)
|
||||
Loading…
Reference in a new issue