Add project overlay infrastructure for isolated candidate evaluation

Introduces symlink-based temporary directories that mirror the project
root, replacing only the target module file with candidate code. This
allows test subprocesses to run against candidate code without mutating
the original source on disk, enabling safe parallel evaluation.
This commit is contained in:
Kevin Turcios 2026-04-21 04:16:34 -05:00
parent 1d48b7792d
commit b455c1e69f
3 changed files with 330 additions and 3 deletions

View file

@ -135,16 +135,21 @@ def run_tests_and_benchmark( # noqa: PLR0913
candidate_bench_results: dict[str, TestResults],
*,
evaluate_async_fn: _EvalAsyncFn | None = None,
cwd: Path | None = None,
) -> float | None:
"""Run behavioral tests and benchmarks for a candidate.
Expects the updated source to already be written to disk.
When *cwd* is given, test subprocesses use that directory
instead of ``ctx.project_root``.
"""
project_cwd = cwd or ctx.project_root
# 3. Behavioral tests.
xml_path, run_result, _, _ = run_behavioral_tests(
test_files=test_files,
test_env=test_env,
cwd=ctx.project_root,
cwd=project_cwd,
pytest_cmd=ctx.test_cfg.pytest_cmd,
)
candidate_results = parse_test_results(
@ -182,13 +187,13 @@ def run_tests_and_benchmark( # noqa: PLR0913
func = fn_input.function
originals = add_async_perf_decorator(
func if func.is_async else None,
ctx.project_root,
project_cwd,
)
try:
bench_xml, bench_result = run_benchmarking_tests(
test_files=test_files,
test_env=test_env,
cwd=ctx.project_root,
cwd=project_cwd,
pytest_cmd=ctx.test_cfg.pytest_cmd,
)
bench_results = parse_test_results(
@ -247,6 +252,78 @@ def run_tests_and_benchmark( # noqa: PLR0913
return speedup
def evaluate_candidate_isolated(  # noqa: PLR0913
    candidate: Candidate,
    fn_input: FunctionInput,
    baseline: OriginalCodeBaseline,
    eval_ctx: EvaluationContext,
    test_files: TestFiles,
    test_env: dict[str, str],
    ctx: OptimizationContext,
    failed_candidate_code: dict[str, str],
    failed_candidate_diffs: dict[str, list[Any]],
    candidate_bench_results: dict[str, TestResults],
    *,
    evaluate_async_fn: _EvalAsyncFn | None = None,
) -> float | None:
    """Test and benchmark *candidate* inside a throwaway overlay.

    The candidate code is spliced into the in-memory source of the
    target function, the result is written into a fresh overlay
    directory, and that directory is handed to
    :func:`run_tests_and_benchmark` as ``cwd``. This function itself
    never writes to the original module file. The overlay is removed
    again whether or not the run succeeds.

    Returns the value produced by :func:`run_tests_and_benchmark`, or
    ``None`` when the source replacement fails (in which case the
    candidate is recorded as failed on *eval_ctx*). On a ``None``
    result from the run, the candidate code is stored in
    *failed_candidate_code*; otherwise it is stored in
    ``eval_ctx.optimizations_post``.
    """
    from ._eval_worktree import (  # noqa: PLC0415
        cleanup_overlay,
        create_project_overlay,
    )

    candidate_id = candidate.candidate_id

    # Step 1: build the patched module source purely in memory.
    try:
        patched_source = replace_functions_in_file(
            source_code=fn_input.source_code,
            original_function_names=[fn_input.function.function_name],
            optimized_code=candidate.code,
            preexisting_objects=set(),
        )
    except Exception:  # noqa: BLE001
        log.info(
            "Replacement failed for candidate %s",
            candidate_id,
            exc_info=True,
        )
        eval_ctx.record_failed(candidate_id)
        return None

    # Step 2: materialise the overlay and evaluate against it.
    overlay_root = create_project_overlay(
        fn_input.module_path,
        ctx.project_root,
        patched_source,
    )
    try:
        speedup = run_tests_and_benchmark(
            cid=candidate_id,
            fn_input=fn_input,
            baseline=baseline,
            eval_ctx=eval_ctx,
            test_files=test_files,
            test_env=test_env,
            ctx=ctx,
            failed_candidate_diffs=failed_candidate_diffs,
            candidate_bench_results=candidate_bench_results,
            evaluate_async_fn=evaluate_async_fn,
            cwd=overlay_root,
        )
        if speedup is not None:
            eval_ctx.optimizations_post[candidate_id] = candidate.code
        else:
            failed_candidate_code[candidate_id] = candidate.code
        return speedup
    finally:
        # Always drop the temporary tree, even when evaluation raised.
        cleanup_overlay(overlay_root)
def rank_candidates( # noqa: PLR0913
ai_client: AIClient,
function_trace_id: str,

View file

@ -0,0 +1,83 @@
"""Isolated project overlays for parallel candidate evaluation.
Creates lightweight temporary directories that mirror the
project root using symlinks. Only the target module file is a
real file containing candidate code; everything else resolves
through symlinks to the original project.
Using the overlay as the subprocess ``cwd`` ensures the
candidate code is found on ``sys.path[0]`` while all other
imports resolve to the originals.
"""
from __future__ import annotations
import shutil
import tempfile
from pathlib import Path
def _link_siblings(
    orig_dir: Path,
    overlay_dir: Path,
    skip_name: str,
) -> None:
    """Symlink each entry of *orig_dir* except *skip_name* into *overlay_dir*."""
    for item in orig_dir.iterdir():
        if item.name == skip_name:
            continue
        link = overlay_dir / item.name
        # ``exists()`` follows symlinks, so ``is_symlink()`` is also
        # checked to detect a dangling link that is already present.
        if not link.exists() and not link.is_symlink():
            link.symlink_to(item)


def create_project_overlay(
    module_path: Path,
    project_root: Path,
    candidate_code: str,
) -> Path:
    """Create a project-root overlay with one module replaced.

    Mirrors the directory structure from *project_root* down to
    *module_path*. Directories on that path are real directories in
    the overlay; every other entry at each level is a symlink to the
    original. The target module is a real file containing
    *candidate_code* (written as UTF-8).

    Symlink targets are absolute, so the overlay also works when
    *project_root* is given as a relative path.

    Returns the overlay root (to use as subprocess ``cwd``).
    The caller must clean up via :func:`cleanup_overlay`.

    Raises:
        ValueError: if *module_path* is not located under
            *project_root* (raised before anything is created).
        OSError: if the overlay cannot be built; the partially
            built overlay is removed before the error propagates.
    """
    # Validate first so a bad path never creates a temp directory.
    rel = module_path.relative_to(project_root)
    parts = rel.parts
    last_index = len(parts) - 1

    # A relative symlink target would be resolved relative to the
    # link's own directory inside the temp dir, so anchor the originals.
    current_orig = project_root.absolute()

    overlay = Path(
        tempfile.mkdtemp(prefix="codeflash_eval_"),
    )
    try:
        current_over = overlay
        for i, part in enumerate(parts):
            if i == last_index:
                (current_over / part).write_text(
                    candidate_code,
                    encoding="utf-8",
                )
                _link_siblings(current_orig, current_over, part)
            else:
                next_over = current_over / part
                next_over.mkdir()
                _link_siblings(current_orig, current_over, part)
                current_orig = current_orig / part
                current_over = next_over
    except BaseException:
        # Do not leak a half-built overlay; the caller never receives
        # the path and therefore could not clean it up.
        shutil.rmtree(overlay, ignore_errors=True)
        raise
    return overlay
def cleanup_overlay(overlay: Path) -> None:
    """Delete the directory tree rooted at *overlay*.

    Counterpart to :func:`create_project_overlay`. Removal is
    best-effort: errors raised while deleting are ignored.
    """
    shutil.rmtree(path=overlay, ignore_errors=True)

View file

@ -0,0 +1,167 @@
"""Tests for project overlay infrastructure."""
from __future__ import annotations
from pathlib import Path
import pytest
from codeflash_python.pipeline._eval_worktree import (
cleanup_overlay,
create_project_overlay,
)
def _make_project(tmp_path: Path) -> tuple[Path, Path]:
"""Build a minimal src-layout project.
Returns (project_root, module_path).
"""
root = tmp_path / "project"
root.mkdir()
(root / "pyproject.toml").write_text("[project]\nname='demo'\n")
src = root / "src"
pkg = src / "mypkg"
pkg.mkdir(parents=True)
(pkg / "__init__.py").write_text("# init\n")
(pkg / "util.py").write_text("def helper(): ...\n")
mod = pkg / "core.py"
mod.write_text("def original(): ...\n")
return root, mod
class TestCreateProjectOverlay:
    """Checks on the directory structure built by create_project_overlay."""

    def test_overlay_contains_candidate_code(
        self, tmp_path: Path
    ) -> None:
        """The target module file has the candidate code."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
            target = overlay / "src" / "mypkg" / "core.py"
            assert target.exists()
            assert "def fast(): ..." == target.read_text("utf-8")
        finally:
            cleanup_overlay(overlay)

    def test_siblings_are_symlinked(self, tmp_path: Path) -> None:
        """Other files in the same package are symlinks to the originals."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
            init_link = overlay / "src" / "mypkg" / "__init__.py"
            util_link = overlay / "src" / "mypkg" / "util.py"
            assert init_link.is_symlink()
            assert util_link.is_symlink()
            assert init_link.resolve() == (
                root / "src" / "mypkg" / "__init__.py"
            )
            # Check the second link's target too, not just that it exists.
            assert util_link.resolve() == (
                root / "src" / "mypkg" / "util.py"
            )
        finally:
            cleanup_overlay(overlay)

    def test_project_root_siblings_are_symlinked(
        self, tmp_path: Path
    ) -> None:
        """Files at project root level are symlinked."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "code")
        try:
            pyproject = overlay / "pyproject.toml"
            assert pyproject.is_symlink()
            assert pyproject.resolve() == root / "pyproject.toml"
        finally:
            cleanup_overlay(overlay)

    def test_intermediate_dir_siblings_are_symlinked(
        self, tmp_path: Path
    ) -> None:
        """Path directories are real; their siblings are symlinks."""
        root, mod = _make_project(tmp_path)
        # The fixture has nothing next to ``mypkg`` inside ``src``, so add
        # a sibling to exercise linking at the intermediate level.
        other_pkg = root / "src" / "otherpkg"
        other_pkg.mkdir()
        overlay = create_project_overlay(mod, root, "code")
        try:
            src_over = overlay / "src"
            assert src_over.is_dir()
            assert not src_over.is_symlink()
            mypkg_over = src_over / "mypkg"
            assert mypkg_over.is_dir()
            assert not mypkg_over.is_symlink()
            other_link = src_over / "otherpkg"
            assert other_link.is_symlink()
            assert other_link.resolve() == other_pkg
        finally:
            cleanup_overlay(overlay)

    def test_target_is_not_symlink(self, tmp_path: Path) -> None:
        """The candidate file is a real file, not a symlink."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "def fast(): ...")
        try:
            assert not (
                overlay / "src" / "mypkg" / "core.py"
            ).is_symlink()
        finally:
            cleanup_overlay(overlay)

    def test_cleanup_removes_overlay(self, tmp_path: Path) -> None:
        """cleanup_overlay removes the entire directory."""
        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(mod, root, "code")
        cleanup_overlay(overlay)
        assert not overlay.exists()

    def test_module_not_under_project_root_raises(
        self, tmp_path: Path
    ) -> None:
        """Raises ValueError when module_path is outside project_root."""
        other = tmp_path / "other" / "mod.py"
        other.parent.mkdir(parents=True)
        other.write_text("x = 1\n")
        project_root = tmp_path / "project"
        project_root.mkdir()
        with pytest.raises(ValueError, match="is not in the subpath of"):
            create_project_overlay(other, project_root, "x = 2")

    def test_flat_layout_module(self, tmp_path: Path) -> None:
        """Works for flat-layout projects where module is at root."""
        root = tmp_path / "project"
        root.mkdir()
        mod = root / "mymodule.py"
        mod.write_text("def old(): ...\n")
        (root / "setup.py").write_text("# setup\n")
        overlay = create_project_overlay(mod, root, "def new(): ...")
        try:
            target = overlay / "mymodule.py"
            assert target.exists()
            assert not target.is_symlink()
            assert "def new(): ..." == target.read_text("utf-8")
            setup_link = overlay / "setup.py"
            assert setup_link.is_symlink()
        finally:
            cleanup_overlay(overlay)

    def test_overlay_usable_as_cwd(self, tmp_path: Path) -> None:
        """A subprocess with cwd=overlay can import the candidate."""
        import subprocess
        import sys

        root, mod = _make_project(tmp_path)
        overlay = create_project_overlay(
            mod, root, "VALUE = 42\n"
        )
        try:
            # ``src`` is put on sys.path explicitly because the fixture
            # uses a src layout; the relative entry resolves against cwd.
            result = subprocess.run(
                [
                    sys.executable,
                    "-c",
                    "import sys; sys.path.insert(0, 'src'); "
                    "from mypkg.core import VALUE; "
                    "print(VALUE)",
                ],
                cwd=overlay,
                capture_output=True,
                text=True,
                check=True,
            )
            assert "42" == result.stdout.strip()
        finally:
            cleanup_overlay(overlay)