#!/bin/bash
set -e
set -u
# Enable pipefail when supported (works under bash; safely ignored under sh)
if (set -o 2>/dev/null | grep -q 'pipefail') 2>/dev/null; then
set -o pipefail
fi
echo "--- Starting Codeflash Optimization ---"
# Helper to record stage transitions for BE tracking
_stage() {
local name="$1"; shift || true
local extra="$*"
if [ -n "${STAGE_FILE:-}" ]; then
printf '{"ts":"%s","stage":"%s"%s}\n' "$(date -Is)" "$name" "${extra:+,$extra}" >> "$STAGE_FILE" 2>/dev/null || true
fi
}
_stage "start"
# Ensure we always record final exit and persist EXIT_FILE if not already set
trap '_rc=$?; _stage "runner_exit" "\"rc\":$_rc"; if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then echo "$_rc" > "${EXIT_FILE}" 2>/dev/null || true; fi; exit $_rc' EXIT
if [ -z "${GITHUB_TOKEN:-}" ]; then echo "GITHUB_TOKEN is required"; exit 1; fi
if [ -z "${CODEFLASH_API_KEY:-}" ]; then echo "CODEFLASH_API_KEY is required"; exit 1; fi
if [ -z "${GITHUB_REPO_URL:-}" ]; then echo "GITHUB_REPO_URL is required"; exit 1; fi
# Prefer LLM-provided overrides if present; fall back to CSV/env; then to auto
MODULE_ROOT_VALUE="${LLM_MODULE_ROOT:-${MODULE_ROOT:-auto}}"
TESTS_ROOT_VALUE="${LLM_TESTS_ROOT:-${TESTS_ROOT:-auto}}"
PYTEST_CMD_VALUE="${LLM_PYTEST_CMD:-${PYTEST_CMD:-pytest}}"
FORMATTER_CMDS_VALUE="${LLM_FORMATTER_CMDS:-${FORMATTER_CMDS:-[\"disabled\"]}}"
# Normalize pytest command: drop a leading 'poetry run ' (case-insensitive)
LOWER_PYTEST=$(echo "$PYTEST_CMD_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ "$LOWER_PYTEST" == poetry\ run* ]]; then
PYTEST_CMD_VALUE="$(echo "$PYTEST_CMD_VALUE" | sed 's/^[Pp]oetry[[:space:]]\+[Rr]un[[:space:]]\+//')"
fi
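# e.g. 'poetry run pytest -x' is normalized to 'pytest -x' so it runs inside the venv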
# Normalize formatter cmds to Codeflash-per-file style per docs
# See https://docs.codeflash.ai/configuration
FORMATTER_CMDS_NORM="$FORMATTER_CMDS_VALUE"
LOWER_FMT=$(echo "$FORMATTER_CMDS_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ -z "$LOWER_FMT" || "$LOWER_FMT" == "[]" || "$LOWER_FMT" == "[\"disabled\"]" ]]; then
FORMATTER_CMDS_NORM='["disabled"]'
elif [[ "$LOWER_FMT" == *"ruff"* ]]; then
FORMATTER_CMDS_NORM='["ruff check --exit-zero --fix $file","ruff format $file"]'
elif [[ "$LOWER_FMT" == *"black"* ]]; then
FORMATTER_CMDS_NORM='["black $file"]'
fi
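# e.g. an analyzer value mentioning ruff, such as '["ruff format ."]', is normalized to the
# per-file form '["ruff check --exit-zero --fix $file","ruff format $file"]' that Codeflash expects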
# Summary of analyzed/exported config (no secrets)
echo "=== Configuration Summary (analyzer + effective) ==="
echo "Repo URL: ${GITHUB_REPO_URL}"
echo "CSV/ENV defaults: MODULE_ROOT='${MODULE_ROOT:-}', TESTS_ROOT='${TESTS_ROOT:-}', PYTEST_CMD='${PYTEST_CMD:-}'"
echo "Analyzer: LLM_MODULE_ROOT='${LLM_MODULE_ROOT:-}', LLM_TESTS_ROOT='${LLM_TESTS_ROOT:-}', LLM_PYTEST_CMD='${LLM_PYTEST_CMD:-}'"
echo "Analyzer: LLM_FORMATTER_CMDS='${LLM_FORMATTER_CMDS:-}', LLM_PIP_PACKAGES='${LLM_PIP_PACKAGES:-}'"
echo "Derived: MODULE_ROOT_VALUE='${MODULE_ROOT_VALUE}', TESTS_ROOT_VALUE='${TESTS_ROOT_VALUE}', PYTEST_CMD_VALUE='${PYTEST_CMD_VALUE}'"
echo "Derived: FORMATTER_CMDS_NORM=${FORMATTER_CMDS_NORM}"
echo "=== End Configuration Summary ==="
# Derive test framework for Codeflash config from the test command
TEST_FRAMEWORK_VALUE="pytest"
LOWER_CMD=$(echo "${PYTEST_CMD_VALUE}" | tr '[:upper:]' '[:lower:]')
if [[ "${LOWER_CMD}" =~ (^|[[:space:]])pytest([[:space:]]|$) ]]; then
TEST_FRAMEWORK_VALUE="pytest"
elif [[ "${LOWER_CMD}" == *"unittest"* ]]; then
TEST_FRAMEWORK_VALUE="unittest"
elif [[ "${LOWER_CMD}" == *"nose"* ]] || [[ "${LOWER_CMD}" == *"nosetests"* ]]; then
TEST_FRAMEWORK_VALUE="nose"
fi
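# e.g. 'python -m pytest -q' -> pytest; 'python -m unittest discover' -> unittest; 'nosetests -v' -> nose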
_stage "auth_gh_start"
echo "Authenticating gh..."
if gh auth status -h github.com >/dev/null 2>&1; then
echo "gh auth status OK"
else
echo "Using GITHUB_TOKEN from environment for gh commands"
fi
_stage "fork_repo_start"
echo "Forking repository if needed..."
gh repo fork "${GITHUB_REPO_URL}" --clone=false --remote=false || true
REPO_PATH=$(echo "${GITHUB_REPO_URL}" | sed 's#https://github.com/##')
FORK_OWNER=$(gh api user --jq .login)
FORK_REPO="${FORK_OWNER}/$(basename "${REPO_PATH}")"
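# e.g. GITHUB_REPO_URL=https://github.com/pallets/flask with authenticated user 'octocat'
# yields REPO_PATH='pallets/flask' and FORK_REPO='octocat/flask' (illustrative names)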
_stage "clone_start" "\"repo\":\"${FORK_REPO}\""
echo "Cloning fork ${FORK_REPO}..."
# Working directory (must be writable by current user)
WORK_DIR="${WORK_DIR:-/home/ubuntu/work}"
rm -rf "$WORK_DIR" || true
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
# Retry clone with exponential backoff for GitHub service issues
for attempt in 1 2 3; do
echo "Clone attempt $attempt/3..."
if gh repo clone "${FORK_REPO}" repo; then
echo "Clone successful"
break
else
if [ $attempt -lt 3 ]; then
echo "Clone failed, retrying in $((attempt * 10)) seconds..."
sleep $((attempt * 10))
else
echo "Clone failed after 3 attempts, continuing with original repo..."
# Fallback to original repo if fork clone fails
gh repo clone "${GITHUB_REPO_URL}" repo || {
echo "Failed to clone both fork and original repo"
exit 1
}
fi
fi
done
cd repo
git remote add upstream "${GITHUB_REPO_URL}" || true
git fetch --all || true
if [ "${MODULE_ROOT_VALUE}" = "auto" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "Detecting module/tests roots..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then
echo "No Python interpreter found for detection"
else
$PY_CMD /app/scripts/detect_roots.py > roots.json || true
fi
if [ -f roots.json ]; then
DETECTED_MODULE=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("module_root",""))' || echo "")
DETECTED_TESTS=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("tests_root",""))' || echo "")
if [ "${MODULE_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_MODULE}" ]; then MODULE_ROOT_VALUE="${DETECTED_MODULE}"; fi
if [ "${TESTS_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_TESTS}" ]; then TESTS_ROOT_VALUE="${DETECTED_TESTS}"; fi
fi
fi
if [ -z "${MODULE_ROOT_VALUE}" ] || [ "${MODULE_ROOT_VALUE}" = "auto" ]; then
echo "Failed to detect module-root; please set MODULE_ROOT env."; exit 2
fi
if [ -z "${TESTS_ROOT_VALUE}" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "No tests-root detected; tracing will be skipped."
fi
_stage "write_codeflash_config"
echo "Writing pyproject.toml..."
cat > pyproject.toml <<EOF
[tool.codeflash]
module-root = "${MODULE_ROOT_VALUE}"
tests-root = "${TESTS_ROOT_VALUE}"
test-framework = "${TEST_FRAMEWORK_VALUE}"
formatter-cmds = ${FORMATTER_CMDS_NORM}
disable-telemetry = false
EOF
# Also write a minimal parent pyproject for Sphinx (docs/conf.py may reference ../pyproject.toml)
if [ -d .. ]; then
echo "Writing parent pyproject.toml for docs..."
cat > ../pyproject.toml <<EOF
[project]
name = "autogenerated-project"
version = "0.0.0"
description = "Autogenerated to satisfy Sphinx config during CI"
authors = [{name = "Auto-generated", email = "noreply@example.com"}]
EOF
fi
if [ -n "${VENV_PATH:-}" ] && [ -d "${VENV_PATH}" ]; then
echo "Using pre-created venv at ${VENV_PATH}"
# shellcheck disable=SC1090
source "${VENV_PATH}/bin/activate"
else
_stage "venv_setup"
echo "Setting up Python venv..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then echo "No Python interpreter found"; exit 1; fi
"$PY_CMD" -m venv .venv
# shellcheck disable=SC1091
source .venv/bin/activate
pip install --upgrade pip >/dev/null 2>&1 || true
_stage "install_codeflash"
echo "Installing codeflash CLI..."
pip install --upgrade codeflash || pip install codeflash || true
fi
# Ensure 'python3' resolves to the venv interpreter (some venvs only expose 'python')
if ! command -v python3 >/dev/null 2>&1 && command -v python >/dev/null 2>&1; then
ln -sf "$(command -v python)" "$(dirname "$(command -v python)")/python3" || true
fi
# Make local repo importable first, then utils/ for helper modules like testutils
export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
if [ -d "$PWD/utils" ]; then
export PYTHONPATH="$PWD/utils:$PYTHONPATH"
fi
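# e.g. with WORK_DIR=/home/ubuntu/work and a utils/ dir present, imports resolve via
# PYTHONPATH=/home/ubuntu/work/repo/utils:/home/ubuntu/work/repo[:previous PYTHONPATH]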
_stage "pre_test_setup"
# If coverage flags are present in test command, ensure pytest-cov is installed before any test run
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
echo "Detected coverage flags in test command; installing pytest-cov..."
pip install pytest-cov || true
fi
# If reruns flags are present in test command, ensure pytest-rerunfailures is installed
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --reruns"; then
echo "Detected reruns flags in test command; installing pytest-rerunfailures..."
pip install pytest-rerunfailures || true
fi
# Run install commands in the project directory (inside venv)
if [ -n "${PRE_INSTALL_CMDS:-}" ]; then
echo "Running pre-install commands: ${PRE_INSTALL_CMDS}"
bash -lc "${PRE_INSTALL_CMDS}" || echo "Pre-install commands failed, continuing..."
fi
_stage "project_install_start"
if [ -n "${INSTALL_CMDS:-}" ]; then
echo "Running install commands: ${INSTALL_CMDS}"
if bash -lc "${INSTALL_CMDS}"; then
echo "Install commands completed successfully"
else
echo "Install commands failed (exit code: $?), continuing..."
# For repositories with custom install scripts that may fail due to
# non-standard configurations, we continue and rely on pip install fallbacks
fi
_stage "project_install_end"
fi
if [ -n "${POST_INSTALL_CMDS:-}" ]; then
echo "Running post-install commands: ${POST_INSTALL_CMDS}"
bash -lc "${POST_INSTALL_CMDS}" || echo "Post-install commands failed, continuing..."
fi
# Normalize test command for use in two contexts:
# 1) Execution (must use venv's Python)
# 2) Codeflash tracing with -m (must be a Python module, not 'python3 <script>')
PYTEST_CMD_RUN="${PYTEST_CMD_VALUE}"
if [[ "${PYTEST_CMD_RUN}" == python3\ * ]]; then PYTEST_CMD_RUN="python ${PYTEST_CMD_RUN#python3 }"; fi
# Debug: Show the original and normalized test commands
echo "Debug: Original PYTEST_CMD_VALUE: '${PYTEST_CMD_VALUE}'"
echo "Debug: Normalized PYTEST_CMD_RUN: '${PYTEST_CMD_RUN}'"
echo "Debug: TESTS_ROOT_VALUE: '${TESTS_ROOT_VALUE}'"
# Helper: detect if command looks like invoking pytest directly
_is_pytest_runner() {
case "$1" in
pytest\ *|pytest) return 0 ;;
python\ -m\ pytest*) return 0 ;;
python3\ -m\ pytest*) return 0 ;;
py.test\ *|py.test) return 0 ;;
*) return 1 ;;
esac
}
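# e.g. _is_pytest_runner "python -m pytest -q" returns 0 (pytest-style),
# while _is_pytest_runner "python run_tests.py" returns 1 (custom runner)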
TRACE_CMD="${PYTEST_CMD_VALUE}"
# Convert interpreter-prefix forms to module forms
if [[ "${TRACE_CMD}" == python3\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python3 -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python3\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python3 }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
elif [[ "${TRACE_CMD}" == python\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
fi
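# e.g. TRACE_CMD 'python3 ./scripts/run_tests.py -v' becomes 'scripts.run_tests -v',
# which codeflash can execute as a module via '-m' (illustrative path)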
# Debug: Show the trace command after processing
echo "Debug: TRACE_CMD for codeflash: '${TRACE_CMD}'"
echo "Installing project dependencies (best-effort)..."
# 1) Install repo requirements first to pin base versions
if [ -f requirements.txt ]; then pip install -r requirements.txt || true; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt || true; fi
if [ -d requirements ]; then
for f in requirements/*.txt; do
[ -f "$f" ] && pip install -r "$f" || true
done
fi
# 2) Only attempt editable install if packaging metadata likely exists
if [ -f pyproject.toml ] || [ -f setup.py ] || [ -f setup.cfg ]; then
EDITABLE_OK=0
if [ -f pyproject.toml ] && grep -qiE "^\s*\[tool\.poetry\]|^\s*\[project\]" pyproject.toml; then
EDITABLE_OK=1
fi
if [ -f setup.py ]; then
EDITABLE_OK=1
fi
if [ -f setup.cfg ] && grep -qiE "^\s*packages\s*=|^\s*package_dir\s*=|^\s*install_requires\s*=" setup.cfg; then
EDITABLE_OK=1
fi
if [ "$EDITABLE_OK" -eq 1 ]; then
echo "Attempting editable install (pip install -e .)..."
if pip install -e .; then
for extra in dev test tests ci all; do
pip install -e ".[${extra}]" || true
done
else
echo "Editable install failed; skipping editable extras and continuing without -e ."
fi
else
echo "Packaging metadata not sufficient; skipping editable install."
fi
fi
# 3) Freeze constraints and then install LLM-specified packages under constraints
if [ -n "${LLM_PIP_PACKAGES:-}" ] && [ "${LLM_PIP_PACKAGES}" != "[]" ]; then
echo "Freezing constraints before LLM package install..."
pip freeze > .cf_constraints.txt || true
echo "Installing LLM-suggested Python packages under constraints: ${LLM_PIP_PACKAGES}"
python - <<'PY'
import os, json, subprocess, sys
pkgs = []
try:
raw = os.environ.get('LLM_PIP_PACKAGES','[]')
pkgs = json.loads(raw)
if not isinstance(pkgs, list):
pkgs = []
except Exception:
pkgs = []
specs = []
for p in pkgs:
if isinstance(p, str) and p.strip():
specs.append(p.strip())
elif isinstance(p, dict) and p.get('name'):
name = str(p['name']).strip()
spec = str(p.get('version_spec') or '').strip()
if name:
specs.append(name + (spec if spec else ''))
if specs:
cmd = [sys.executable, '-m', 'pip', 'install', '--disable-pip-version-check', '-c', '.cf_constraints.txt'] + specs
try:
subprocess.run(cmd, check=False)
except Exception:
pass
PY
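# LLM_PIP_PACKAGES accepts plain requirement strings or {name, version_spec} objects, e.g.
# '["numpy>=1.24", {"name": "requests", "version_spec": ">=2.31"}]' (illustrative packages)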
fi
# Ensure formatters/tools exist if referenced (check both normalized and original values)
if [[ "$FORMATTER_CMDS_NORM" == *"black "* ]] || [[ "${LOWER_FMT}" == *"black"* ]]; then
pip install black || true
fi
if [[ "$FORMATTER_CMDS_NORM" == *"ruff "* ]] || [[ "${LOWER_FMT}" == *"ruff"* ]]; then
pip install ruff || true
fi
# Install anthropic if key is present to enable Claude Code CLI
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
pip install --upgrade anthropic || true
fi
python -c "import pytest" 2>/dev/null || pip install pytest || true
# Optional: preflight test run to detect missing modules
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Preflight test run to detect missing modules..."
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight.out 2>&1
else
eval "${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/" >/tmp/preflight.out 2>&1
fi
else
# Non-pytest runner; avoid appending tests path that may be unsupported
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight.out 2>&1
fi
else
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight.out 2>&1
fi
PRE_RC=$?
set -e
# Detect and register unknown pytest marks to avoid collection errors under -Werror
# Why this exists:
# - Some repositories use custom pytest markers (e.g., `@pytest.mark.download`, `@pytest.mark.slow`)
# but forget to register them in their config (pyproject.toml/setup.cfg/pytest.ini).
# - With `-Werror` or strict settings, pytest turns the UnknownMark warning into an error during
# collection, causing the test run to fail before even starting.
#
# What we do:
# 1) We parse the preflight test output (`/tmp/preflight.out`) for lines like:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# 2) We extract the marker name (e.g., `download`) using `sed` with a capturing group, then de-duplicate
# with `sort -u`.
# 3) If any unknown markers are found, we append a minimal `conftest.py` shim at repo root that registers
# each discovered marker via `config.addinivalue_line("markers", ...)`. This is the official mechanism
# to declare custom markers so pytest accepts them.
#
# Example:
# If preflight output contains multiple instances of:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# then MARKS will contain `download`, and this block will append a conftest.py snippet like:
# def pytest_configure(config):
# config.addinivalue_line("markers", "download: auto-registered marker")
# After that, subsequent pytest runs will collect tests without failing on the unknown mark.
#
# Notes:
# - We only add to conftest.py; we do NOT overwrite existing content, keeping it non-destructive.
# - If no unknown markers are detected, nothing is changed.
# - This does not alter test behavior; it simply declares markers so pytest won't error on them.
if [ -s /tmp/preflight.out ]; then
MARKS=$(sed -n "s/.*Unknown pytest\.mark\.\([A-Za-z0-9_][A-Za-z0-9_]*\).*/\1/p" /tmp/preflight.out | sort -u)
if [ -n "${MARKS}" ]; then
echo "Detected unknown pytest marks: ${MARKS}" | tee -a "$TEST_LOG_FILE"
echo "Auto-registering markers via conftest.py shim..." | tee -a "$TEST_LOG_FILE"
(
echo "# Auto-added by optimizer to register pytest markers"
echo "def pytest_configure(config):"
# For each discovered unknown marker (e.g., download, slow, integration), write a declaration line.
# This is equivalent to having `markers = download: ...` in pytest.ini/pyproject.toml.
for m in ${MARKS}; do
echo " config.addinivalue_line(\"markers\", \"${m}: auto-registered marker\")"
done
) >> conftest.py
fi
fi
if [ $PRE_RC -ne 0 ]; then
echo "Analyzing missing module errors..."
MISSING=$(sed -n "s/.*ModuleNotFoundError: No module named '\([^']\+\)'.*/\1/p" /tmp/preflight.out | head -20)
if [ -z "$MISSING" ]; then
MISSING=$(sed -n "s/.*ImportError: No module named \([^ ]\+\).*/\1/p" /tmp/preflight.out | head -20)
fi
if [ -n "$MISSING" ]; then
echo "Attempting to install missing modules:"
echo "$MISSING" | while read -r mod; do
[ -z "$mod" ] && continue
pkg="$mod"
case "$pkg" in
PIL) pkg="Pillow";;
cv2) pkg="opencv-python";;
yaml) pkg="PyYAML";;
skimage) pkg="scikit-image";;
sklearn) pkg="scikit-learn";;
Crypto) pkg="pycryptodome";;
esac
echo " - pip install $pkg"
pip install "$pkg" || true
done
echo "Re-running preflight tests after installs..."
set +e
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight2.out 2>&1
else
eval "${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/" >/tmp/preflight2.out 2>&1
fi
set -e
fi
fi
fi
# Full tests before optimization (with detailed logging)
TEST_LOG_DIR="${TEST_LOG_DIR:-/home/ubuntu/app/logs}"
mkdir -p "$TEST_LOG_DIR"
TS2=$(date -Is | sed 's/[:+]/-/g')
TEST_LOG_FILE="$TEST_LOG_DIR/tests-$TS2.log"
touch "$TEST_LOG_FILE" && chmod 666 "$TEST_LOG_FILE"
ln -sfn "$TEST_LOG_FILE" "$TEST_LOG_DIR/tests.log" || true
_stage "pre_tests_start"
echo "Running pre-optimization tests: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
# Check if the command already includes the tests directory
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
# Command already includes tests directory, execute as-is
echo "Debug: Executing command as-is (already includes tests dir): ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
# Command doesn't include tests directory, append it
echo "Debug: Appending tests directory: ${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
# Non-pytest runner, execute as-is
echo "Debug: Non-pytest runner, executing as-is: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
# No tests directory, execute as-is
echo "Debug: No tests directory, executing as-is: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
# Dynamic fallback for non-pytest runners emitting argparse errors
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
echo "Pre-optimization tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
_stage "pre_tests_end" "\"rc\":$TEST_RC"
# Persist exit code early if wrapper provided EXIT_FILE
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
# If tests below threshold, run Claude Code CLI setup loop
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
echo "Evaluating test pass ratio for Claude Code CLI setup gate..." | tee -a "$TEST_LOG_FILE"
# Attempt to extract passed/failed/errors from log tail (robust to order)
PASSED=$(sed -n "s/.* \([0-9]\+\) passed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
FAILED=$(sed -n "s/.* \([0-9]\+\) failed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
ERRORS=$(sed -n "s/.* \([0-9]\+\) errors\{0,1\}.*/\1/p" "$TEST_LOG_FILE" | tail -n1) # match '1 error' and 'N errors'
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
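# e.g. 45 passed, 5 failed, 0 errors -> TOTAL=50, RATIO=90 (integer percentage)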
echo "Parsed test summary: passed=$PASSED failed=$FAILED errors=$ERRORS ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -eq 0 ] || [ "$RATIO" -lt 50 ]; then
echo "Tests below threshold; invoking Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round0_start"
# ============================================================================
# CLAUDE CODE CLI SETUP INTEGRATION
# ============================================================================
# This section replaces the previous Python-based LLM setup helper with
# Claude Code CLI, which provides a more robust and interactive approach to
# repository setup. Claude Code CLI can:
#
# 1. Analyze repository structure and dependencies
# 2. Install missing packages and fix import issues
# 3. Handle custom test runners and build systems
# 4. Iteratively debug and resolve setup problems
# 5. Work directly in the terminal with full context
#
# The integration includes:
# - Comprehensive setup prompts with project context
# - Automatic CLI installation (npm or pip fallback)
# - Timeout protection (30 min initial, 20 min additional rounds)
# - Detailed logging of all Claude actions
# - Graceful fallback if Claude Code CLI is unavailable
# ============================================================================
# Create comprehensive prompt for Claude Code CLI
cat > /tmp/claude_setup_prompt.md << 'EOF'
# Repository Setup Assistant
You are an expert Python developer tasked with setting up a repository for testing. Your goal is to analyze the repository, install missing dependencies, and ensure tests can run successfully.
## Your Mission
1. **Analyze the repository structure** to understand the project layout
2. **Identify and install missing dependencies** that are causing test failures
3. **Fix common setup issues** like import path problems, missing packages, or configuration issues
4. **Run tests** to verify the setup is working
5. **Achieve at least 50% test pass rate** (you don't need 100% - some tests may legitimately fail)
## Available Tools
- You have full access to the terminal in the repository directory
- Python virtual environment is already activated at `.venv/`
- Use `.venv/bin/python` and `.venv/bin/pip` for Python operations
- You can read any file in the repository
- You can install packages using pip
## Key Guidelines
- **Focus on missing dependencies**: Look for ImportError, ModuleNotFoundError in test outputs
- **Use project's own install scripts** when available (like `devscripts/install_deps.py`, `setup.py`, etc.)
- **Install from requirements files** if they exist (`requirements.txt`, `requirements-dev.txt`, etc.)
- **Check pyproject.toml** for project dependencies and optional dependencies
- **Handle custom test runners**: Some projects use custom test scripts instead of pytest
- **Fix import path issues**: Add PYTHONPATH exports if needed
- **Install test-specific dependencies**: pytest plugins, coverage tools, etc.
## Common Patterns to Handle
1. **Custom dependency installers**: `python devscripts/install_deps.py`, `pip install -e .`
2. **Test runners with special args**: Projects may have `run_tests.py` or similar
3. **Missing test dependencies**: pytest plugins, mock libraries, etc.
4. **Path issues**: Repository modules not in PYTHONPATH
5. **Optional dependencies**: Install extras like `pip install -e .[test]`
## Success Criteria
- Tests run without ImportError/ModuleNotFoundError
- At least 50% of tests pass (some failures are acceptable)
- No critical setup errors that prevent test execution
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Current Context
- Repository: {REPO_URL}
- Tests directory: {TESTS_ROOT}
- Test command: {PYTEST_CMD}
- Previous test output shows dependency/setup issues
## Recent Test Errors (Summary)
{TEST_ERRORS_SUMMARY}
Start by examining the repository structure and recent test failures, then systematically address the issues.
EOF
# Replace placeholders in prompt
sed -i "s|{REPO_URL}|${GITHUB_REPO_URL:-unknown}|g" /tmp/claude_setup_prompt.md
sed -i "s|{TESTS_ROOT}|${TESTS_ROOT_VALUE:-test}|g" /tmp/claude_setup_prompt.md
sed -i "s|{PYTEST_CMD}|${PYTEST_CMD_VALUE:-pytest}|g" /tmp/claude_setup_prompt.md
# Build a short error summary from the current test log (first 30 error lines)
# Escape backslashes first, then '|' and '&', so the summary cannot break the sed replacement below
TEST_ERRORS_SUMMARY=$(sed -n '1,400p' "$TEST_LOG_FILE" | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30 | sed 's/\\/\\\\/g; s/|/\\|/g; s/&/\\\&/g')
TEST_ERRORS_SUMMARY=${TEST_ERRORS_SUMMARY:-"No error summary available"}
# Escape newlines for sed replacement
TEST_ERRORS_SUMMARY=$(printf "%s" "$TEST_ERRORS_SUMMARY" | sed ':a;N;$!ba;s/\n/\\n/g')
sed -i "s|{TEST_ERRORS_SUMMARY}|${TEST_ERRORS_SUMMARY}|g" /tmp/claude_setup_prompt.md
# Snapshot current environment into constraints to avoid breaking pinned deps
pip freeze > .cf_constraints.txt || true
export PIP_CONSTRAINTS="$(pwd)/.cf_constraints.txt"
# If common system deps are missing based on errors, try lightweight installs (best-effort)
if grep -q "libGL.so.1" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing libGL.so.1; installing headless OpenGL libs (libgl1, libglib2.0-0, libsm6, libxrender1, libxext6)..." | tee -a "$TEST_LOG_FILE"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 || true
# As a fallback in headless environments, prefer opencv-python-headless to avoid GUI backends
if pip show opencv-python >/dev/null 2>&1; then
echo "Installing opencv-python-headless as fallback for headless environment" | tee -a "$TEST_LOG_FILE"
pip install --upgrade opencv-python-headless || true
fi
fi
if grep -q "No module named 'tkinter'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing tkinter; installing Python Tk..." | tee -a "$TEST_LOG_FILE"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y --no-install-recommends python3-tk || true
fi
if grep -q "cannot import name 'Aer' from 'qiskit'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing qiskit-aer; installing..." | tee -a "$TEST_LOG_FILE"
pip install qiskit-aer || true
fi
# Run Claude Code CLI with the setup prompt
echo "Starting Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
_stage "claude_setup_init"
# Prepare a session log regardless of availability so FE always finds a file
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-$(date -u +%Y-%m-%dT%H-%M-%S).log"
touch "$CLAUDE_LOG" 2>/dev/null || true
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
echo "[claude] initializing setup session" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Ensure common local bin directory is in PATH (curl installer often writes here)
export PATH="$HOME/.local/bin:$PATH"
# Check if claude (Claude Code CLI) is available, or fallback to npx runner
CLAUDE_CMD=""
echo "Checking for Claude Code CLI availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for global claude command first
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found global claude CLI at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test if it's actually Claude Code CLI
if timeout 10 claude --version 2>&1 | grep -q "Claude Code"; then
echo "Confirmed: Global claude is Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: Global claude may not be Claude Code CLI, will try npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_CMD=""
fi
fi
# Check for npx if claude not found or not confirmed
if [ -z "$CLAUDE_CMD" ] && command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test npx availability
if timeout 10 npx --version >/dev/null 2>&1; then
echo "npx is available and working" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_CMD=""
fi
fi
if [ -z "$CLAUDE_CMD" ]; then
echo "Claude Code CLI not found; attempting installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm global install first (with proper permissions)
if command -v npm >/dev/null 2>&1; then
echo "Installing @anthropic-ai/claude-code via npm..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first (for system-wide install)
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed, trying user-level install..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install (no sudo)
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after user-level install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed, will rely on npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# If still no claude, try installing Node.js
if [ -z "$CLAUDE_CMD" ] && command -v apt-get >/dev/null 2>&1; then
echo "Installing Node.js LTS to enable Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
sudo apt-get update -y >/dev/null 2>&1 || true
# Install Node.js repository
if command -v curl >/dev/null 2>&1; then
echo "Adding Node.js repository..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - 2>&1 | tee -a "$CLAUDE_LOG" || true
fi
# Install Node.js
echo "Installing Node.js..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if sudo apt-get install -y nodejs 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Node.js installation succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm install again with proper permission handling
if command -v npm >/dev/null 2>&1; then
echo "Retrying npm install after Node.js installation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed after Node.js install, trying user-level..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + user-level npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed after Node.js install, will use npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
else
echo "Node.js installation failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Final check for available commands
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after installation at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI after installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test npx availability with a simple command
echo "Testing npx availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 30 npx --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test if we can actually run the Claude Code CLI via npx
echo "Testing Claude Code CLI via npx..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 60 npx -y @anthropic-ai/claude-code --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude Code CLI via npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: Claude Code CLI via npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This could be due to network issues or package availability" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# Guard: if still unavailable, skip gracefully
if [ -z "$CLAUDE_CMD" ]; then
echo "❌ Claude Code CLI unavailable; skipping setup assistance" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "All installation attempts failed:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Global npm install failed (permission issues)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - User-level npm install failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - npx fallback not available" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing without Claude Code CLI assistance..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
_stage "claude_round0_end" "\"rc\":127,\"unavailable\":true"
else
echo "✅ Claude Code CLI is available and ready to use" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Using command: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Ensure Claude Code CLI is authenticated (headless)
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
echo "ANTHROPIC_API_KEY not set; Claude CLI may fail to authenticate" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Setting ANTHROPIC_API_KEY environment variable for Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
else
echo "ANTHROPIC_API_KEY is set; configuring Claude CLI authentication..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Best-effort non-interactive auth via config (ignore failures)
echo "Attempting to set API key via Claude CLI config..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 10 $CLAUDE_CMD config set api_key "${ANTHROPIC_API_KEY}" 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI API key configuration succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI API key configuration failed, will rely on environment variable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Also set as environment variable as backup
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY}"
fi
# Run Claude Code CLI with the setup prompt using print mode for automation
echo "Running Claude Code CLI setup session using: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
cat /tmp/claude_setup_prompt.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
# Build Claude Code CLI command with proper arguments
# Note: Claude Code CLI uses different argument structure than expected
CLAUDE_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for different CLI versions
CLAUDE_FLAGS_PERM="$CLAUDE_BASE_ARGS --print --max-turns 40 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit"
CLAUDE_FLAGS_MIN="$CLAUDE_BASE_ARGS --print --max-turns 40"
CLAUDE_FLAGS_BASIC="$CLAUDE_BASE_ARGS --print"
echo "Executing: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test Claude CLI is working with a simple command first
echo "Testing Claude CLI availability (version)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Handle npx differently as it may take longer to download and run
if [[ "$CLAUDE_CMD" == npx* ]]; then
echo "Testing npx-based Claude CLI (may take longer for first run)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 120 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI via npx version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI via npx version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is common for npx on first run due to package download time" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
if timeout 30 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Use timeout to enforce a hard limit; wait synchronously so tests run after it finishes/timeout
set +e # Don't exit on failure
set -o pipefail
# Try with full permissions first (pipe prompt via stdin to avoid argument parsing issues)
echo "Attempting Claude CLI execution with elevated permissions (non-interactive)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_EXIT_CODE=1
# First attempt: Try with full permissions
# Use longer timeout for npx as it may need to download packages
TIMEOUT_DURATION=2700
if [[ "$CLAUDE_CMD" == npx* ]]; then
TIMEOUT_DURATION=3600 # 60 minutes for npx (includes download time)
echo "Using extended timeout (60 min) for npx-based Claude CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
if [ -d "$REPO_DIR" ]; then
echo "Executing Claude CLI in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
echo "Executing Claude CLI in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 1 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 2 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 3 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round0_end" "\"rc\":$CLAUDE_EXIT_CODE"
set -e
if [ $CLAUDE_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI session finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_EXIT_CODE -eq 124 ]; then
TIMEOUT_MINUTES=$((TIMEOUT_DURATION / 60))
echo "⏰ Claude Code CLI session timed out after $TIMEOUT_DURATION seconds ($TIMEOUT_MINUTES minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with post-CLAUDE tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI session failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing with post-CLAUDE tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Clean up prompt file
rm -f /tmp/claude_setup_prompt.md
_stage "post_llm_tests_start"
echo "Re-running full tests after Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
echo "Post-LLM tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
_stage "post_llm_tests_end" "\"rc\":$TEST_RC"
# Persist exit code after post-LLM run
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
# If conftest import path mismatch detected, enable importlib mode for next runs
if grep -q "ImportPathMismatchError: ('.*conftest'" "$TEST_LOG_FILE"; then
echo "Detected conftest import path mismatch; enabling --import-mode=importlib for subsequent pytest runs" | tee -a "$TEST_LOG_FILE"
export PYTEST_ADDOPTS="--import-mode=importlib ${PYTEST_ADDOPTS:-}"
fi
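# PYTEST_ADDOPTS is read by pytest itself, so every subsequent pytest invocation
# (including the retry rounds below) picks up --import-mode=importlib automatically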
# Re-evaluate pass ratio; if still below threshold, run additional setup rounds
SETUP_MAX_ROUNDS=${LLM_SETUP_MAX_ROUNDS:-2}
ROUND=0
while : ; do
PASSED=$(sed -n "s/.* \([0-9]\+\) passed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
FAILED=$(sed -n "s/.* \([0-9]\+\) failed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
ERRORS=$(sed -n "s/.* \([0-9]\+\) errors\{0,1\}.*/\1/p" "$TEST_LOG_FILE" | tail -n1) # match '1 error' and 'N errors'
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
echo "Post-LLM summary: passed=$PASSED failed=$FAILED errors=$ERRORS ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -gt 0 ] && [ "$RATIO" -ge 50 ]; then
break
fi
if [ "$ROUND" -ge "$SETUP_MAX_ROUNDS" ]; then
echo "Tests still below threshold after $ROUND additional rounds. Skipping optimization." | tee -a "$TEST_LOG_FILE"
exit 4
fi
ROUND=$((ROUND + 1))
_stage "claude_round_start" "\"round\":$ROUND"
echo "Starting additional Claude Code CLI setup round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Create focused prompt for additional round
cat > /tmp/claude_setup_round_${ROUND}.md << EOF
# Repository Setup Assistant - Round $ROUND
You are continuing to fix repository setup issues. Previous attempts have been made but tests are still failing.
## Current Situation
- This is setup round $ROUND of maximum $SETUP_MAX_ROUNDS
- Previous rounds have attempted to fix dependencies and setup issues
- Tests are still below 50% pass rate
## Your Focus This Round
1. **Analyze recent test failures** - look at the latest test output for new clues
2. **Try different approaches** - if pip installs didn't work, try other methods
3. **Check for version conflicts** - some packages might need specific versions
4. **Look for missing system dependencies** - some Python packages need system libs
5. **Consider alternative test commands** - the project might use a different test runner
## Available Information
- Repository: ${GITHUB_REPO_URL:-unknown}
- Tests directory: ${TESTS_ROOT_VALUE:-test}
- Test command: ${PYTEST_CMD_VALUE:-pytest}
- Round: $ROUND/$SETUP_MAX_ROUNDS
## Strategies to Try
- Check if there are alternative dependency installation methods
- Look for version pinning in setup files
- Try installing development/test extras: \`pip install -e .[dev,test]\`
- Check for conda/mamba environment files
- Look for Docker setup if available
- Try running individual test files to isolate issues
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Recent Test Errors (Summary)
$(sed -n '1,400p' "$TEST_LOG_FILE" | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30)
Focus on getting tests to run successfully, even if not all pass.
EOF
# Run Claude Code CLI for additional round
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-round-${ROUND}-$(date -u +%Y-%m-%dT%H-%M-%S).log"
touch "$CLAUDE_LOG" 2>/dev/null || true
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
if [ -n "$CLAUDE_CMD" ]; then
echo "=== CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
cat /tmp/claude_setup_round_${ROUND}.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
# Build Claude Code CLI command for additional rounds
CLAUDE_ROUND_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for additional rounds
CLAUDE_FLAGS_ROUND_PERM="$CLAUDE_ROUND_BASE_ARGS --print --max-turns 25 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit"
CLAUDE_FLAGS_ROUND_MIN="$CLAUDE_ROUND_BASE_ARGS --print --max-turns 25"
CLAUDE_FLAGS_ROUND_BASIC="$CLAUDE_ROUND_BASE_ARGS --print"
echo "Executing round $ROUND: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
set +e # Don't exit on failure
set -o pipefail
# Try with full permissions first
echo "Attempting Claude CLI round $ROUND with elevated permissions..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_ROUND_EXIT_CODE=1
# First attempt: Try with full permissions
if [ -d "$REPO_DIR" ]; then
echo "Executing Claude CLI round $ROUND in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
echo "Executing Claude CLI round $ROUND in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 1 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 2 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 3 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round_end" "\"round\":$ROUND,\"rc\":$CLAUDE_ROUND_EXIT_CODE"
set -e
if [ $CLAUDE_ROUND_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI round $ROUND finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI round $ROUND completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_ROUND_EXIT_CODE -eq 124 ]; then
echo "⏰ Claude Code CLI round $ROUND timed out after 1800 seconds (30 minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI round $ROUND failed with exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes for round $ROUND:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing with tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Claude Code CLI not available in round $ROUND; skipping" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Clean up round prompt file
rm -f /tmp/claude_setup_round_${ROUND}.md
_stage "round_tests_start" "\"round\":$ROUND"
echo "Re-running full tests (round $ROUND)..." | tee -a "$TEST_LOG_FILE"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
_stage "round_tests_end" "\"round\":$ROUND,\"rc\":$TEST_RC"
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
# Persist exit code on each round
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
done
fi
fi
if [ -z "${CF_TARGET_FILE:-}" ]; then
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Trace-first: ${TRACE_CMD} ${TESTS_ROOT_VALUE}/"
# Ensure pytest-cov if coverage flags present
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
pip install pytest-cov || true
fi
set +e
# If TRACE_CMD is pytest, pass the tests-root as args so tracer gets a non-empty split
if [[ "${TRACE_CMD}" == pytest* ]]; then
codeflash optimize --trace-only -m pytest -- "${TESTS_ROOT_VALUE}/" || true
else
codeflash optimize --trace-only -m "${TRACE_CMD}" || true
fi
set -e
else
echo "Skipping trace: tests root not found."
fi
fi
if [ -n "${CF_TARGET_FILE:-}" ]; then
echo "Running Codeflash single-file: ${CF_TARGET_FILE} ${CF_TARGET_FUNCTION:-}"
if [ ! -f "${CF_TARGET_FILE}" ]; then
echo "Target file not found: ${CF_TARGET_FILE}" >&2
exit 3
fi
if [ -n "${CF_TARGET_FUNCTION:-}" ]; then
codeflash --file "${CF_TARGET_FILE}" --function "${CF_TARGET_FUNCTION}" --verbose
else
codeflash --file "${CF_TARGET_FILE}" --verbose
fi
else
echo "Running Codeflash --all without staging-review flag..."
codeflash --all --verbose
fi
# If we reach here normally, ensure EXIT_FILE reflects last known code (0 if unset)
if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then echo "0" > "${EXIT_FILE}" 2>/dev/null || true; fi
echo "--- Finished Codeflash Optimization ---"
# Skip/failure paths above exit non-zero; the EXIT trap ensures EXIT_FILE is set there as well
exit 0