codeflash-internal/experiments/optimization-factory/scripts/run_optimization.sh

#!/bin/bash
set -e
set -u
# Enable pipefail when supported (works under bash; safely ignored under sh)
if (set -o 2>/dev/null | grep -q 'pipefail') 2>/dev/null; then
set -o pipefail
fi
echo "--- Starting Codeflash Optimization ---"
# Helper to record stage transitions for BE tracking
_stage() {
local name="$1"; shift || true
local extra="$*"
if [ -n "${STAGE_FILE:-}" ]; then
printf '{"ts":"%s","stage":"%s"%s}\n' "$(date -Is)" "$name" "${extra:+,$extra}" >> "$STAGE_FILE" 2>/dev/null || true
fi
}
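# For illustration only (hypothetical values): with STAGE_FILE=/tmp/stages.jsonl set,
#   _stage "clone_start" "\"repo\":\"example-org/example-repo\""
# appends one JSONL line of the form:
#   {"ts":"2025-01-01T12:00:00+00:00","stage":"clone_start","repo":"example-org/example-repo"}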
# Helper to safely log environment variables without exposing sensitive values
_safe_log_env() {
local pattern="$1"
local log_file="${2:-/dev/stdout}"
# List of sensitive environment variable patterns to mask
local sensitive_patterns="(TOKEN|KEY|SECRET|PASSWORD|CREDENTIAL|AUTH|API_KEY|PRIVATE|ACCESS|BEARER)"
env | grep -E "$pattern" | while IFS='=' read -r key value; do
if echo "$key" | grep -qiE "$sensitive_patterns"; then
# Mask sensitive values - show first 4 and last 4 characters with asterisks in between
local masked_value=""
if [ ${#value} -le 8 ]; then
masked_value="***MASKED***"
else
masked_value="${value:0:4}***${value: -4}"
fi
echo "${key}=${masked_value}" >> "$log_file"
else
echo "${key}=${value}" >> "$log_file"
fi
done
}
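# Behavior sketch (variable names are hypothetical): calling
#   _safe_log_env '^CODEFLASH_' /tmp/env_snapshot.log
# would log CODEFLASH_API_KEY=cf-1***wxyz (masked, since the name matches KEY),
# while a non-sensitive variable such as CODEFLASH_MODE=default is logged verbatim.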
_stage "start"
# Ensure we always record final exit and persist EXIT_FILE if not already set
trap '_rc=$?; _stage "runner_exit" "\"rc\":$_rc"; if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then echo "$_rc" > "${EXIT_FILE}" 2>/dev/null || true; fi; exit $_rc' EXIT
# Check if this is a custom run
CUSTOM_RUN_MODE=false
if [ -f "/home/ubuntu/custom_run_config.json" ]; then
CUSTOM_RUN_MODE=true
echo "=== Custom Run Mode Detected ==="
echo "Loading custom configuration from /home/ubuntu/custom_run_config.json"
_stage "custom_run_start"
fi
if [ -z "${GITHUB_TOKEN:-}" ]; then echo "GITHUB_TOKEN is required"; exit 1; fi
if [ -z "${CODEFLASH_API_KEY:-}" ]; then echo "CODEFLASH_API_KEY is required"; exit 1; fi
if [ -z "${GITHUB_REPO_URL:-}" ]; then echo "GITHUB_REPO_URL is required"; exit 1; fi
# Handle custom run configuration
if [ "$CUSTOM_RUN_MODE" = true ]; then
echo "=== Processing Custom Run Configuration ==="
_stage "custom_config_processing"
# Load custom configuration
if command -v python3 >/dev/null 2>&1; then
CUSTOM_CONFIG=$(python3 -c "
import json
try:
    with open('/home/ubuntu/custom_run_config.json', 'r') as f:
        config = json.load(f)
    print(json.dumps(config))
except Exception as e:
    print('{}')
")
else
echo "Warning: python3 not available for custom config parsing"
CUSTOM_CONFIG='{}'
fi
# Extract configuration values
OPTIMIZATION_MODE=$(echo "$CUSTOM_CONFIG" | python3 -c "
import json, sys
try:
    config = json.load(sys.stdin)
    print(config.get('optimization_mode', ''))
except:
    print('')
" 2>/dev/null || echo "")
echo "Custom optimization mode: ${OPTIMIZATION_MODE:-'not specified'}"
# Log custom configuration details
echo "=== Custom Run Configuration Details ==="
echo "Configuration file: /home/ubuntu/custom_run_config.json"
echo "Optimization mode: ${OPTIMIZATION_MODE:-'not specified'}"
# Extract and log configuration sections
CONFIG_DATA=$(echo "$CUSTOM_CONFIG" | python3 -c "
import json, sys
try:
    config = json.load(sys.stdin)
    config_section = config.get('config', {})
    flags_section = config.get('flags', {})
    advanced_section = config.get('advanced', {})
    print('Config section keys:', list(config_section.keys()))
    print('Flags section keys:', list(flags_section.keys()))
    print('Advanced section keys:', list(advanced_section.keys()))
except:
    print('Error parsing configuration sections')
" 2>/dev/null || echo "Error parsing configuration")
echo "$CONFIG_DATA"
_stage "custom_config_loaded" "\"mode\":\"$OPTIMIZATION_MODE\""
# Override environment variables with custom values if provided
if [ -n "${CUSTOM_MODULE_ROOT:-}" ]; then
MODULE_ROOT_VALUE="$CUSTOM_MODULE_ROOT"
echo "Using custom module root: $MODULE_ROOT_VALUE"
fi
if [ -n "${CUSTOM_TESTS_ROOT:-}" ]; then
TESTS_ROOT_VALUE="$CUSTOM_TESTS_ROOT"
echo "Using custom tests root: $TESTS_ROOT_VALUE"
fi
if [ -n "${CUSTOM_TEST_FRAMEWORK:-}" ]; then
echo "Using custom test framework: $CUSTOM_TEST_FRAMEWORK"
TEST_FRAMEWORK_VALUE="$CUSTOM_TEST_FRAMEWORK"
fi
if [ -n "${CUSTOM_PYTEST_CMD:-}" ]; then
PYTEST_CMD_VALUE="$CUSTOM_PYTEST_CMD"
echo "Using custom pytest command: $PYTEST_CMD_VALUE"
fi
if [ -n "${CUSTOM_FORMATTER_CMDS:-}" ]; then
FORMATTER_CMDS_VALUE="$CUSTOM_FORMATTER_CMDS"
echo "Using custom formatter commands: $FORMATTER_CMDS_VALUE"
fi
_stage "custom_config_processed" "\"mode\":\"$OPTIMIZATION_MODE\""
fi
# Precedence: custom-run overrides (set above) > LLM-provided overrides > CSV/env > auto.
# The ':-' defaults keep any value already set by the custom-run block instead of clobbering it.
MODULE_ROOT_VALUE="${MODULE_ROOT_VALUE:-${LLM_MODULE_ROOT:-${MODULE_ROOT:-auto}}}"
TESTS_ROOT_VALUE="${TESTS_ROOT_VALUE:-${LLM_TESTS_ROOT:-${TESTS_ROOT:-auto}}}"
PYTEST_CMD_VALUE="${PYTEST_CMD_VALUE:-${LLM_PYTEST_CMD:-${PYTEST_CMD:-pytest}}}"
FORMATTER_CMDS_VALUE="${FORMATTER_CMDS_VALUE:-${LLM_FORMATTER_CMDS:-${FORMATTER_CMDS:-[\"disabled\"]}}}"
# Normalize pytest command: drop leading 'poetry run '
LOWER_PYTEST=$(echo "$PYTEST_CMD_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ "$LOWER_PYTEST" == poetry\ run* ]]; then
PYTEST_CMD_VALUE="$(echo "$PYTEST_CMD_VALUE" | sed 's/^poetry[[:space:]]\+run[[:space:]]\+//')"
fi
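# Example of the normalization above (hypothetical command): "poetry run pytest tests/ -x"
# becomes "pytest tests/ -x", so the venv's own pytest is used rather than Poetry's environment.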
# Normalize formatter cmds to Codeflash-per-file style per docs
# See https://docs.codeflash.ai/configuration
FORMATTER_CMDS_NORM="$FORMATTER_CMDS_VALUE"
LOWER_FMT=$(echo "$FORMATTER_CMDS_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ -z "$LOWER_FMT" || "$LOWER_FMT" == "[]" || "$LOWER_FMT" == "[\"disabled\"]" ]]; then
FORMATTER_CMDS_NORM='["disabled"]'
elif [[ "$LOWER_FMT" == *"ruff"* ]]; then
FORMATTER_CMDS_NORM='["ruff check --exit-zero --fix $file","ruff format $file"]'
elif [[ "$LOWER_FMT" == *"black"* ]]; then
FORMATTER_CMDS_NORM='["black $file"]'
fi
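# Examples of the mapping above (input values are illustrative):
#   '["ruff format ."]'  -> '["ruff check --exit-zero --fix $file","ruff format $file"]'
#   '["black ."]'        -> '["black $file"]'
#   '' or '[]'           -> '["disabled"]'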
# Summary of analyzed/exported config (no secrets)
echo "=== Configuration Summary (analyzer + effective) ==="
echo "Repo URL: ${GITHUB_REPO_URL}"
echo "CSV/ENV defaults: MODULE_ROOT='${MODULE_ROOT:-}', TESTS_ROOT='${TESTS_ROOT:-}', PYTEST_CMD='${PYTEST_CMD:-}'"
echo "Analyzer: LLM_MODULE_ROOT='${LLM_MODULE_ROOT:-}', LLM_TESTS_ROOT='${LLM_TESTS_ROOT:-}', LLM_PYTEST_CMD='${LLM_PYTEST_CMD:-}'"
echo "Analyzer: LLM_FORMATTER_CMDS='${LLM_FORMATTER_CMDS:-}', LLM_PIP_PACKAGES='${LLM_PIP_PACKAGES:-}'"
echo "Derived: MODULE_ROOT_VALUE='${MODULE_ROOT_VALUE}', TESTS_ROOT_VALUE='${TESTS_ROOT_VALUE}', PYTEST_CMD_VALUE='${PYTEST_CMD_VALUE}'"
echo "Derived: FORMATTER_CMDS_NORM=${FORMATTER_CMDS_NORM}"
echo "=== End Configuration Summary ==="
# Derive test framework for Codeflash config from the test command,
# unless a custom framework was already supplied by the custom-run block above
if [ -z "${TEST_FRAMEWORK_VALUE:-}" ]; then
TEST_FRAMEWORK_VALUE="pytest"
LOWER_CMD=$(echo "${PYTEST_CMD_VALUE}" | tr '[:upper:]' '[:lower:]')
if [[ "${LOWER_CMD}" =~ (^|[[:space:]])pytest([[:space:]]|$) ]]; then
TEST_FRAMEWORK_VALUE="pytest"
elif [[ "${LOWER_CMD}" == *"unittest"* ]]; then
TEST_FRAMEWORK_VALUE="unittest"
elif [[ "${LOWER_CMD}" == *"nose"* ]] || [[ "${LOWER_CMD}" == *"nosetests"* ]]; then
TEST_FRAMEWORK_VALUE="nose"
fi
fi
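# Examples of the derivation above (hypothetical commands):
#   "python -m pytest -q"         -> test-framework = "pytest"
#   "python -m unittest discover" -> test-framework = "unittest"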
_stage "auth_gh_start"
echo "Authenticating gh..."
# Check if GitHub CLI is available
if ! command -v gh >/dev/null 2>&1; then
echo "⚠️ GitHub CLI (gh) not found, falling back to direct git clone"
echo "This will clone the original repository directly without forking"
_stage "clone_start" "\"repo\":\"${GITHUB_REPO_URL}\""
echo "Cloning original repository directly: ${GITHUB_REPO_URL}"
# Working directory (must be writable by current user)
WORK_DIR="${WORK_DIR:-/home/ubuntu/work}"
rm -rf "$WORK_DIR" || true
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
# Clone the original repository directly
if git clone "${GITHUB_REPO_URL}" repo; then
echo "✅ Successfully cloned original repository: ${GITHUB_REPO_URL}"
cd repo
git remote add upstream "${GITHUB_REPO_URL}" || true
git fetch --all || true
else
echo "❌ Failed to clone original repository"
exit 1
fi
else
echo "✅ GitHub CLI (gh) is available"
if gh auth status -h github.com >/dev/null 2>&1; then
echo "gh auth status OK"
else
echo "Using GITHUB_TOKEN from environment for gh commands"
# Ensure gh CLI is authenticated with the token
if [ -n "${GITHUB_TOKEN:-}" ]; then
echo "Setting up gh authentication with provided token..."
# Note: Token is passed to gh auth login but not logged for security
echo "${GITHUB_TOKEN}" | gh auth login --with-token 2>/dev/null || {
echo "Failed to authenticate gh with token, but continuing..."
echo "This may cause fork/clone operations to fail"
}
else
echo "❌ No GITHUB_TOKEN provided - fork operations will likely fail"
fi
fi
_stage "fork_repo_start"
echo "Forking repository if needed..."
echo "Attempting to fork: ${GITHUB_REPO_URL}"
echo "Target organization: codeflash-ai"
# Extract repository name for fork checking
REPO_NAME=$(basename "${GITHUB_REPO_URL}" .git)
FORK_REPO_NAME="${REPO_NAME}"
FORK_REPO="codeflash-ai/${FORK_REPO_NAME}"
# Check if fork already exists
echo "Checking if fork already exists: ${FORK_REPO}"
if gh repo view "${FORK_REPO}" >/dev/null 2>&1; then
echo "✅ Fork already exists: ${FORK_REPO}"
echo "Updating fork from upstream before cloning..."
_stage "sync_fork_start" "\"fork_repo\":\"${FORK_REPO}\",\"upstream\":\"${GITHUB_REPO_URL}\""
echo "Syncing fork ${FORK_REPO} with upstream ${GITHUB_REPO_URL}..."
# Sync the fork with its upstream repository
# gh repo sync automatically syncs a fork with its upstream if the repo is a fork
# This updates the fork on GitHub with the latest changes from the upstream repository
# We use --force to perform a hard reset, ensuring the fork matches upstream exactly even if it has diverged
if gh repo sync "${FORK_REPO}" --force 2>&1 | tee -a /tmp/sync_debug.log; then
echo "✅ Successfully synced fork ${FORK_REPO} with upstream"
_stage "sync_fork_complete" "\"fork_repo\":\"${FORK_REPO}\""
else
SYNC_EXIT_CODE=${PIPESTATUS[0]}
echo "⚠️ Fork sync failed with exit code: $SYNC_EXIT_CODE"
echo "Sync debug log:"
cat /tmp/sync_debug.log 2>/dev/null || echo "No sync debug log available"
echo "Note: If the fork doesn't have upstream configured, sync may fail."
echo "This is non-fatal - continuing with clone anyway - fork may still be usable"
_stage "sync_fork_warning" "\"fork_repo\":\"${FORK_REPO}\",\"exit_code\":$SYNC_EXIT_CODE"
fi
else
echo "Fork does not exist, creating new fork..."
# Try to fork with better error handling
if gh repo fork "${GITHUB_REPO_URL}" --org codeflash-ai --clone=false --remote=false 2>&1 | tee -a /tmp/fork_debug.log; then
echo "✅ Fork operation completed successfully"
# Check if GitHub created a numbered fork instead
FORK_OUTPUT=$(cat /tmp/fork_debug.log 2>/dev/null | grep -o 'https://github.com/codeflash-ai/[^[:space:]]*' | tail -1)
if [ -n "$FORK_OUTPUT" ]; then
ACTUAL_FORK_REPO=$(echo "$FORK_OUTPUT" | sed 's#https://github.com/##')
if [ "$ACTUAL_FORK_REPO" != "$FORK_REPO" ]; then
echo "⚠️ GitHub created a numbered fork: ${ACTUAL_FORK_REPO}"
echo "This suggests the original fork name was already taken"
echo "Using the created fork: ${ACTUAL_FORK_REPO}"
FORK_REPO="$ACTUAL_FORK_REPO"
fi
fi
else
FORK_EXIT_CODE=${PIPESTATUS[0]}
echo "❌ Fork operation failed with exit code: $FORK_EXIT_CODE"
echo "Debug information:"
echo "Repository URL: ${GITHUB_REPO_URL}"
echo "GitHub token status: $([ -n "${GITHUB_TOKEN:-}" ] && echo "Set" || echo "Not set")"
echo "gh auth status:"
gh auth status 2>&1 || echo "gh auth status failed"
echo "Fork debug log:"
cat /tmp/fork_debug.log 2>/dev/null || echo "No fork debug log available"
echo "Continuing anyway - fork may already exist or we'll clone the original repo"
fi
fi
# FORK_REPO is already set above based on whether fork exists or was created
_stage "clone_start" "\"repo\":\"${FORK_REPO}\""
echo "Cloning fork ${FORK_REPO}..."
# Working directory (must be writable by current user)
WORK_DIR="${WORK_DIR:-/home/ubuntu/work}"
rm -rf "$WORK_DIR" || true
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
# Retry clone with exponential backoff for GitHub service issues
for attempt in 1 2 3; do
echo "Clone attempt $attempt/3..."
echo "Attempting to clone fork: ${FORK_REPO}"
if gh repo clone "${FORK_REPO}" repo 2>&1 | tee -a /tmp/clone_debug.log; then
echo "✅ Successfully cloned fork: ${FORK_REPO}"
break
else
CLONE_EXIT_CODE=${PIPESTATUS[0]}
echo "❌ Fork clone failed with exit code: $CLONE_EXIT_CODE"
echo "Clone debug log:"
cat /tmp/clone_debug.log 2>/dev/null || echo "No clone debug log available"
if [ $attempt -lt 3 ]; then
echo "Clone failed, retrying in $((attempt * 10)) seconds..."
sleep $((attempt * 10))
else
echo "Fork clone failed after 3 attempts, trying original repo..."
echo "Attempting to clone original repo: ${GITHUB_REPO_URL}"
# Fallback to original repo if fork clone fails
if gh repo clone "${GITHUB_REPO_URL}" repo 2>&1 | tee -a /tmp/clone_debug.log; then
echo "✅ Successfully cloned original repo: ${GITHUB_REPO_URL}"
break
else
echo "❌ Original repo clone also failed"
echo "Final clone debug log:"
cat /tmp/clone_debug.log 2>/dev/null || echo "No clone debug log available"
echo "Failed to clone both fork and original repo"
exit 1
fi
fi
fi
done
cd repo
git remote add upstream "${GITHUB_REPO_URL}" || true
git fetch --all || true
fi # End of GitHub CLI availability check
if [ "${MODULE_ROOT_VALUE}" = "auto" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "Detecting module/tests roots..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then echo "No Python interpreter found for detection"; else $PY_CMD /app/scripts/detect_roots.py > roots.json || true; fi
if [ -f roots.json ]; then
DETECTED_MODULE=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("module_root",""))' || echo "")
DETECTED_TESTS=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("tests_root",""))' || echo "")
if [ "${MODULE_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_MODULE}" ]; then MODULE_ROOT_VALUE="${DETECTED_MODULE}"; fi
if [ "${TESTS_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_TESTS}" ]; then TESTS_ROOT_VALUE="${DETECTED_TESTS}"; fi
fi
fi
if [ -z "${MODULE_ROOT_VALUE}" ] || [ "${MODULE_ROOT_VALUE}" = "auto" ]; then
echo "Failed to detect module-root; please set MODULE_ROOT env."; exit 2
fi
if [ -z "${TESTS_ROOT_VALUE}" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "No tests-root detected; tracing will be skipped."
fi
_stage "write_codeflash_config"
echo "Writing pyproject.toml..."
# Verify we're in the correct working directory
echo "Current working directory: $(pwd)"
if [ ! -d "/home/ubuntu/work/repo" ]; then
echo "Error: Repository directory /home/ubuntu/work/repo not found" >&2
_stage "error" "\"error\":\"repository_directory_not_found\""
exit 1
fi
# Ensure we're in the repository directory
if [ "$(pwd)" != "/home/ubuntu/work/repo" ]; then
echo "Changing to repository directory: /home/ubuntu/work/repo"
cd "/home/ubuntu/work/repo" || {
echo "Error: Failed to change to repository directory" >&2
_stage "error" "\"error\":\"failed_to_change_directory\""
exit 1
}
echo "Now in directory: $(pwd)"
fi
# ROBUST PYPROJECT.TOML HANDLING
echo "=== ANALYZING EXISTING PYPROJECT.TOML STRUCTURE ==="
# Handle custom pyproject.toml location
PYPROJECT_FILE="pyproject.toml"
if [ "$CUSTOM_RUN_MODE" = true ] && [ -n "${CUSTOM_PYPROJECT_LOCATION:-}" ]; then
PYPROJECT_FILE="$CUSTOM_PYPROJECT_LOCATION"
echo "Using custom pyproject.toml location: $PYPROJECT_FILE"
# Verify custom pyproject.toml location is accessible
if [ ! -f "$PYPROJECT_FILE" ] && [ ! -d "$(dirname "$PYPROJECT_FILE")" ]; then
echo "Error: Custom pyproject.toml location not accessible: $PYPROJECT_FILE" >&2
_stage "error" "\"error\":\"custom_pyproject_location_inaccessible\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
fi
# Check if pyproject.toml already exists
EXISTING_PYPROJECT=""
if [ -f "$PYPROJECT_FILE" ]; then
echo "✅ Found existing pyproject.toml at: $PYPROJECT_FILE"
EXISTING_PYPROJECT="$PYPROJECT_FILE"
# Analyze existing pyproject.toml structure
echo "Analyzing existing pyproject.toml structure..."
# Check for different project types
if grep -q "\[tool\.poetry\]" "$PYPROJECT_FILE"; then
echo "📦 Detected Poetry project structure"
PROJECT_TYPE="poetry"
elif grep -q "\[tool\.uv\]" "$PYPROJECT_FILE"; then
echo "📦 Detected UV project structure"
PROJECT_TYPE="uv"
elif grep -q "\[project\]" "$PYPROJECT_FILE"; then
echo "📦 Detected PEP 621 project structure"
PROJECT_TYPE="pep621"
elif grep -q "\[build-system\]" "$PYPROJECT_FILE"; then
echo "📦 Detected build-system configuration"
PROJECT_TYPE="build_system"
else
echo "📦 Detected generic pyproject.toml"
PROJECT_TYPE="generic"
fi
# Check if Codeflash config already exists
if grep -q "\[tool\.codeflash\]" "$PYPROJECT_FILE"; then
echo "⚙️ Found existing [tool.codeflash] section"
HAS_CODEFLASH_CONFIG=true
else
echo "⚙️ No existing [tool.codeflash] section found"
HAS_CODEFLASH_CONFIG=false
fi
# Create backup (capture a single timestamp so the logged name matches the file)
BACKUP_FILE="${PYPROJECT_FILE}.backup.$(date +%Y%m%d_%H%M%S)"
cp "$PYPROJECT_FILE" "$BACKUP_FILE"
echo "📋 Created backup: $BACKUP_FILE"
else
echo "📝 No existing pyproject.toml found at: $PYPROJECT_FILE"
PROJECT_TYPE="none"
HAS_CODEFLASH_CONFIG=false
fi
echo "=== GENERATING CODEFLASH CONFIGURATION ==="
# Generate Codeflash configuration
CODEFLASH_CONFIG="[tool.codeflash]
module-root = \"${MODULE_ROOT_VALUE}\"
tests-root = \"${TESTS_ROOT_VALUE}\"
test-framework = \"${TEST_FRAMEWORK_VALUE}\"
formatter-cmds = ${FORMATTER_CMDS_NORM}"
# Add optional fields if they have custom values
if [ "$CUSTOM_RUN_MODE" = true ]; then
if [ -n "${CUSTOM_BENCHMARKS_ROOT:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
benchmarks-root = \"${CUSTOM_BENCHMARKS_ROOT}\""
fi
if [ -n "${CUSTOM_IGNORE_PATHS:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
ignore-paths = ${CUSTOM_IGNORE_PATHS}"
fi
if [ -n "${CUSTOM_PYTEST_CMD:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
pytest-cmd = \"${CUSTOM_PYTEST_CMD}\""
fi
if [ -n "${CUSTOM_DISABLE_IMPORTS_SORTING:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
disable-imports-sorting = ${CUSTOM_DISABLE_IMPORTS_SORTING}"
fi
if [ -n "${CUSTOM_DISABLE_TELEMETRY:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
disable-telemetry = ${CUSTOM_DISABLE_TELEMETRY}"
fi
fi
echo "=== HANDLING PYPROJECT.TOML BASED ON PROJECT TYPE ==="
# Handle different scenarios based on project type and existing configuration
case "$PROJECT_TYPE" in
"poetry"|"uv"|"pep621"|"build_system"|"generic")
echo "🔄 Merging Codeflash configuration with existing project structure..."
if [ "$HAS_CODEFLASH_CONFIG" = true ]; then
echo "📝 Updating existing [tool.codeflash] section..."
# Remove existing [tool.codeflash] section and add new one
awk '
/^\[tool\.codeflash\]/ { in_codeflash = 1; next }
in_codeflash && /^\[/ { in_codeflash = 0 }
in_codeflash { next }
{ print }
' "$PYPROJECT_FILE" > "${PYPROJECT_FILE}.tmp"
echo "" >> "${PYPROJECT_FILE}.tmp"
echo "$CODEFLASH_CONFIG" >> "${PYPROJECT_FILE}.tmp"
mv "${PYPROJECT_FILE}.tmp" "$PYPROJECT_FILE"
else
echo "📝 Adding new [tool.codeflash] section..."
echo "" >> "$PYPROJECT_FILE"
echo "$CODEFLASH_CONFIG" >> "$PYPROJECT_FILE"
fi
;;
"none")
echo "📝 Creating new pyproject.toml with Codeflash configuration..."
echo "$CODEFLASH_CONFIG" > "$PYPROJECT_FILE"
;;
*)
echo "⚠️ Unknown project type, creating new pyproject.toml..."
echo "$CODEFLASH_CONFIG" > "$PYPROJECT_FILE"
;;
esac
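# Sketch of the merge above (file contents are hypothetical): given an existing
#   [tool.poetry] ... [tool.codeflash] module-root = "old" ... [tool.black] ...
# the awk filter drops only the lines from the "[tool.codeflash]" header up to
# (not including) the next "[" section header, then the freshly generated
# [tool.codeflash] block is appended, leaving [tool.poetry] and [tool.black] untouched.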
echo "Written pyproject.toml to: $PYPROJECT_FILE"
# Verify pyproject.toml was written successfully
if [ ! -f "$PYPROJECT_FILE" ]; then
echo "Error: Failed to write pyproject.toml file" >&2
_stage "error" "\"error\":\"failed_to_write_pyproject_toml\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
# Verify pyproject.toml has content
if [ ! -s "$PYPROJECT_FILE" ]; then
echo "Error: pyproject.toml file is empty" >&2
_stage "error" "\"error\":\"pyproject_toml_empty\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
echo "=== VALIDATING FINAL PYPROJECT.TOML ==="
# Validate the final pyproject.toml structure
echo "Final pyproject.toml content:"
echo "----------------------------------------"
head -20 "$PYPROJECT_FILE"
echo "----------------------------------------"
# Verify Codeflash section exists
if grep -q "\[tool\.codeflash\]" "$PYPROJECT_FILE"; then
echo "✅ [tool.codeflash] section found in final pyproject.toml"
else
echo "❌ [tool.codeflash] section missing from final pyproject.toml" >&2
_stage "error" "\"error\":\"codeflash_section_missing\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
# Verify required Codeflash fields
REQUIRED_FIELDS=("module-root" "tests-root" "test-framework")
for field in "${REQUIRED_FIELDS[@]}"; do
if grep -q "$field" "$PYPROJECT_FILE"; then
echo "✅ Found required field: $field"
else
echo "❌ Missing required field: $field" >&2
_stage "error" "\"error\":\"missing_required_field\",\"field\":\"$field\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
done
# Log project type preservation
if [ "$PROJECT_TYPE" != "none" ]; then
echo "✅ Preserved existing project structure: $PROJECT_TYPE"
echo "📋 Backup available at: ${PYPROJECT_FILE}.backup.*"
else
echo "📝 Created new pyproject.toml (no existing project structure)"
fi
echo "pyproject.toml verification successful"
_stage "pyproject_toml_written" "\"path\":\"$PYPROJECT_FILE\",\"size\":$(wc -c < "$PYPROJECT_FILE"),\"project_type\":\"$PROJECT_TYPE\",\"preserved_existing\":$([ "$PROJECT_TYPE" != "none" ] && echo "true" || echo "false")"
# Also write a minimal parent pyproject for Sphinx (docs/conf.py may reference ../pyproject.toml)
if [ -d .. ]; then
echo "Writing parent pyproject.toml for docs..."
cat > ../pyproject.toml <<EOF
[project]
name = "autogenerated-project"
version = "0.0.0"
description = "Autogenerated to satisfy Sphinx config during CI"
authors = [{name = "Auto-generated", email = "noreply@example.com"}]
EOF
fi
if [ -n "${VENV_PATH:-}" ] && [ -d "${VENV_PATH}" ]; then
echo "Using pre-created venv at ${VENV_PATH}"
# shellcheck disable=SC1090
source "${VENV_PATH}/bin/activate"
else
_stage "venv_setup"
echo "Setting up Python venv..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then echo "No Python interpreter found"; exit 1; fi
# Try to create virtual environment with better error handling
echo "Attempting to create virtual environment with $PY_CMD..."
if "$PY_CMD" -m venv .venv 2>/tmp/venv_error.log; then
echo "✅ Virtual environment created successfully"
else
echo "❌ Virtual environment creation failed. Error log:"
cat /tmp/venv_error.log 2>/dev/null || echo "No error log available"
# Try to install missing packages and retry
echo "Attempting to install missing packages and retry..."
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y python3-venv python3.10-venv python3-distutils 2>/dev/null || true
# Retry virtual environment creation
echo "Retrying virtual environment creation..."
if "$PY_CMD" -m venv .venv 2>/tmp/venv_error2.log; then
echo "✅ Virtual environment created successfully on retry"
else
echo "❌ Virtual environment creation failed again. Error log:"
cat /tmp/venv_error2.log 2>/dev/null || echo "No error log available"
# Fallback: try using virtualenv if available
if command -v virtualenv >/dev/null 2>&1; then
echo "Attempting fallback with virtualenv..."
if virtualenv .venv 2>/tmp/venv_error3.log; then
echo "✅ Virtual environment created with virtualenv fallback"
else
echo "❌ All virtual environment creation methods failed"
echo "Error log:"
cat /tmp/venv_error3.log 2>/dev/null || echo "No error log available"
echo "Continuing without virtual environment..."
# Best-effort fallback: resolve the site-packages glob via ls (globs do not
# expand inside assignments) and guard against an unset PYTHONPATH under set -u
SITE_PKGS=$(ls -d "$PWD"/.venv/lib/python*/site-packages 2>/dev/null | head -n1 || true)
export PYTHONPATH="${SITE_PKGS:+$SITE_PKGS:}${PYTHONPATH:-}"
fi
else
echo "❌ All virtual environment creation methods failed"
echo "Continuing without virtual environment..."
# Best-effort fallback: same site-packages resolution as above, safe under set -u
SITE_PKGS=$(ls -d "$PWD"/.venv/lib/python*/site-packages 2>/dev/null | head -n1 || true)
export PYTHONPATH="${SITE_PKGS:+$SITE_PKGS:}${PYTHONPATH:-}"
fi
fi
fi
# Only activate if .venv directory exists
if [ -d ".venv" ] && [ -f ".venv/bin/activate" ]; then
# shellcheck disable=SC1091
source .venv/bin/activate
pip install --upgrade pip >/dev/null 2>&1 || true
else
echo "⚠️ Virtual environment not available, using system Python"
fi
_stage "install_codeflash"
echo "Installing codeflash CLI with asyncio support..."
# Install codeflash with asyncio extra to support --async flag
pip install --upgrade "codeflash[asyncio]" || pip install "codeflash[asyncio]" || true
fi
# Ensure 'python3' resolves to the venv interpreter (some venvs only expose 'python')
if ! command -v python3 >/dev/null 2>&1 && command -v python >/dev/null 2>&1; then
ln -sf "$(command -v python)" "$(dirname "$(command -v python)")/python3" || true
fi
# Make local repo importable first, then utils/ for helper modules like testutils
export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
if [ -d "$PWD/utils" ]; then
export PYTHONPATH="$PWD/utils:$PYTHONPATH"
fi
_stage "pre_test_setup"
# If coverage flags are present in test command, ensure pytest-cov is installed before any test run
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
echo "Detected coverage flags in test command; installing pytest-cov..."
pip install pytest-cov || true
fi
# If reruns flags are present in test command, ensure pytest-rerunfailures is installed
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --reruns"; then
echo "Detected reruns flags in test command; installing pytest-rerunfailures..."
pip install pytest-rerunfailures || true
fi
# Run install commands in the project directory (inside venv)
if [ -n "${PRE_INSTALL_CMDS:-}" ]; then
echo "Running pre-install commands: ${PRE_INSTALL_CMDS}"
bash -lc "${PRE_INSTALL_CMDS}" || echo "Pre-install commands failed, continuing..."
fi
_stage "project_install_start"
if [ -n "${INSTALL_CMDS:-}" ]; then
echo "Running install commands: ${INSTALL_CMDS}"
if bash -lc "${INSTALL_CMDS}"; then
echo "Install commands completed successfully"
else
echo "Install commands failed (exit code: $?), continuing..."
# For repositories with custom install scripts that may fail due to
# non-standard configurations, we continue and rely on pip install fallbacks
fi
_stage "project_install_end"
fi
if [ -n "${POST_INSTALL_CMDS:-}" ]; then
echo "Running post-install commands: ${POST_INSTALL_CMDS}"
bash -lc "${POST_INSTALL_CMDS}" || echo "Post-install commands failed, continuing..."
fi
# Normalize test command for use in two contexts:
# 1) Execution (must use venv's Python)
# 2) Codeflash tracing with -m (must be a Python module, not 'python3 <script>')
PYTEST_CMD_RUN="${PYTEST_CMD_VALUE}"
if [[ "${PYTEST_CMD_RUN}" == python3\ * ]]; then PYTEST_CMD_RUN="python ${PYTEST_CMD_RUN#python3 }"; fi
# Debug: Show the original and normalized test commands
echo "Debug: Original PYTEST_CMD_VALUE: '${PYTEST_CMD_VALUE}'"
echo "Debug: Normalized PYTEST_CMD_RUN: '${PYTEST_CMD_RUN}'"
echo "Debug: TESTS_ROOT_VALUE: '${TESTS_ROOT_VALUE}'"
# Helper: detect if command looks like invoking pytest directly
_is_pytest_runner() {
case "$1" in
pytest\ *|pytest) return 0 ;;
python\ -m\ pytest*) return 0 ;;
python3\ -m\ pytest*) return 0 ;;
py.test\ *|py.test) return 0 ;;
*) return 1 ;;
esac
}
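# Usage sketch (commands are hypothetical): _is_pytest_runner "python -m pytest -q" returns 0,
# while _is_pytest_runner "python run_tests.py" returns 1 and is treated as a custom runner.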
TRACE_CMD="${PYTEST_CMD_VALUE}"
# Convert interpreter-prefix forms to module forms
if [[ "${TRACE_CMD}" == python3\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python3 -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python3\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python3 }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
elif [[ "${TRACE_CMD}" == python\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
fi
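# Example of the conversion above (hypothetical script): "python3 ./bench/run_suite.py --fast"
# becomes "bench.run_suite --fast", which codeflash can launch as a module via 'python -m'.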
# Debug: Show the trace command after processing
echo "Debug: TRACE_CMD for codeflash: '${TRACE_CMD}'"
echo "Installing project dependencies (best-effort)..."
# 1) Install repo requirements first to pin base versions
if [ -f requirements.txt ]; then pip install -r requirements.txt || true; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt || true; fi
if [ -d requirements ]; then
for f in requirements/*.txt; do
[ -f "$f" ] && pip install -r "$f" || true
done
fi
# 2) Only attempt editable install if packaging metadata likely exists
if [ -f pyproject.toml ] || [ -f setup.py ] || [ -f setup.cfg ]; then
EDITABLE_OK=0
if [ -f pyproject.toml ] && grep -qiE "^\s*\[tool\.poetry\]|^\s*\[project\]" pyproject.toml; then
EDITABLE_OK=1
fi
if [ -f setup.py ]; then
EDITABLE_OK=1
fi
if [ -f setup.cfg ] && grep -qiE "^\s*packages\s*=|^\s*package_dir\s*=|^\s*install_requires\s*=" setup.cfg; then
EDITABLE_OK=1
fi
if [ "$EDITABLE_OK" -eq 1 ]; then
echo "Attempting editable install (pip install -e .)..."
if pip install -e .; then
for extra in dev test tests ci all; do
pip install -e ".[${extra}]" || true
done
else
echo "Editable install failed; skipping editable extras and continuing without -e ."
fi
else
echo "Packaging metadata not sufficient; skipping editable install."
fi
fi
# 3) Freeze constraints and then install LLM-specified packages under constraints
if [ -n "${LLM_PIP_PACKAGES:-}" ] && [ "${LLM_PIP_PACKAGES}" != "[]" ]; then
echo "Freezing constraints before LLM package install..."
pip freeze > .cf_constraints.txt || true
echo "Installing LLM-suggested Python packages under constraints: ${LLM_PIP_PACKAGES}"
python - <<'PY'
import os, json, subprocess, sys
pkgs = []
try:
    raw = os.environ.get('LLM_PIP_PACKAGES', '[]')
    pkgs = json.loads(raw)
    if not isinstance(pkgs, list):
        pkgs = []
except Exception:
    pkgs = []
specs = []
for p in pkgs:
    if isinstance(p, str) and p.strip():
        specs.append(p.strip())
    elif isinstance(p, dict) and p.get('name'):
        name = str(p['name']).strip()
        spec = str(p.get('version_spec') or '').strip()
        if name:
            specs.append(name + (spec if spec else ''))
if specs:
    cmd = [sys.executable, '-m', 'pip', 'install', '--disable-pip-version-check', '-c', '.cf_constraints.txt'] + specs
    try:
        subprocess.run(cmd, check=False)
    except Exception:
        pass
PY
fi
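# Shapes accepted by the helper above (values are illustrative):
#   LLM_PIP_PACKAGES='["requests","numpy>=1.24"]'
#   LLM_PIP_PACKAGES='[{"name":"requests"},{"name":"numpy","version_spec":">=1.24"}]'
# Both resolve to roughly: python -m pip install -c .cf_constraints.txt requests numpy>=1.24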
# Ensure formatters/tools exist if referenced (check both normalized and original values)
if [[ "$FORMATTER_CMDS_NORM" == *"black "* ]] || [[ "${LOWER_FMT}" == *"black"* ]]; then
pip install black || true
fi
if [[ "$FORMATTER_CMDS_NORM" == *"ruff "* ]] || [[ "${LOWER_FMT}" == *"ruff"* ]]; then
pip install ruff || true
fi
# Install anthropic if key is present to enable Claude Code CLI
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
pip install --upgrade anthropic || true
fi
python -c "import pytest" 2>/dev/null || pip install pytest || true
# Ensure CodeFlash environment is properly set up before tests
echo "Setting up CodeFlash environment..."
if [ -n "${CODEFLASH_API_KEY:-}" ]; then
export CODEFLASH_API_KEY="${CODEFLASH_API_KEY}"
echo "✅ CodeFlash API key is set and exported"
# Verify CodeFlash is accessible
if command -v codeflash >/dev/null 2>&1; then
echo "✅ CodeFlash CLI is available"
else
echo "⚠️ CodeFlash CLI not found, installing..."
pip install codeflash[asyncio] || pip install codeflash || true
fi
# Test CodeFlash connectivity
if command -v codeflash >/dev/null 2>&1; then
echo "Testing CodeFlash connectivity..."
timeout 10 codeflash --version 2>/dev/null && echo "✅ CodeFlash connectivity verified" || echo "⚠️ CodeFlash connectivity test failed"
fi
else
echo "⚠️ CODEFLASH_API_KEY not set - optimization may fail"
fi
# Optional: preflight test run to detect missing modules (with timeout protection)
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Preflight test run to detect missing modules..."
echo "Using timeout protection (30 minutes) to prevent hanging tests..."
_stage "preflight_tests_start"
set +e
# Use timeout to prevent hanging tests. Note: we already know the tests directory
# exists here (checked above), and there is no pipeline, so plain $? is the right
# way to capture the exit code.
PREFLIGHT_TIMEOUT=1800 # 30 minutes
PRE_RC=124 # Default to timeout
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
echo "Running preflight test: ${PYTEST_CMD_RUN} -q (with timeout ${PREFLIGHT_TIMEOUT}s)"
timeout $PREFLIGHT_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} -q'" >/tmp/preflight.out 2>&1
PRE_RC=$?
else
echo "Running preflight test: ${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/ (with timeout ${PREFLIGHT_TIMEOUT}s)"
timeout $PREFLIGHT_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/'" >/tmp/preflight.out 2>&1
PRE_RC=$?
fi
else
# Non-pytest runner; avoid appending a tests path that may be unsupported
echo "Running preflight test: ${PYTEST_CMD_RUN} -q (with timeout ${PREFLIGHT_TIMEOUT}s)"
timeout $PREFLIGHT_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} -q'" >/tmp/preflight.out 2>&1
PRE_RC=$?
fi
# Log the result
if [ $PRE_RC -eq 124 ]; then
echo "⚠️ Preflight test timed out after ${PREFLIGHT_TIMEOUT} seconds - tests may be hanging or very slow"
echo "Continuing with optimization despite timeout..."
elif [ $PRE_RC -eq 0 ]; then
echo "✅ Preflight test completed successfully"
else
echo "⚠️ Preflight test failed with exit code $PRE_RC - this is expected for some repositories"
fi
_stage "preflight_tests_end" "\"rc\":$PRE_RC"
set -e
# Detect and register unknown pytest marks to avoid collection errors under -Werror
# Why this exists:
# - Some repositories use custom pytest markers (e.g., `@pytest.mark.download`, `@pytest.mark.slow`)
# but forget to register them in their config (pyproject.toml/setup.cfg/pytest.ini).
# - With `-Werror` or strict settings, pytest turns the UnknownMark warning into an error during
# collection, causing the test run to fail before even starting.
#
# What we do:
# 1) We parse the preflight test output (`/tmp/preflight.out`) for lines like:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# 2) We extract the marker name (e.g., `download`) using `sed` with a capturing group, then de-duplicate
# with `sort -u`.
# 3) If any unknown markers are found, we append a minimal `conftest.py` shim at repo root that registers
# each discovered marker via `config.addinivalue_line("markers", ...)`. This is the official mechanism
# to declare custom markers so pytest accepts them.
#
# Example:
# If preflight output contains multiple instances of:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# then MARKS will contain `download`, and this block will append a conftest.py snippet like:
# def pytest_configure(config):
# config.addinivalue_line("markers", "download: auto-registered marker")
# After that, subsequent pytest runs will collect tests without failing on the unknown mark.
#
# Notes:
# - We only add to conftest.py; we do NOT overwrite existing content, keeping it non-destructive.
# - If no unknown markers are detected, nothing is changed.
# - This does not alter test behavior; it simply declares markers so pytest won't error on them.
if [ -s /tmp/preflight.out ]; then
MARKS=$(sed -n "s/.*Unknown pytest\.mark\.\([A-Za-z0-9_][A-Za-z0-9_]*\).*/\1/p" /tmp/preflight.out | sort -u)
if [ -n "${MARKS}" ]; then
echo "Detected unknown pytest marks: ${MARKS}"
echo "Auto-registering markers via conftest.py shim..."
(
echo "# Auto-added by optimizer to register pytest markers"
echo "def pytest_configure(config):"
# For each discovered unknown marker (e.g., download, slow, integration), write a declaration line.
# This is equivalent to having `markers = download: ...` in pytest.ini/pyproject.toml.
for m in ${MARKS}; do
echo " config.addinivalue_line(\"markers\", \"${m}: auto-registered marker\")"
done
) >> conftest.py
fi
fi
if [ $PRE_RC -ne 0 ]; then
echo "Analyzing missing module errors..."
# Use more robust error handling to prevent SIGPIPE
MISSING=""
if [ -f /tmp/preflight.out ]; then
MISSING=$(sed -n "s/.*ModuleNotFoundError: No module named '\([^']\+\)'.*/\1/p" /tmp/preflight.out 2>/dev/null | head -20 || true)
if [ -z "$MISSING" ]; then
MISSING=$(sed -n "s/.*ImportError: No module named \([^ ]\+\).*/\1/p" /tmp/preflight.out 2>/dev/null | head -20 || true)
fi
fi
if [ -n "$MISSING" ]; then
echo "Attempting to install missing modules:"
# Use a more robust approach to avoid SIGPIPE issues
while IFS= read -r mod || [ -n "$mod" ]; do
[ -z "$mod" ] && continue
pkg="$mod"
case "$pkg" in
PIL) pkg="Pillow";;
cv2) pkg="opencv-python";;
yaml) pkg="PyYAML";;
skimage) pkg="scikit-image";;
sklearn) pkg="scikit-learn";;
Crypto) pkg="pycryptodome";;
esac
echo " - pip install $pkg"
pip install "$pkg" || true
done <<< "$MISSING"
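# Example of the mapping above: a log line "ModuleNotFoundError: No module named 'cv2'"
# yields mod=cv2, which the case statement maps to 'pip install opencv-python'.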
echo "Re-running preflight tests after installs..."
set +e
# Use timeout for re-running preflight tests to prevent hanging
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
timeout 300 bash -c "eval '${PYTEST_CMD_RUN} -q'" >/tmp/preflight2.out 2>&1 || true
else
timeout 300 bash -c "eval '${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/'" >/tmp/preflight2.out 2>&1 || true
fi
set -e
fi
fi
fi
# Full tests before optimization (with detailed logging)
TEST_LOG_DIR="${TEST_LOG_DIR:-/home/ubuntu/app/logs}"
mkdir -p "$TEST_LOG_DIR"
TS2=$(date -Is | sed 's/[:+]/-/g')
TEST_LOG_FILE="$TEST_LOG_DIR/tests-$TS2.log"
touch "$TEST_LOG_FILE" && chmod 666 "$TEST_LOG_FILE"
ln -sfn "$TEST_LOG_FILE" "$TEST_LOG_DIR/tests.log" || true
_stage "pre_tests_start"
echo "Running pre-optimization tests: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
echo "Using timeout protection (30 minutes) to prevent hanging tests..." | tee -a "$TEST_LOG_FILE"
set +e
# Use timeout to prevent hanging tests
MAIN_TEST_TIMEOUT=1800 # 30 minutes
TEST_RC=124 # Default to timeout
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
# Check if the command already includes the tests directory
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
# Command already includes tests directory, execute as-is
echo "Debug: Executing command as-is (already includes tests dir): ${PYTEST_CMD_RUN} (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
else
# Command doesn't include tests directory, append it
echo "Debug: Appending tests directory: ${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/ (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
fi
else
# Non-pytest runner, execute as-is
echo "Debug: Non-pytest runner, executing as-is: ${PYTEST_CMD_RUN} (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
fi
else
# No tests directory, execute as-is
echo "Debug: No tests directory, executing as-is: ${PYTEST_CMD_RUN} (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
fi
# Log timeout result
if [ $TEST_RC -eq 124 ]; then
echo "⚠️ Pre-optimization tests timed out after ${MAIN_TEST_TIMEOUT} seconds" | tee -a "$TEST_LOG_FILE"
echo "This may indicate hanging tests or very slow test execution" | tee -a "$TEST_LOG_FILE"
fi
# Dynamic fallback for non-pytest runners emitting argparse errors
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
echo "Pre-optimization tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
_stage "pre_tests_end" "\"rc\":$TEST_RC"
# Persist exit code early if wrapper provided EXIT_FILE
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
# If tests below threshold, run Claude Code CLI setup loop
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
echo "Evaluating test pass ratio for Claude Code CLI setup gate..." | tee -a "$TEST_LOG_FILE"
# Add timeout protection for test parsing to prevent hanging
echo "DEBUG: Starting test parsing with timeout protection..." | tee -a "$TEST_LOG_FILE"
# Use a subshell with timeout to prevent hanging
(
# Attempt to extract passed/failed/errors from log tail (robust to order)
# Debug: Show what we're looking for in the log
echo "DEBUG: Looking for test summary in log file..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Last 10 lines of test log:" | tee -a "$TEST_LOG_FILE"
# Use a temporary file to avoid reading from the same file being written to
TEMP_LOG_TAIL=$(mktemp)
tail -10 "$TEST_LOG_FILE" > "$TEMP_LOG_TAIL" 2>/dev/null || echo "No log content available"
cat "$TEMP_LOG_TAIL" | tee -a "$TEST_LOG_FILE"
rm -f "$TEMP_LOG_TAIL"
# Extract test counts with corrected regex patterns
# Handle different pytest output formats: "X passed, Y failed, Z errors" or "X passed, Y errors"
# Use grep to extract the number immediately before the keyword (handles ANSI codes)
# '|| true' keeps set -e from killing the subshell when a count is absent;
# 'error(s)?' also matches pytest's singular "1 error" summary
PASSED=$(grep -oE '[0-9]+ passed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
FAILED=$(grep -oE '[0-9]+ failed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
ERRORS=$(grep -oE '[0-9]+ error(s)?' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
# Debug: Show what each regex extracted
echo "DEBUG: Raw extracted values - PASSED='$PASSED' FAILED='$FAILED' ERRORS='$ERRORS'" | tee -a "$TEST_LOG_FILE"
# If no summary line found (interrupted test run), count individual test results
if [ -z "$PASSED" ] && [ -z "$FAILED" ] && [ -z "$ERRORS" ]; then
echo "DEBUG: No test summary found, analyzing test execution..." | tee -a "$TEST_LOG_FILE"
# Check if tests actually ran by looking for common test execution indicators
if grep -q "No such file or directory\|command not found\|make: \*\*\*" "$TEST_LOG_FILE"; then
echo "DEBUG: Detected build/test command failure - tests never executed" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat command failure as an error
elif grep -q "PASSED\|FAILED\|SKIPPED" "$TEST_LOG_FILE"; then
echo "DEBUG: Found individual test results, counting them..." | tee -a "$TEST_LOG_FILE"
# grep -c already prints 0 on no match (while exiting 1), so '|| echo 0'
# would capture "0\n0" and break the arithmetic below; use '|| true' instead
PASSED=$(grep -c "PASSED" "$TEST_LOG_FILE" 2>/dev/null || true)
FAILED=$(grep -c "FAILED" "$TEST_LOG_FILE" 2>/dev/null || true)
SKIPPED=$(grep -c "SKIPPED" "$TEST_LOG_FILE" 2>/dev/null || true)
ERRORS=0 # Individual test results don't show "errors" - they show as "FAILED"
echo "DEBUG: Individual test counts - PASSED=$PASSED FAILED=$FAILED SKIPPED=$SKIPPED" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: No test execution detected - treating as setup failure" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat as setup failure
fi
fi
# Set defaults if extraction failed
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
echo "DEBUG: After extraction and defaults - PASSED=$PASSED FAILED=$FAILED ERRORS=$ERRORS" | tee -a "$TEST_LOG_FILE"
# Calculate total and ratio
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
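# Worked example (hypothetical counts): PASSED=30 FAILED=8 ERRORS=2 gives
# TOTAL=40 and RATIO=75, which clears the 50% gate checked below.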
echo "DEBUG: Calculated totals - TOTAL=$TOTAL RATIO=$RATIO%" | tee -a "$TEST_LOG_FILE"
echo "Parsed test summary: passed=$PASSED failed=$FAILED errors=$ERRORS total=$TOTAL ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
# Export variables for use outside the subshell
echo "PASSED=$PASSED" > /tmp/test_parsing_result
echo "FAILED=$FAILED" >> /tmp/test_parsing_result
echo "ERRORS=$ERRORS" >> /tmp/test_parsing_result
echo "TOTAL=$TOTAL" >> /tmp/test_parsing_result
echo "RATIO=$RATIO" >> /tmp/test_parsing_result
) &
PARSE_PID=$!
# Wait for parsing with a timeout. 'timeout 30 wait $PID' cannot work here:
# 'wait' is a shell builtin, not an external command, so poll the PID instead.
PARSE_WAITED=0
while kill -0 "$PARSE_PID" 2>/dev/null && [ "$PARSE_WAITED" -lt 30 ]; do
sleep 1
PARSE_WAITED=$((PARSE_WAITED + 1))
done
if ! kill -0 "$PARSE_PID" 2>/dev/null; then
wait "$PARSE_PID" 2>/dev/null || true
echo "DEBUG: Test parsing completed successfully" | tee -a "$TEST_LOG_FILE"
# Load results from temp file
if [ -f /tmp/test_parsing_result ]; then
source /tmp/test_parsing_result
rm -f /tmp/test_parsing_result
else
echo "DEBUG: No parsing results found, using fallback values" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
else
echo "DEBUG: Test parsing timed out, using fallback values" | tee -a "$TEST_LOG_FILE"
kill "$PARSE_PID" 2>/dev/null || true
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
echo "Final test parsing results: passed=$PASSED failed=$FAILED errors=$ERRORS total=$TOTAL ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -eq 0 ] || [ "$RATIO" -lt 50 ]; then
echo "Tests below threshold; invoking Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to start Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Current working directory: $(pwd)" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Environment check - ANTHROPIC_API_KEY: $([ -n "${ANTHROPIC_API_KEY:-}" ] && echo "SET" || echo "NOT SET")" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Environment check - GITHUB_TOKEN: $([ -n "${GITHUB_TOKEN:-}" ] && echo "SET" || echo "NOT SET")" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Environment check - CODEFLASH_API_KEY: $([ -n "${CODEFLASH_API_KEY:-}" ] && echo "SET" || echo "NOT SET")" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to call _stage claude_round0_start..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round0_start"
echo "DEBUG: Successfully called _stage claude_round0_start" | tee -a "$TEST_LOG_FILE"
# ============================================================================
# CLAUDE CODE CLI SETUP INTEGRATION
# ============================================================================
# This section replaces the previous Python-based LLM setup helper with
# Claude Code CLI, which provides a more robust and interactive approach to
# repository setup. Claude Code CLI can:
#
# 1. Analyze repository structure and dependencies
# 2. Install missing packages and fix import issues
# 3. Handle custom test runners and build systems
# 4. Iteratively debug and resolve setup problems
# 5. Work directly in the terminal with full context
#
# The integration includes:
# - Comprehensive setup prompts with project context
# - Automatic CLI installation (npm or pip fallback)
# - Timeout protection (30 min initial, 20 min additional rounds)
# - Detailed logging of all Claude actions
# - Graceful fallback if Claude Code CLI is unavailable
# ============================================================================
echo "DEBUG: About to create Claude setup prompt..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Checking if /tmp is writable..." | tee -a "$TEST_LOG_FILE"
if [ -w "/tmp" ]; then
echo "DEBUG: /tmp is writable" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: ERROR - /tmp is not writable!" | tee -a "$TEST_LOG_FILE"
fi
# Create comprehensive prompt for Claude Code CLI
echo "DEBUG: Creating prompt file..." | tee -a "$TEST_LOG_FILE"
cat > /tmp/claude_setup_prompt.md << 'EOF'
# Repository Setup Assistant
You are an expert Python developer tasked with setting up a repository for testing. Your goal is to analyze the repository, install missing dependencies, and ensure tests can run successfully.
## Your Mission
1. **Analyze the repository structure** to understand the project layout
2. **Identify and install missing dependencies** that are causing test failures
3. **Fix common setup issues** like import path problems, missing packages, or configuration issues
4. **Run tests** to verify the setup is working
5. **Achieve at least 50% test pass rate** (you don't need 100% - some tests may legitimately fail)
## Available Tools & Capabilities
- **Bash**: Full terminal access in the repository directory with sudo privileges
- **Edit**: Modify any file in the repository (Python, config files, setup files, etc.)
- **FileManager**: Browse, read, and analyze repository structure and files
- **WebSearch**: Look up documentation, package information, and solutions online
- **Python Environment**: Virtual environment activated at `.venv/` with `.venv/bin/python` and `.venv/bin/pip`
- **System Access**: Can install system packages with `sudo apt-get install`
- **Package Management**: Can install Python packages with pip, conda, or other package managers
- **File Operations**: Can create, modify, delete files and directories
- **Process Management**: Can run tests, scripts, and other processes
## Key Guidelines
- **Focus on missing dependencies**: Look for ImportError, ModuleNotFoundError in test outputs
- **Use project's own install scripts** when available (like `devscripts/install_deps.py`, `setup.py`, etc.)
- **Install from requirements files** if they exist (`requirements.txt`, `requirements-dev.txt`, etc.)
- **Check pyproject.toml** for project dependencies and optional dependencies
- **Handle custom test runners**: Some projects use custom test scripts instead of pytest
- **Fix import path issues**: Add PYTHONPATH exports if needed
- **Install test-specific dependencies**: pytest plugins, coverage tools, etc.
## Common Patterns to Handle
1. **Custom dependency installers**: `python devscripts/install_deps.py`, `pip install -e .`
2. **Test runners with special args**: Projects may have `run_tests.py` or similar
3. **Missing test dependencies**: pytest plugins, mock libraries, etc.
4. **Path issues**: Repository modules not in PYTHONPATH
5. **Optional dependencies**: Install extras like `pip install -e .[test]`
## Effective Tool Usage Strategy
- **Start with FileManager**: Explore repository structure to understand the project layout
- **Use WebSearch**: Look up package documentation, installation guides, and common issues
- **Use Edit strategically**: Modify configuration files, fix import paths, update dependencies
- **Use Bash systematically**: Run commands step by step, check outputs, iterate based on results
- **Combine tools**: Use FileManager to find files, WebSearch to understand them, Edit to fix them, Bash to test
## Advanced Troubleshooting Techniques
- **Version conflicts**: Use `pip install --upgrade` or specific versions
- **Environment issues**: Check Python version, virtual environment activation
- **Permission problems**: Use `sudo` for system packages, check file permissions
- **Network issues**: Use `--timeout` flags, check proxy settings
- **Build failures**: Install build tools like `build-essential`, `python3-dev`
- **Missing system libraries**: `sudo apt-get install -y libffi-dev libssl-dev libxml2-dev`
## Success Criteria
- Tests run without ImportError/ModuleNotFoundError
- At least 50% of tests pass (some failures are acceptable)
- No critical setup errors that prevent test execution
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Current Context
- Repository: {REPO_URL}
- Tests directory: {TESTS_ROOT}
- Test command: {PYTEST_CMD}
- Previous test output shows dependency/setup issues
## Custom Run Configuration (if applicable)
{CUSTOM_RUN_CONTEXT}
## Recent Test Errors (Summary)
{TEST_ERRORS_SUMMARY}
Start by examining the repository structure and recent test failures, then systematically address the issues.
EOF
echo "DEBUG: Prompt file created successfully" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Prompt file size: $(wc -c < /tmp/claude_setup_prompt.md) bytes" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to replace placeholders in prompt..." | tee -a "$TEST_LOG_FILE"
# Replace placeholders in prompt
sed -i "s|{REPO_URL}|${GITHUB_REPO_URL:-unknown}|g" /tmp/claude_setup_prompt.md
sed -i "s|{TESTS_ROOT}|${TESTS_ROOT_VALUE:-}|g" /tmp/claude_setup_prompt.md
sed -i "s|{PYTEST_CMD}|${PYTEST_CMD_VALUE:-}|g" /tmp/claude_setup_prompt.md
# Add custom run context if this is a custom optimization
CUSTOM_RUN_CONTEXT=""
if [ "$CUSTOM_RUN_MODE" = true ]; then
echo "DEBUG: Adding custom run context to Claude prompt..." | tee -a "$TEST_LOG_FILE"
CUSTOM_RUN_CONTEXT="
### Custom Optimization Configuration
- **Optimization Mode**: ${OPTIMIZATION_MODE:-'not specified'}
- **Custom Module Root**: ${CUSTOM_MODULE_ROOT:-'using default'}
- **Custom Tests Root**: ${CUSTOM_TESTS_ROOT:-'using default'}
- **Async Mode**: ${CUSTOM_ASYNC_MODE:-'default'}
- **Verbose Output**: ${CUSTOM_VERBOSE:-'default'}
- **No Pull Request**: ${CUSTOM_NO_PR:-'default'}
- **Benchmark Mode**: ${CUSTOM_BENCHMARK:-'default'}
### Mode-Specific Targets"
case "$OPTIMIZATION_MODE" in
"single_function")
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
- **Target File**: ${CUSTOM_FILE_PATH:-'not specified'}
- **Target Function**: ${CUSTOM_FUNCTION_NAME:-'not specified'}
- **Focus**: Ensure the target function's dependencies are properly installed
- **Note**: This is a single function optimization targeting a specific function in a specific file"
;;
"trace_and_optimize")
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
- **Target Script**: ${CUSTOM_SCRIPT_PATH:-'not specified'}
- **Trace File**: ${CUSTOM_TRACE_FILE:-'default location'}
- **Tracer Timeout**: ${CUSTOM_TRACER_TIMEOUT:-'default'}
- **Trace Only**: ${CUSTOM_TRACE_ONLY:-'false'}
- **Focus**: Ensure the script/entry point and its dependencies are ready
- **Note**: This will trace execution of a specific script and optimize all functions it calls"
;;
"optimize_all")
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
- **Target Directory**: ${CUSTOM_TARGET_DIRECTORY:-'entire codebase'}
- **Focus**: Ensure all project dependencies and test infrastructure are ready
- **Note**: This will analyze and optimize all functions in the project"
;;
esac
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
### Custom Run Setup Requirements
- Pay special attention to any custom configuration requirements for this optimization mode
- Ensure codeflash[asyncio] is installed if async mode is enabled
- Verify that all target files/scripts exist and are accessible
- Install any additional dependencies needed for the specific optimization mode
- Ensure the custom module root and tests root are properly configured"
else
CUSTOM_RUN_CONTEXT="No custom run configuration - using standard optimization setup"
fi
echo "DEBUG: Custom run context prepared, length: ${#CUSTOM_RUN_CONTEXT}" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to build error summary..." | tee -a "$TEST_LOG_FILE"
# Build a short error summary from the current test log (first 30 error lines) - simplified approach
echo "DEBUG: Reading test log file for errors..." | tee -a "$TEST_LOG_FILE"
TEST_ERRORS_SUMMARY=$(head -n 400 "$TEST_LOG_FILE" 2>/dev/null | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30 | sed 's/|/\|/g' | sed 's/\\/\\\\/g' || echo "No errors found")
echo "DEBUG: Error summary extracted, length: ${#TEST_ERRORS_SUMMARY}" | tee -a "$TEST_LOG_FILE"
TEST_ERRORS_SUMMARY=${TEST_ERRORS_SUMMARY:-"No error summary available"}
echo "DEBUG: About to escape newlines..." | tee -a "$TEST_LOG_FILE"
# Escape newlines for sed replacement (simplified approach)
TEST_ERRORS_SUMMARY=$(echo "$TEST_ERRORS_SUMMARY" | tr '\n' '|' | sed 's/|/\\n/g')
echo "DEBUG: About to replace TEST_ERRORS_SUMMARY placeholder..." | tee -a "$TEST_LOG_FILE"
sed -i "s|{TEST_ERRORS_SUMMARY}|${TEST_ERRORS_SUMMARY}|g" /tmp/claude_setup_prompt.md
echo "DEBUG: About to replace CUSTOM_RUN_CONTEXT placeholder..." | tee -a "$TEST_LOG_FILE"
# Escape newlines for sed replacement
CUSTOM_RUN_CONTEXT_ESCAPED=$(echo "$CUSTOM_RUN_CONTEXT" | tr '\n' '|' | sed 's/|/\\n/g')
sed -i "s|{CUSTOM_RUN_CONTEXT}|${CUSTOM_RUN_CONTEXT_ESCAPED}|g" /tmp/claude_setup_prompt.md
echo "DEBUG: Placeholder replacement completed" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to snapshot environment..." | tee -a "$TEST_LOG_FILE"
# Snapshot current environment into constraints to avoid breaking pinned deps
pip freeze > .cf_constraints.txt || true
echo "DEBUG: Environment snapshot completed" | tee -a "$TEST_LOG_FILE"
export PIP_CONSTRAINT="$(pwd)/.cf_constraints.txt" # pip reads PIP_CONSTRAINT (singular)
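# Illustration (hypothetical pins): if .cf_constraints.txt contains "numpy==1.26.4",
# a later 'pip install numpy' resolves to 1.26.4 rather than upgrading it.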
# If common system deps are missing based on errors, try lightweight installs (best-effort)
if grep -q "libGL.so.1" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing libGL.so.1; installing headless OpenGL libs (libgl1, libglib2.0-0, libsm6, libxrender1, libxext6)..." | tee -a "$TEST_LOG_FILE"
# Handle APT lock issues by waiting and retrying
APT_RETRY_COUNT=0
APT_MAX_RETRIES=3
while [ $APT_RETRY_COUNT -lt $APT_MAX_RETRIES ]; do
if sudo apt-get update -y >/dev/null 2>&1; then
echo "APT update succeeded on attempt $((APT_RETRY_COUNT + 1))" | tee -a "$TEST_LOG_FILE"
break
else
APT_RETRY_COUNT=$((APT_RETRY_COUNT + 1))
echo "APT update failed on attempt $APT_RETRY_COUNT, retrying in 10 seconds..." | tee -a "$TEST_LOG_FILE"
sleep 10
fi
done
# Install packages with retry logic
APT_RETRY_COUNT=0
while [ $APT_RETRY_COUNT -lt $APT_MAX_RETRIES ]; do
if sudo apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 >/dev/null 2>&1; then
echo "OpenGL libraries installed successfully on attempt $((APT_RETRY_COUNT + 1))" | tee -a "$TEST_LOG_FILE"
break
else
APT_RETRY_COUNT=$((APT_RETRY_COUNT + 1))
echo "OpenGL library installation failed on attempt $APT_RETRY_COUNT, retrying in 10 seconds..." | tee -a "$TEST_LOG_FILE"
sleep 10
fi
done
# As a fallback in headless environments, prefer opencv-python-headless to avoid GUI backends
if pip show opencv-python >/dev/null 2>&1; then
echo "Installing opencv-python-headless as fallback for headless environment" | tee -a "$TEST_LOG_FILE"
pip install --upgrade opencv-python-headless || true
fi
fi
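# A reusable sketch of the apt retry pattern above; this hypothetical helper is defined for
# illustration only and is not wired into the flow below.
_apt_retry() {
    local tries=0 max="${APT_MAX_RETRIES:-3}"
    while [ "$tries" -lt "$max" ]; do
        if sudo apt-get "$@" >/dev/null 2>&1; then return 0; fi
        tries=$((tries + 1))
        sleep 10
    done
    return 1
}
# Example usage: _apt_retry update -y && _apt_retry install -y --no-install-recommends libgl1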
if grep -q "No module named 'tkinter'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing tkinter; installing Python Tk..." | tee -a "$TEST_LOG_FILE"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y --no-install-recommends python3-tk || true
fi
if grep -q "cannot import name 'Aer' from 'qiskit'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing qiskit-aer; installing..." | tee -a "$TEST_LOG_FILE"
pip install qiskit-aer || true
fi
# Run Claude Code CLI with the setup prompt
echo "DEBUG: About to start Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
echo "Starting Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to call _stage claude_setup_init..." | tee -a "$TEST_LOG_FILE"
_stage "claude_setup_init"
echo "DEBUG: Successfully called _stage claude_setup_init" | tee -a "$TEST_LOG_FILE"
# Prepare a session log regardless of availability so FE always finds a file
echo "DEBUG: About to create Claude log file..." | tee -a "$TEST_LOG_FILE"
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-$(date -u +%Y-%m-%dT%H-%M-%S).log"
echo "DEBUG: Claude log path: $CLAUDE_LOG" | tee -a "$TEST_LOG_FILE"
touch "$CLAUDE_LOG" 2>/dev/null || true
echo "DEBUG: Claude log file created" | tee -a "$TEST_LOG_FILE"
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
echo "DEBUG: Claude log file permissions set" | tee -a "$TEST_LOG_FILE"
echo "[claude] initializing setup session" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Initial log message written" | tee -a "$TEST_LOG_FILE"
# Ensure common local bin directory is in PATH (curl installer often writes here)
echo "DEBUG: About to update PATH..." | tee -a "$TEST_LOG_FILE"
export PATH="$HOME/.local/bin:$PATH"
echo "DEBUG: PATH updated, current PATH: $PATH" | tee -a "$TEST_LOG_FILE"
# Check if claude (Claude Code CLI) is available, or fallback to npx runner
echo "DEBUG: About to check for Claude Code CLI availability..." | tee -a "$TEST_LOG_FILE"
CLAUDE_CMD=""
echo "Checking for Claude Code CLI availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for global claude command first (should be pre-installed)
echo "DEBUG: Checking for global claude command..." | tee -a "$TEST_LOG_FILE"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found global claude CLI at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Found global claude command" | tee -a "$TEST_LOG_FILE"
# Test if it's actually Claude Code CLI
echo "DEBUG: Testing if global claude is Claude Code CLI..." | tee -a "$TEST_LOG_FILE"
if timeout 10 claude --version 2>&1 | grep -q "Claude Code"; then
echo "Confirmed: Global claude is Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Confirmed global claude is Claude Code CLI" | tee -a "$TEST_LOG_FILE"
else
echo "Warning: Global claude may not be Claude Code CLI, will try npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Global claude is not Claude Code CLI, clearing CLAUDE_CMD" | tee -a "$TEST_LOG_FILE"
CLAUDE_CMD=""
fi
else
echo "DEBUG: Global claude command not found" | tee -a "$TEST_LOG_FILE"
fi
# Check for npx if claude not found or not confirmed
echo "DEBUG: Checking for npx command..." | tee -a "$TEST_LOG_FILE"
if [ -z "$CLAUDE_CMD" ] && command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Found npx command, set CLAUDE_CMD to npx" | tee -a "$TEST_LOG_FILE"
# Test npx availability
echo "DEBUG: Testing npx availability..." | tee -a "$TEST_LOG_FILE"
if timeout 10 npx --version >/dev/null 2>&1; then
echo "npx is available and working" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: npx is working correctly" | tee -a "$TEST_LOG_FILE"
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: npx test failed, clearing CLAUDE_CMD" | tee -a "$TEST_LOG_FILE"
CLAUDE_CMD=""
fi
else
echo "DEBUG: npx not found or CLAUDE_CMD already set" | tee -a "$TEST_LOG_FILE"
fi
if [ -z "$CLAUDE_CMD" ]; then
echo "DEBUG: CLAUDE_CMD is empty, attempting installation..." | tee -a "$TEST_LOG_FILE"
echo "Claude Code CLI not found; attempting installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm global install first (with proper permissions)
echo "DEBUG: Checking for npm command..." | tee -a "$TEST_LOG_FILE"
if command -v npm >/dev/null 2>&1; then
echo "DEBUG: Found npm command" | tee -a "$TEST_LOG_FILE"
echo "Installing @anthropic-ai/claude-code via npm..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first (for system-wide install)
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed, trying user-level install..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install (no sudo)
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after user-level install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed, will rely on npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# If still no claude, try installing Node.js
if [ -z "$CLAUDE_CMD" ] && command -v apt-get >/dev/null 2>&1; then
echo "Installing Node.js LTS to enable Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
sudo apt-get update -y >/dev/null 2>&1 || true
# Install Node.js repository
if command -v curl >/dev/null 2>&1; then
echo "Adding Node.js repository..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - 2>&1 | tee -a "$CLAUDE_LOG" || true
fi
# Install Node.js
echo "Installing Node.js..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if sudo apt-get install -y nodejs 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Node.js installation succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm install again with proper permission handling
if command -v npm >/dev/null 2>&1; then
echo "Retrying npm install after Node.js installation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed after Node.js install, trying user-level..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + user-level npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed after Node.js install, will use npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
else
echo "Node.js installation failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Final check for available commands
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after installation at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI after installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test npx availability with a simple command
echo "Testing npx availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 30 npx --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test if we can actually run the Claude Code CLI via npx
echo "Testing Claude Code CLI via npx..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 60 npx -y @anthropic-ai/claude-code --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude Code CLI via npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: Claude Code CLI via npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This could be due to network issues or package availability" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# Guard: if still unavailable, skip gracefully
echo "DEBUG: Final check - CLAUDE_CMD value: '${CLAUDE_CMD:-EMPTY}'" | tee -a "$TEST_LOG_FILE"
if [ -z "$CLAUDE_CMD" ]; then
echo "DEBUG: CLAUDE_CMD is still empty, skipping setup assistance" | tee -a "$TEST_LOG_FILE"
echo "❌ Claude Code CLI unavailable; skipping setup assistance" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "All installation attempts failed:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Global npm install failed (permission issues)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - User-level npm install failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - npx fallback not available" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing without Claude Code CLI assistance..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to call _stage claude_round0_end with unavailable flag..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round0_end" "\"rc\":127,\"unavailable\":true"
echo "DEBUG: Successfully called _stage claude_round0_end" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: CLAUDE_CMD is set, proceeding with Claude Code CLI execution" | tee -a "$TEST_LOG_FILE"
echo "✅ Claude Code CLI is available and ready to use" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Using command: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Ensure Claude Code CLI is authenticated (headless)
echo "DEBUG: About to check ANTHROPIC_API_KEY..." | tee -a "$TEST_LOG_FILE"
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
echo "DEBUG: ANTHROPIC_API_KEY is not set" | tee -a "$TEST_LOG_FILE"
echo "ANTHROPIC_API_KEY not set; Claude CLI may fail to authenticate" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Setting ANTHROPIC_API_KEY environment variable for Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
echo "DEBUG: Set ANTHROPIC_API_KEY to empty value" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: ANTHROPIC_API_KEY is set" | tee -a "$TEST_LOG_FILE"
echo "ANTHROPIC_API_KEY is set; configuring Claude CLI authentication..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Best-effort non-interactive auth via config (ignore failures)
echo "Attempting to set API key via Claude CLI config..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to run Claude CLI config command..." | tee -a "$TEST_LOG_FILE"
# Note: API key is passed to Claude CLI config but not logged for security
if timeout 10 $CLAUDE_CMD config set api_key "${ANTHROPIC_API_KEY}" 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI API key configuration succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Claude CLI config command succeeded" | tee -a "$TEST_LOG_FILE"
else
echo "Claude CLI API key configuration failed, will rely on environment variable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Claude CLI config command failed" | tee -a "$TEST_LOG_FILE"
fi
# Also set as environment variable as backup
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY}"
echo "DEBUG: Exported ANTHROPIC_API_KEY as environment variable" | tee -a "$TEST_LOG_FILE"
fi
# Run Claude Code CLI with the setup prompt using print mode for automation
echo "DEBUG: About to run Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
echo "Running Claude Code CLI setup session using: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to display prompt content..." | tee -a "$TEST_LOG_FILE"
cat /tmp/claude_setup_prompt.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Prompt content displayed successfully" | tee -a "$TEST_LOG_FILE"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
echo "DEBUG: Set REPO_DIR to: $REPO_DIR" | tee -a "$TEST_LOG_FILE"
# Pre-flight checks for Claude Code CLI
echo "DEBUG: About to perform pre-flight checks..." | tee -a "$TEST_LOG_FILE"
echo "Performing pre-flight checks for Claude Code CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check if ANTHROPIC_API_KEY is set
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
echo "❌ ANTHROPIC_API_KEY is not set - Claude Code CLI will fail" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Setting a placeholder API key to prevent immediate crash..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Note: Placeholder key is set but not logged for security
export ANTHROPIC_API_KEY="placeholder-key-for-testing"
else
echo "✅ ANTHROPIC_API_KEY is set" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Check if repository directory exists and is writable
if [ -d "$REPO_DIR" ]; then
if [ -w "$REPO_DIR" ]; then
echo "✅ Repository directory $REPO_DIR exists and is writable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "⚠️ Repository directory $REPO_DIR exists but is not writable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Attempting to fix permissions..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
chmod -R 755 "$REPO_DIR" 2>/dev/null || true
fi
else
echo "❌ Repository directory $REPO_DIR does not exist" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Creating directory..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
mkdir -p "$REPO_DIR" 2>/dev/null || true
fi
# Check Claude Code CLI installation
echo "Testing Claude Code CLI installation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if command -v "$CLAUDE_CMD" >/dev/null 2>&1; then
echo "✅ Claude Code CLI command found: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI command not found: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This will likely cause immediate failure" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Build Claude Code CLI command with proper arguments
# Note: Claude Code CLI uses different argument structure than expected
CLAUDE_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for different CLI versions
CLAUDE_FLAGS_PERM="$CLAUDE_BASE_ARGS --print --output-format text --max-turns 60 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit,FileManager,WebSearch"
CLAUDE_FLAGS_MIN="$CLAUDE_BASE_ARGS --print --output-format text --max-turns 60"
CLAUDE_FLAGS_BASIC="$CLAUDE_BASE_ARGS --print --output-format text"
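# Illustrative sketch of the graduated fallback these three flag sets implement (the actual
# retries below also vary timeouts and log destinations):
#   for flags in "$CLAUDE_FLAGS_PERM" "$CLAUDE_FLAGS_MIN" "$CLAUDE_FLAGS_BASIC"; do
#       timeout "$TIMEOUT_DURATION" $CLAUDE_CMD $flags < /tmp/claude_setup_prompt.md && break
#   done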
echo "Executing: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test Claude CLI is working with a simple command first
echo "Testing Claude CLI availability (version)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Handle npx differently as it may take longer to download and run
if [[ "$CLAUDE_CMD" == npx* ]]; then
echo "Testing npx-based Claude CLI (may take longer for first run)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 120 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI via npx version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI via npx version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is common for npx on first run due to package download time" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
if timeout 30 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Use timeout to enforce a hard limit; wait synchronously so tests run after it finishes/timeout
echo "DEBUG: About to set error handling flags..." | tee -a "$TEST_LOG_FILE"
set +e # Don't exit on failure
set -o pipefail
echo "DEBUG: Error handling flags set" | tee -a "$TEST_LOG_FILE"
# Try with full permissions first (pipe prompt via stdin to avoid argument parsing issues)
echo "DEBUG: About to attempt Claude CLI execution..." | tee -a "$TEST_LOG_FILE"
echo "Attempting Claude CLI execution with elevated permissions (non-interactive)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_EXIT_CODE=1
echo "DEBUG: Set CLAUDE_EXIT_CODE to 1" | tee -a "$TEST_LOG_FILE"
# First attempt: Try with full permissions
# Use longer timeout for npx as it may need to download packages
echo "DEBUG: Setting timeout duration..." | tee -a "$TEST_LOG_FILE"
TIMEOUT_DURATION=2700
if [[ "$CLAUDE_CMD" == npx* ]]; then
TIMEOUT_DURATION=3600 # 60 minutes for npx (includes download time)
echo "Using extended timeout (60 min) for npx-based Claude CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Set extended timeout for npx" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: Using standard timeout (45 min)" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: Final timeout duration: $TIMEOUT_DURATION seconds" | tee -a "$TEST_LOG_FILE"
# Create a debug log file for Claude Code CLI
echo "DEBUG: About to create Claude debug log..." | tee -a "$TEST_LOG_FILE"
CLAUDE_DEBUG_LOG="/tmp/claude_debug_$(date +%s).log"
echo "Creating Claude Code CLI debug log: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Claude debug log path: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE"
# Pre-execution environment validation
echo "🔍 Pre-execution environment validation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Working directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Python version: $(python --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Pip version: $(pip --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Virtualenv version: $(virtualenv --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI version: $($CLAUDE_CMD --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Available disk space: $(df -h . | tail -1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Memory usage: $(free -h | grep Mem)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Repository size: $(du -sh $REPO_DIR 2>/dev/null || echo 'Unknown')" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
echo "DEBUG: Repository directory exists, executing Claude CLI in repo dir..." | tee -a "$TEST_LOG_FILE"
echo "Executing Claude CLI in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Command: ( cd \"$REPO_DIR\" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md )" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute Claude CLI command..." | tee -a "$TEST_LOG_FILE"
# Execute with detailed error capture
{
echo "=== CLAUDE CODE CLI EXECUTION START ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Working directory: $REPO_DIR" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Command: $CLAUDE_CMD $CLAUDE_FLAGS_PERM" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Environment variables:" | tee -a "$CLAUDE_DEBUG_LOG"
_safe_log_env "(ANTHROPIC|PATH|NODE)" "$CLAUDE_DEBUG_LOG"
echo "=== EXECUTION OUTPUT ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "DEBUG: Starting Claude CLI execution..." | tee -a "$TEST_LOG_FILE"
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
# This { ... } block is itself piped to tee and therefore runs in a subshell; persist the
# status to a file so the parent shell can recover it after the block
echo "$CLI_STATUS" > /tmp/claude_cli_status
echo "DEBUG: Claude CLI execution completed with exit code: $CLI_STATUS" | tee -a "$TEST_LOG_FILE"
echo "=== EXECUTION END ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Exit code: $CLI_STATUS" | tee -a "$CLAUDE_DEBUG_LOG"
} | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "DEBUG: Repository directory does not exist, executing Claude CLI in current directory..." | tee -a "$TEST_LOG_FILE"
echo "Executing Claude CLI in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Command: timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute Claude CLI command in current directory..." | tee -a "$TEST_LOG_FILE"
# Execute with detailed error capture
{
echo "=== CLAUDE CODE CLI EXECUTION START ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Working directory: $(pwd)" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Command: $CLAUDE_CMD $CLAUDE_FLAGS_PERM" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Environment variables:" | tee -a "$CLAUDE_DEBUG_LOG"
_safe_log_env "(ANTHROPIC|PATH|NODE)" "$CLAUDE_DEBUG_LOG"
echo "=== EXECUTION OUTPUT ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "DEBUG: Starting Claude CLI execution in current directory..." | tee -a "$TEST_LOG_FILE"
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
# As above, the enclosing block runs in a pipeline subshell; persist the status to a file
echo "$CLI_STATUS" > /tmp/claude_cli_status
echo "DEBUG: Claude CLI execution completed with exit code: $CLI_STATUS" | tee -a "$TEST_LOG_FILE"
echo "=== EXECUTION END ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Exit code: $CLI_STATUS" | tee -a "$CLAUDE_DEBUG_LOG"
} | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# The { ... } | tee blocks above ran in pipeline subshells, so CLI_STATUS assigned inside them
# is lost here; recover the real exit code from the status file, defaulting to 1 if absent
echo "DEBUG: About to recover CLI_STATUS from status file..." | tee -a "$TEST_LOG_FILE"
CLI_STATUS=$(cat /tmp/claude_cli_status 2>/dev/null || echo 1)
rm -f /tmp/claude_cli_status
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "DEBUG: CLI_STATUS set to: $CLI_STATUS" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: CLAUDE_EXIT_CODE set to: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE"
echo "Claude CLI attempt 1 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Enhanced error handling and retry logic
echo "DEBUG: About to check Claude CLI exit code..." | tee -a "$TEST_LOG_FILE"
if [ $CLAUDE_EXIT_CODE -ne 0 ]; then
echo "DEBUG: Claude CLI failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE"
echo "❌ Claude CLI attempt 1 failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Analyze failure and apply fixes
echo "🔍 Analyzing failure and applying fixes..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for common failure patterns and fix them
if grep -q "Permission denied" "$CLAUDE_DEBUG_LOG"; then
echo "🔧 Fixing permission issues..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
chmod -R 755 "$REPO_DIR" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
if grep -q "No space left" "$CLAUDE_DEBUG_LOG"; then
echo "🔧 Cleaning up disk space..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip cache purge 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
rm -rf /tmp/* 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
# Retry with different approach
echo "🔄 Retrying Claude CLI with alternative configuration..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt with reduced complexity
CLAUDE_FLAGS_RETRY="--model claude-3-5-haiku-20241022 --add-dir \"$REPO_DIR\" --print --output-format text --max-turns 40 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit,FileManager"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_RETRY < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_RETRY < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 2 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ $CLAUDE_EXIT_CODE -ne 0 ]; then
echo "❌ Claude CLI attempt 2 also failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Final retry with minimal configuration
echo "🔄 Final retry with minimal Claude CLI configuration..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_FLAGS_MINIMAL="--model claude-3-5-haiku-20241022 --add-dir \"$REPO_DIR\" --print --output-format text --max-turns 20 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 900 $CLAUDE_CMD $CLAUDE_FLAGS_MINIMAL < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 900 $CLAUDE_CMD $CLAUDE_FLAGS_MINIMAL < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 3 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ $CLAUDE_EXIT_CODE -ne 0 ]; then
echo "❌ All Claude CLI attempts failed. Final exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Provide detailed failure analysis
echo "📊 Claude CLI Failure Analysis:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Repository: $GITHUB_REPO_URL" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Test command: $PYTEST_CMD_VALUE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Working directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude command: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Debug log: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "✅ Claude Code CLI session finished successfully on final retry" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "✅ Claude Code CLI session finished successfully on retry" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "✅ Claude Code CLI session finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 2 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 3 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round0_end" "\"rc\":$CLAUDE_EXIT_CODE"
set -e
trap - ERR
if [ $CLAUDE_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI session finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_EXIT_CODE -eq 124 ]; then
TIMEOUT_MINUTES=$((TIMEOUT_DURATION / 60))
echo "⏰ Claude Code CLI session timed out after $TIMEOUT_DURATION seconds ($TIMEOUT_MINUTES minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with post-CLAUDE tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI session failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Debug log available at: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Virtualenv version conflicts (like the one detected earlier)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Attempt to fix virtualenv issue if detected
if grep -q "virtualenv.*missing.*requires" "$TEST_LOG_FILE" || grep -q "virtualenv.*20\." "$TEST_LOG_FILE"; then
echo "🔧 Detected virtualenv version conflict - attempting to fix..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Installing compatible virtualenv version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try multiple approaches to fix virtualenv
echo "Attempting to downgrade virtualenv to compatible version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install "virtualenv<20.26.3" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try uninstalling and reinstalling
echo "Attempting to reinstall virtualenv..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip uninstall -y virtualenv 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
pip install "virtualenv<20.26.3" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try installing specific version that works with tox
echo "Attempting to install specific virtualenv version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install "virtualenv==20.26.2" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
echo "Virtualenv fix attempt completed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Attempt to fix missing command issues
if grep -q "command not found" "$TEST_LOG_FILE"; then
echo "🔧 Detected missing command - attempting to fix..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for specific missing commands and install them
if grep -q "bench: command not found" "$TEST_LOG_FILE"; then
echo "Installing frappe-bench..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install frappe-bench 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Also try installing frappe-bench via system package manager
echo "Attempting to install frappe-bench via system package manager..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y frappe-bench 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try installing frappe-bench via pip with specific version
echo "Attempting to install frappe-bench with specific version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install "frappe-bench>=5.0.0" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
if grep -q "tox: command not found" "$TEST_LOG_FILE"; then
echo "Installing tox..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install tox 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
# Generic missing command handler
MISSING_CMD=$(grep "command not found" "$TEST_LOG_FILE" | head -1 | awk '{print $1}' | sed 's/://')
if [ -n "$MISSING_CMD" ] && [ "$MISSING_CMD" != "bench" ] && [ "$MISSING_CMD" != "tox" ]; then
echo "Attempting to install missing command: $MISSING_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try pip first
pip install "$MISSING_CMD" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try system package manager
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y "$MISSING_CMD" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
echo "Missing command fix attempt completed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
echo "Continuing with post-CLAUDE tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
echo "DEBUG: About to clean up prompt file..." | tee -a "$TEST_LOG_FILE"
# Clean up prompt file
rm -f /tmp/claude_setup_prompt.md
echo "DEBUG: Prompt file cleaned up" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to start post-LLM tests..." | tee -a "$TEST_LOG_FILE"
_stage "post_llm_tests_start"
echo "Re-running full tests after Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Using timeout protection (30 minutes) to prevent hanging tests..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to set error handling for post-LLM tests..." | tee -a "$TEST_LOG_FILE"
set +e
# Use timeout to prevent hanging tests
echo "DEBUG: Setting post-LLM test timeout..." | tee -a "$TEST_LOG_FILE"
POST_LLM_TEST_TIMEOUT=1800 # 30 minutes
TEST_RC=124 # Default to timeout
echo "DEBUG: Post-LLM test timeout: $POST_LLM_TEST_TIMEOUT seconds" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to check tests directory..." | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "DEBUG: Tests directory exists: ${TESTS_ROOT_VALUE}" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to check if command is pytest runner..." | tee -a "$TEST_LOG_FILE"
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
echo "DEBUG: Command is pytest runner" | tee -a "$TEST_LOG_FILE"
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
echo "DEBUG: Command already includes tests directory" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: Command does not include tests directory, appending it" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/ (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test with tests directory..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
fi
else
echo "DEBUG: Command is not pytest runner" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test as-is..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
fi
else
echo "DEBUG: Tests directory does not exist" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test without tests directory..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
fi
# Log timeout result
echo "DEBUG: About to check for timeout..." | tee -a "$TEST_LOG_FILE"
if [ $TEST_RC -eq 124 ]; then
echo "DEBUG: Post-LLM tests timed out" | tee -a "$TEST_LOG_FILE"
echo "⚠️ Post-LLM tests timed out after ${POST_LLM_TEST_TIMEOUT} seconds" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This may indicate hanging tests or very slow test execution" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "DEBUG: Post-LLM tests did not timeout" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: About to check for argparse errors..." | tee -a "$TEST_LOG_FILE"
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "DEBUG: Detected argparse error, falling back to pytest" | tee -a "$TEST_LOG_FILE"
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Fallback pytest completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: No argparse errors detected" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: About to set error handling..." | tee -a "$TEST_LOG_FILE"
set -e
echo "DEBUG: Error handling set" | tee -a "$TEST_LOG_FILE"
echo "Post-LLM tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to call _stage post_llm_tests_end..." | tee -a "$TEST_LOG_FILE"
_stage "post_llm_tests_end" "\"rc\":$TEST_RC"
echo "DEBUG: Successfully called _stage post_llm_tests_end" | tee -a "$TEST_LOG_FILE"
# Persist exit code after post-LLM run
echo "DEBUG: About to persist exit code..." | tee -a "$TEST_LOG_FILE"
if [ -n "${EXIT_FILE:-}" ]; then
echo "DEBUG: EXIT_FILE is set, writing exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true
else
echo "DEBUG: EXIT_FILE is not set" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: Exit code persistence completed" | tee -a "$TEST_LOG_FILE"
# If conftest import path mismatch detected, enable importlib mode for next runs
if grep -q "ImportPathMismatchError: ('.*conftest'" "$TEST_LOG_FILE"; then
echo "Detected conftest import path mismatch; enabling --import-mode=importlib for subsequent pytest runs" | tee -a "$TEST_LOG_FILE"
export PYTEST_ADDOPTS="--import-mode=importlib ${PYTEST_ADDOPTS:-}"
fi
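# pytest reads PYTEST_ADDOPTS as extra command-line options prepended to the user's invocation,
# so after the export above a plain `pytest -q tests/` behaves like
# `pytest --import-mode=importlib -q tests/`.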
# Re-evaluate pass ratio; if still below threshold, run additional setup rounds
echo "DEBUG: About to re-evaluate pass ratio..." | tee -a "$TEST_LOG_FILE"
SETUP_MAX_ROUNDS=${LLM_SETUP_MAX_ROUNDS:-2}
ROUND=0
echo "DEBUG: SETUP_MAX_ROUNDS: $SETUP_MAX_ROUNDS" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Starting additional rounds loop..." | tee -a "$TEST_LOG_FILE"
while : ; do
echo "DEBUG: Evaluating pass ratio for round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Add timeout protection for test parsing to prevent hanging
echo "DEBUG: Starting test parsing with timeout protection for round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Use a subshell with timeout to prevent hanging
(
# Extract test counts with corrected regex patterns
# Use grep to extract the number immediately before the keyword (handles ANSI codes)
# `|| true` keeps the subshell alive under set -e when no summary line exists, so the
# no-summary fallback below is actually reachable; `errors?` also matches the singular "1 error"
PASSED=$(grep -oE '[0-9]+ passed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
FAILED=$(grep -oE '[0-9]+ failed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
ERRORS=$(grep -oE '[0-9]+ errors?' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
# Debug: Show what each regex extracted
echo "DEBUG: Raw extracted values for round $ROUND - PASSED='$PASSED' FAILED='$FAILED' ERRORS='$ERRORS'" | tee -a "$TEST_LOG_FILE"
# If no summary line found (interrupted test run), count individual test results
if [ -z "$PASSED" ] && [ -z "$FAILED" ] && [ -z "$ERRORS" ]; then
echo "DEBUG: No test summary found for round $ROUND, analyzing test execution..." | tee -a "$TEST_LOG_FILE"
# Check if tests actually ran by looking for common test execution indicators
if grep -q "No such file or directory\|command not found\|make: \*\*\*\|ERROR collecting\|Interrupted:" "$TEST_LOG_FILE"; then
echo "DEBUG: Detected build/test command failure for round $ROUND - tests never executed properly" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat command failure as an error
elif grep -q "PASSED\|FAILED\|SKIPPED" "$TEST_LOG_FILE"; then
echo "DEBUG: Found individual test results for round $ROUND, counting them..." | tee -a "$TEST_LOG_FILE"
PASSED=$(grep -c "PASSED" "$TEST_LOG_FILE" 2>/dev/null || echo "0")
FAILED=$(grep -c "FAILED" "$TEST_LOG_FILE" 2>/dev/null || echo "0")
SKIPPED=$(grep -c "SKIPPED" "$TEST_LOG_FILE" 2>/dev/null || echo "0")
ERRORS=0 # Individual test results don't show "errors" - they show as "FAILED"
echo "DEBUG: Individual test counts for round $ROUND - PASSED=$PASSED FAILED=$FAILED SKIPPED=$SKIPPED" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: No test execution detected for round $ROUND - treating as setup failure" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat as setup failure
fi
fi
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
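# Worked example: PASSED=30 FAILED=10 ERRORS=0 gives TOTAL=40 and RATIO=100*30/40=75 (integer division)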
echo "DEBUG: Calculated totals for round $ROUND - TOTAL=$TOTAL RATIO=$RATIO%" | tee -a "$TEST_LOG_FILE"
# Export variables for use outside the subshell
echo "PASSED=$PASSED" > /tmp/test_parsing_result_round_$ROUND
echo "FAILED=$FAILED" >> /tmp/test_parsing_result_round_$ROUND
echo "ERRORS=$ERRORS" >> /tmp/test_parsing_result_round_$ROUND
echo "TOTAL=$TOTAL" >> /tmp/test_parsing_result_round_$ROUND
echo "RATIO=$RATIO" >> /tmp/test_parsing_result_round_$ROUND
) &
PARSE_PID=$!
# Wait for parsing with a timeout; `wait` is a shell builtin and cannot run under the external
# `timeout` command, so poll the background job instead
PARSE_WAITED=0
while kill -0 "$PARSE_PID" 2>/dev/null && [ "$PARSE_WAITED" -lt 30 ]; do
sleep 1
PARSE_WAITED=$((PARSE_WAITED + 1))
done
if ! kill -0 "$PARSE_PID" 2>/dev/null; then
wait "$PARSE_PID" 2>/dev/null || true
echo "DEBUG: Test parsing completed successfully for round $ROUND" | tee -a "$TEST_LOG_FILE"
# Load results from temp file
if [ -f /tmp/test_parsing_result_round_$ROUND ]; then
source /tmp/test_parsing_result_round_$ROUND
rm -f /tmp/test_parsing_result_round_$ROUND
else
echo "DEBUG: No parsing results found for round $ROUND, using fallback values" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
else
echo "DEBUG: Test parsing timed out for round $ROUND, using fallback values" | tee -a "$TEST_LOG_FILE"
kill $PARSE_PID 2>/dev/null || true
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
echo "DEBUG: Pass ratio evaluation - passed=$PASSED failed=$FAILED errors=$ERRORS total=$TOTAL ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
echo "Post-LLM summary: passed=$PASSED failed=$FAILED errors=$ERRORS ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -gt 0 ] && [ "$RATIO" -ge 50 ]; then
echo "DEBUG: Pass ratio is above threshold (${RATIO}% >= 50%), breaking loop" | tee -a "$TEST_LOG_FILE"
break
fi
if [ "$ROUND" -ge "$SETUP_MAX_ROUNDS" ]; then
echo "DEBUG: Max rounds reached ($ROUND >= $SETUP_MAX_ROUNDS), exiting" | tee -a "$TEST_LOG_FILE"
echo "Tests still below threshold after $ROUND additional rounds. Skipping optimization." | tee -a "$TEST_LOG_FILE"
exit 4
fi
ROUND=$((ROUND + 1))
echo "DEBUG: Starting round $ROUND..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round_start" "\"round\":$ROUND"
echo "Starting additional Claude Code CLI setup round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Create focused prompt for additional round
cat > /tmp/claude_setup_round_${ROUND}.md << EOF
# Repository Setup Assistant - Round $ROUND
You are fixing repository setup issues. Previous attempts have been made but tests are still failing.
## Current Situation
- This is setup round $ROUND of maximum $SETUP_MAX_ROUNDS
- Previous rounds have attempted to fix dependencies and setup issues
- Tests are still below 50% pass rate
## Your Focus This Round
1. **Analyze recent test failures** - look at the latest test output for new clues
2. **Try different approaches** - if pip installs didn't work, try other methods
3. **Check for version conflicts** - some packages might need specific versions
4. **Look for missing system dependencies** - some Python packages need system libs
5. **Consider alternative test commands** - the project might use a different test runner
6. **Use advanced troubleshooting** - check logs, try different Python versions, modify config files
7. **Leverage all available tools** - FileManager to explore, WebSearch for solutions, Edit to fix files
## Available Information
- Repository: ${GITHUB_REPO_URL:-unknown}
- Tests directory: ${TESTS_ROOT_VALUE:-test}
- Test command: ${PYTEST_CMD_VALUE:-pytest}
- Round: $ROUND/$SETUP_MAX_ROUNDS
## Strategies to Try
- **Explore with FileManager**: Check setup.py, pyproject.toml, requirements files, tox.ini
- **WebSearch for solutions**: Look up specific error messages, package installation guides
- **Edit configuration files**: Modify setup files, fix import paths, update dependencies
- **Try alternative installation methods**: conda, mamba, system packages, different pip flags
- **Install development/test extras**: `pip install -e .[dev,test]`, `pip install -e .[all]`
- **Check for version conflicts**: Use specific package versions, upgrade/downgrade packages
- **System-level fixes**: Install missing system libraries, fix permissions, environment variables
- **Test isolation**: Run individual test files to identify specific failing components
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Recent Test Errors (Summary)
$(sed -n '1,400p' "$TEST_LOG_FILE" | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30)
Focus on getting tests to run successfully, even if not all pass.
EOF
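# Note: the EOF delimiter above is unquoted, so the $(...) and ${...} inside the heredoc expand
# when the prompt file is written; the error summary is baked in at this point, not at read time.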
# Run Claude Code CLI for additional round
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-round-${ROUND}-$(date -u +%Y-%m-%dT%H-%M-%S).log"
touch "$CLAUDE_LOG" 2>/dev/null || true
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
if [ -n "$CLAUDE_CMD" ]; then
echo "=== CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
cat /tmp/claude_setup_round_${ROUND}.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
# Build Claude Code CLI command for additional rounds
CLAUDE_ROUND_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for additional rounds
CLAUDE_FLAGS_ROUND_PERM="$CLAUDE_ROUND_BASE_ARGS --print --output-format text --max-turns 50 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit,FileManager,WebSearch"
CLAUDE_FLAGS_ROUND_MIN="$CLAUDE_ROUND_BASE_ARGS --print --output-format text --max-turns 50"
CLAUDE_FLAGS_ROUND_BASIC="$CLAUDE_ROUND_BASE_ARGS --print --output-format text"
echo "Executing round $ROUND: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
set +e # Don't exit on failure
set -o pipefail
# Try with full permissions first
echo "Attempting Claude CLI round $ROUND with elevated permissions..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_ROUND_EXIT_CODE=1
# First attempt: Try with full permissions
if [ -d "$REPO_DIR" ]; then
echo "Executing Claude CLI round $ROUND in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
echo "Executing Claude CLI round $ROUND in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 1 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 2 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 3 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round_end" "\"round\":$ROUND,\"rc\":$CLAUDE_ROUND_EXIT_CODE"
set -e
if [ $CLAUDE_ROUND_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI round $ROUND finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI round $ROUND completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_ROUND_EXIT_CODE -eq 124 ]; then
echo "⏰ Claude Code CLI round $ROUND timed out after 1800 seconds (30 minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI round $ROUND failed with exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes for round $ROUND:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing with tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Claude Code CLI not available in round $ROUND; skipping" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Clean up round prompt file
rm -f /tmp/claude_setup_round_${ROUND}.md
_stage "round_tests_start" "\"round\":$ROUND"
echo "Re-running full tests (round $ROUND)..." | tee -a "$TEST_LOG_FILE"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
_stage "round_tests_end" "\"round\":$ROUND,\"rc\":$TEST_RC"
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
# Persist exit code on each round
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
done
fi
fi
# Handle custom optimization modes
if [ "$CUSTOM_RUN_MODE" = true ]; then
echo "=== Executing Custom Optimization Mode ==="
_stage "custom_optimization_start" "\"mode\":\"$OPTIMIZATION_MODE\""
# Log custom optimization parameters
echo "Custom optimization parameters:"
echo " Mode: $OPTIMIZATION_MODE"
echo " Module root: ${CUSTOM_MODULE_ROOT:-'default'}"
echo " Tests root: ${CUSTOM_TESTS_ROOT:-'default'}"
echo " Async mode: ${CUSTOM_ASYNC_MODE:-'default'}"
echo " Verbose: ${CUSTOM_VERBOSE:-'default'}"
echo " No PR: ${CUSTOM_NO_PR:-'default'}"
case "$OPTIMIZATION_MODE" in
"single_function")
echo "Running single function optimization..."
echo "Target file: ${CUSTOM_FILE_PATH:-'not specified'}"
echo "Target function: ${CUSTOM_FUNCTION_NAME:-'not specified'}"
if [ -z "${CUSTOM_FILE_PATH:-}" ] || [ -z "${CUSTOM_FUNCTION_NAME:-}" ]; then
echo "Error: file_path and function_name are required for single function optimization" >&2
_stage "custom_optimization_error" "\"error\":\"missing_target_parameters\""
exit 1
fi
if [ ! -f "${CUSTOM_FILE_PATH}" ]; then
echo "Error: Target file not found: ${CUSTOM_FILE_PATH}" >&2
_stage "custom_optimization_error" "\"error\":\"target_file_not_found\",\"file\":\"${CUSTOM_FILE_PATH}\""
exit 3
fi
# Build codeflash command
CODEFLASH_CMD="codeflash --file \"${CUSTOM_FILE_PATH}\" --function \"${CUSTOM_FUNCTION_NAME}\""
_stage "custom_optimization_command_built" "\"mode\":\"single_function\",\"file\":\"${CUSTOM_FILE_PATH}\",\"function\":\"${CUSTOM_FUNCTION_NAME}\""
# Add flags
if [ "${CUSTOM_VERBOSE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --verbose"
fi
if [ "${CUSTOM_ASYNC_MODE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --async"
fi
if [ "${CUSTOM_NO_PR:-false}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --no-pr"
fi
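# Example of a fully assembled command with the defaults above (hypothetical file and function names):
#   codeflash --file "src/app/utils.py" --function "slow_parse" --verbose --async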
echo "Executing: $CODEFLASH_CMD"
eval "$CODEFLASH_CMD"
;;
"trace_and_optimize")
echo "Running trace and optimize workflow..."
echo "Target script: ${CUSTOM_SCRIPT_PATH:-'not specified'}"
echo "Trace file: ${CUSTOM_TRACE_FILE:-'default'}"
echo "Tracer timeout: ${CUSTOM_TRACER_TIMEOUT:-'default'}"
echo "Trace only: ${CUSTOM_TRACE_ONLY:-'default'}"
if [ -z "${CUSTOM_SCRIPT_PATH:-}" ]; then
echo "Error: script_path is required for trace and optimize" >&2
_stage "custom_optimization_error" "\"error\":\"missing_script_path\""
exit 1
fi
if [ ! -f "${CUSTOM_SCRIPT_PATH}" ]; then
echo "Error: Target script not found: ${CUSTOM_SCRIPT_PATH}" >&2
_stage "custom_optimization_error" "\"error\":\"target_script_not_found\",\"script\":\"${CUSTOM_SCRIPT_PATH}\""
exit 3
fi
# Build codeflash optimize command
CODEFLASH_CMD="codeflash optimize \"${CUSTOM_SCRIPT_PATH}\""
_stage "custom_optimization_command_built" "\"mode\":\"trace_and_optimize\",\"script\":\"${CUSTOM_SCRIPT_PATH}\""
# Add trace file if specified
if [ -n "${CUSTOM_TRACE_FILE:-}" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} -o \"${CUSTOM_TRACE_FILE}\""
fi
# Add tracer timeout if specified
if [ -n "${CUSTOM_TRACER_TIMEOUT:-}" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --tracer-timeout ${CUSTOM_TRACER_TIMEOUT}"
fi
# Add trace-only flag if specified
if [ "${CUSTOM_TRACE_ONLY:-false}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --trace-only"
fi
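# Example of an assembled command (script path, trace file, and timeout values
# are hypothetical):
#   codeflash optimize "scripts/train.py" -o "codeflash.trace" --tracer-timeout 600 --trace-only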
echo "Executing: $CODEFLASH_CMD"
_stage "custom_optimization_executing" "\"mode\":\"single_function\",\"command\":\"$CODEFLASH_CMD\""
eval "$CODEFLASH_CMD"
CODEFLASH_RC=$?
_stage "custom_optimization_executed" "\"mode\":\"single_function\",\"rc\":$CODEFLASH_RC"
;;
"trace_and_optimize")
echo "Executing: $CODEFLASH_CMD"
_stage "custom_optimization_executing" "\"mode\":\"trace_and_optimize\",\"command\":\"$CODEFLASH_CMD\""
eval "$CODEFLASH_CMD"
CODEFLASH_RC=$?
_stage "custom_optimization_executed" "\"mode\":\"trace_and_optimize\",\"rc\":$CODEFLASH_RC"
;;
"optimize_all")
echo "Running optimize all codebase..."
echo "Target directory: ${CUSTOM_TARGET_DIRECTORY:-'entire codebase'}"
echo "Benchmark mode: ${CUSTOM_BENCHMARK:-'default'}"
echo "Async mode: ${CUSTOM_ASYNC_MODE:-'default'}"
echo "Verbose: ${CUSTOM_VERBOSE:-'default'}"
# Build codeflash --all command
CODEFLASH_CMD="codeflash --all"
_stage "custom_optimization_command_built" "\"mode\":\"optimize_all\",\"directory\":\"${CUSTOM_TARGET_DIRECTORY:-'entire'}\""
# Add target directory if specified
if [ -n "${CUSTOM_TARGET_DIRECTORY:-}" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} \"${CUSTOM_TARGET_DIRECTORY}\""
fi
# Add benchmark flag if specified
if [ "${CUSTOM_BENCHMARK:-false}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --benchmark"
fi
# Add async flag if specified
if [ "${CUSTOM_ASYNC_MODE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --async"
fi
# Add verbose flag if specified
if [ "${CUSTOM_VERBOSE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --verbose"
fi
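# Example of an assembled command with a hypothetical target directory "src"
# and the default flags: codeflash --all "src" --async --verbose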
echo "Executing: $CODEFLASH_CMD"
_stage "custom_optimization_executing" "\"mode\":\"$OPTIMIZATION_MODE\",\"command\":\"$CODEFLASH_CMD\""
eval "$CODEFLASH_CMD"
CODEFLASH_RC=$?
_stage "custom_optimization_executed" "\"mode\":\"$OPTIMIZATION_MODE\",\"rc\":$CODEFLASH_RC"
;;
*)
echo "Error: Unknown optimization mode: $OPTIMIZATION_MODE" >&2
echo "Valid modes: single_function, trace_and_optimize, optimize_all" >&2
_stage "custom_optimization_error" "\"error\":\"unknown_mode\",\"mode\":\"$OPTIMIZATION_MODE\""
exit 1
;;
esac
_stage "custom_optimization_end" "\"mode\":\"$OPTIMIZATION_MODE\""
echo "=== Custom Optimization Completed ==="
else
# Original optimization logic for non-custom runs
if [ -z "${CF_TARGET_FILE:-}" ]; then
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Trace-first: ${TRACE_CMD} ${TESTS_ROOT_VALUE}/"
# Ensure pytest-cov if coverage flags present
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
pip install pytest-cov || true
fi
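# Tracing is best-effort: set +e plus the trailing || true keep a tracer
# failure from aborting the whole run under set -e.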
set +e
# If TRACE_CMD is pytest, pass the tests-root as args so tracer gets a non-empty split
if [[ "${TRACE_CMD}" == pytest* ]]; then
codeflash optimize --trace-only -m pytest -- "${TESTS_ROOT_VALUE}/" --async || true
else
codeflash optimize --trace-only -m "${TRACE_CMD}" --async || true
fi
set -e
else
echo "Skipping trace: tests root not found."
fi
fi
if [ -n "${CF_TARGET_FILE:-}" ]; then
echo "Running Codeflash single-file: ${CF_TARGET_FILE} ${CF_TARGET_FUNCTION:-}"
if [ ! -f "${CF_TARGET_FILE}" ]; then
echo "Target file not found: ${CF_TARGET_FILE}" >&2
exit 3
fi
if [ -n "${CF_TARGET_FUNCTION:-}" ]; then
codeflash --file "${CF_TARGET_FILE}" --function "${CF_TARGET_FUNCTION}" --verbose --async
else
codeflash --file "${CF_TARGET_FILE}" --verbose --async
fi
else
echo "Running Codeflash --all with --async without staging-review flag..."
codeflash --all --async --verbose
fi
fi
# If we reach here normally, ensure EXIT_FILE reflects last known code (0 if unset)
echo "DEBUG: About to check final EXIT_FILE status..." | tee -a "$TEST_LOG_FILE"
if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then
echo "DEBUG: EXIT_FILE is empty, setting to 0" | tee -a "$TEST_LOG_FILE"
echo "0" > "${EXIT_FILE}" 2>/dev/null || true
else
echo "DEBUG: EXIT_FILE is already set or not configured" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: About to finish optimization..." | tee -a "$TEST_LOG_FILE"
echo "--- Finished Codeflash Optimization ---"
# Earlier skip/failure paths may exit non-zero; the EXIT trap persists EXIT_FILE there as well
echo "DEBUG: About to exit with code 0..." | tee -a "$TEST_LOG_FILE"
exit 0