codeflash-internal/experiments/optimization-factory/scripts/run_optimization.sh

#!/bin/bash
set -e
set -u
# Enable pipefail when supported (works under bash; safely ignored under sh)
if (set -o 2>/dev/null | grep -q 'pipefail') 2>/dev/null; then
set -o pipefail
fi
echo "--- Starting Codeflash Optimization ---"
# Helper to record stage transitions for BE tracking
_stage() {
local name="$1"; shift || true
local extra="$*"
if [ -n "${STAGE_FILE:-}" ]; then
printf '{"ts":"%s","stage":"%s"%s}\n' "$(date -Is)" "$name" "${extra:+,$extra}" >> "$STAGE_FILE" 2>/dev/null || true
fi
}
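# For illustration only (hypothetical values): with STAGE_FILE=/tmp/stages.jsonl set,
#   _stage "clone_start" "\"repo\":\"example-org/example-repo\""
# appends one JSONL line of the form:
#   {"ts":"2025-01-01T12:00:00+00:00","stage":"clone_start","repo":"example-org/example-repo"}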
# Helper to safely log environment variables without exposing sensitive values
_safe_log_env() {
local pattern="$1"
local log_file="${2:-/dev/stdout}"
# List of sensitive environment variable patterns to mask
local sensitive_patterns="(TOKEN|KEY|SECRET|PASSWORD|CREDENTIAL|AUTH|API_KEY|PRIVATE|ACCESS|BEARER)"
env | grep -E "$pattern" | while IFS='=' read -r key value; do
if echo "$key" | grep -qiE "$sensitive_patterns"; then
# Mask sensitive values - show first 4 and last 4 characters with asterisks in between
local masked_value=""
if [ ${#value} -le 8 ]; then
masked_value="***MASKED***"
else
masked_value="${value:0:4}***${value: -4}"
fi
echo "${key}=${masked_value}" >> "$log_file"
else
echo "${key}=${value}" >> "$log_file"
fi
done
}
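# Behavior sketch (variable names are hypothetical): calling
#   _safe_log_env '^CODEFLASH_' /tmp/env_snapshot.log
# would log CODEFLASH_API_KEY=cf-1***wxyz (masked, since the name matches KEY),
# while a non-sensitive variable such as CODEFLASH_MODE=default is logged verbatim.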
_stage "start"
# Ensure we always record final exit and persist EXIT_FILE if not already set
trap '_rc=$?; _stage "runner_exit" "\"rc\":$_rc"; if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then echo "$_rc" > "${EXIT_FILE}" 2>/dev/null || true; fi; exit $_rc' EXIT
# Check if this is a custom run
CUSTOM_RUN_MODE=false
if [ -f "/home/ubuntu/custom_run_config.json" ]; then
CUSTOM_RUN_MODE=true
echo "=== Custom Run Mode Detected ==="
echo "Loading custom configuration from /home/ubuntu/custom_run_config.json"
_stage "custom_run_start"
fi
if [ -z "${GITHUB_TOKEN:-}" ]; then echo "GITHUB_TOKEN is required"; exit 1; fi
if [ -z "${CODEFLASH_API_KEY:-}" ]; then echo "CODEFLASH_API_KEY is required"; exit 1; fi
if [ -z "${GITHUB_REPO_URL:-}" ]; then echo "GITHUB_REPO_URL is required"; exit 1; fi
# Handle custom run configuration
if [ "$CUSTOM_RUN_MODE" = true ]; then
echo "=== Processing Custom Run Configuration ==="
_stage "custom_config_processing"
# Load custom configuration
if command -v python3 >/dev/null 2>&1; then
CUSTOM_CONFIG=$(python3 -c "
import json
try:
    with open('/home/ubuntu/custom_run_config.json', 'r') as f:
        config = json.load(f)
    print(json.dumps(config))
except Exception as e:
    print('{}')
")
else
echo "Warning: python3 not available for custom config parsing"
CUSTOM_CONFIG='{}'
fi
# Extract configuration values
OPTIMIZATION_MODE=$(echo "$CUSTOM_CONFIG" | python3 -c "
import json, sys
try:
    config = json.load(sys.stdin)
    print(config.get('optimization_mode', ''))
except:
    print('')
" 2>/dev/null || echo "")
echo "Custom optimization mode: ${OPTIMIZATION_MODE:-'not specified'}"
# Log custom configuration details
echo "=== Custom Run Configuration Details ==="
echo "Configuration file: /home/ubuntu/custom_run_config.json"
echo "Optimization mode: ${OPTIMIZATION_MODE:-'not specified'}"
# Extract and log configuration sections
CONFIG_DATA=$(echo "$CUSTOM_CONFIG" | python3 -c "
import json, sys
try:
    config = json.load(sys.stdin)
    config_section = config.get('config', {})
    flags_section = config.get('flags', {})
    advanced_section = config.get('advanced', {})
    print('Config section keys:', list(config_section.keys()))
    print('Flags section keys:', list(flags_section.keys()))
    print('Advanced section keys:', list(advanced_section.keys()))
except:
    print('Error parsing configuration sections')
" 2>/dev/null || echo "Error parsing configuration")
echo "$CONFIG_DATA"
_stage "custom_config_loaded" "\"mode\":\"$OPTIMIZATION_MODE\""
# Override environment variables with custom values if provided
if [ -n "${CUSTOM_MODULE_ROOT:-}" ]; then
MODULE_ROOT_VALUE="$CUSTOM_MODULE_ROOT"
echo "Using custom module root: $MODULE_ROOT_VALUE"
fi
if [ -n "${CUSTOM_TESTS_ROOT:-}" ]; then
TESTS_ROOT_VALUE="$CUSTOM_TESTS_ROOT"
echo "Using custom tests root: $TESTS_ROOT_VALUE"
fi
if [ -n "${CUSTOM_TEST_FRAMEWORK:-}" ]; then
echo "Using custom test framework: $CUSTOM_TEST_FRAMEWORK"
TEST_FRAMEWORK_VALUE="$CUSTOM_TEST_FRAMEWORK"
fi
if [ -n "${CUSTOM_PYTEST_CMD:-}" ]; then
PYTEST_CMD_VALUE="$CUSTOM_PYTEST_CMD"
echo "Using custom pytest command: $PYTEST_CMD_VALUE"
fi
if [ -n "${CUSTOM_FORMATTER_CMDS:-}" ]; then
FORMATTER_CMDS_VALUE="$CUSTOM_FORMATTER_CMDS"
echo "Using custom formatter commands: $FORMATTER_CMDS_VALUE"
fi
_stage "custom_config_processed" "\"mode\":\"$OPTIMIZATION_MODE\""
fi
# Precedence: custom-run overrides (set above) > LLM-provided overrides > CSV/env > auto.
# The ':-' defaults keep any value already set by the custom-run block instead of clobbering it.
MODULE_ROOT_VALUE="${MODULE_ROOT_VALUE:-${LLM_MODULE_ROOT:-${MODULE_ROOT:-auto}}}"
TESTS_ROOT_VALUE="${TESTS_ROOT_VALUE:-${LLM_TESTS_ROOT:-${TESTS_ROOT:-auto}}}"
PYTEST_CMD_VALUE="${PYTEST_CMD_VALUE:-${LLM_PYTEST_CMD:-${PYTEST_CMD:-pytest}}}"
FORMATTER_CMDS_VALUE="${FORMATTER_CMDS_VALUE:-${LLM_FORMATTER_CMDS:-${FORMATTER_CMDS:-[\"disabled\"]}}}"
# Normalize pytest command: drop leading 'poetry run '
LOWER_PYTEST=$(echo "$PYTEST_CMD_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ "$LOWER_PYTEST" == poetry\ run* ]]; then
PYTEST_CMD_VALUE="$(echo "$PYTEST_CMD_VALUE" | sed 's/^poetry[[:space:]]\+run[[:space:]]\+//')"
fi
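# Example of the normalization above (hypothetical command): "poetry run pytest tests/ -x"
# becomes "pytest tests/ -x", so the venv's own pytest is used rather than Poetry's environment.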
# Normalize formatter cmds to Codeflash-per-file style per docs
# See https://docs.codeflash.ai/configuration
FORMATTER_CMDS_NORM="$FORMATTER_CMDS_VALUE"
LOWER_FMT=$(echo "$FORMATTER_CMDS_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ -z "$LOWER_FMT" || "$LOWER_FMT" == "[]" || "$LOWER_FMT" == "[\"disabled\"]" ]]; then
FORMATTER_CMDS_NORM='["disabled"]'
elif [[ "$LOWER_FMT" == *"ruff"* ]]; then
FORMATTER_CMDS_NORM='["ruff check --exit-zero --fix $file","ruff format $file"]'
elif [[ "$LOWER_FMT" == *"black"* ]]; then
FORMATTER_CMDS_NORM='["black $file"]'
fi
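# Examples of the mapping above (input values are illustrative):
#   '["ruff format ."]'  -> '["ruff check --exit-zero --fix $file","ruff format $file"]'
#   '["black ."]'        -> '["black $file"]'
#   '' or '[]'           -> '["disabled"]'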
# Summary of analyzed/exported config (no secrets)
echo "=== Configuration Summary (analyzer + effective) ==="
echo "Repo URL: ${GITHUB_REPO_URL}"
echo "CSV/ENV defaults: MODULE_ROOT='${MODULE_ROOT:-}', TESTS_ROOT='${TESTS_ROOT:-}', PYTEST_CMD='${PYTEST_CMD:-}'"
echo "Analyzer: LLM_MODULE_ROOT='${LLM_MODULE_ROOT:-}', LLM_TESTS_ROOT='${LLM_TESTS_ROOT:-}', LLM_PYTEST_CMD='${LLM_PYTEST_CMD:-}'"
echo "Analyzer: LLM_FORMATTER_CMDS='${LLM_FORMATTER_CMDS:-}', LLM_PIP_PACKAGES='${LLM_PIP_PACKAGES:-}'"
echo "Derived: MODULE_ROOT_VALUE='${MODULE_ROOT_VALUE}', TESTS_ROOT_VALUE='${TESTS_ROOT_VALUE}', PYTEST_CMD_VALUE='${PYTEST_CMD_VALUE}'"
echo "Derived: FORMATTER_CMDS_NORM=${FORMATTER_CMDS_NORM}"
echo "=== End Configuration Summary ==="
# Derive test framework for Codeflash config from the test command,
# unless a custom framework was already supplied by the custom-run block above
if [ -z "${TEST_FRAMEWORK_VALUE:-}" ]; then
TEST_FRAMEWORK_VALUE="pytest"
LOWER_CMD=$(echo "${PYTEST_CMD_VALUE}" | tr '[:upper:]' '[:lower:]')
if [[ "${LOWER_CMD}" =~ (^|[[:space:]])pytest([[:space:]]|$) ]]; then
TEST_FRAMEWORK_VALUE="pytest"
elif [[ "${LOWER_CMD}" == *"unittest"* ]]; then
TEST_FRAMEWORK_VALUE="unittest"
elif [[ "${LOWER_CMD}" == *"nose"* ]] || [[ "${LOWER_CMD}" == *"nosetests"* ]]; then
TEST_FRAMEWORK_VALUE="nose"
fi
fi
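# Examples of the derivation above (hypothetical commands):
#   "python -m pytest -q"         -> test-framework = "pytest"
#   "python -m unittest discover" -> test-framework = "unittest"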
_stage "auth_gh_start"
echo "Authenticating gh..."
# Check if GitHub CLI is available
if ! command -v gh >/dev/null 2>&1; then
echo "⚠️ GitHub CLI (gh) not found, falling back to direct git clone"
echo "This will clone the original repository directly without forking"
_stage "clone_start" "\"repo\":\"${GITHUB_REPO_URL}\""
echo "Cloning original repository directly: ${GITHUB_REPO_URL}"
# Working directory (must be writable by current user)
WORK_DIR="${WORK_DIR:-/home/ubuntu/work}"
rm -rf "$WORK_DIR" || true
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
# Clone the original repository directly
if git clone "${GITHUB_REPO_URL}" repo; then
echo "✅ Successfully cloned original repository: ${GITHUB_REPO_URL}"
cd repo
git remote add upstream "${GITHUB_REPO_URL}" || true
git fetch --all || true
else
echo "❌ Failed to clone original repository"
exit 1
fi
else
echo "✅ GitHub CLI (gh) is available"
if gh auth status -h github.com >/dev/null 2>&1; then
echo "gh auth status OK"
else
echo "Using GITHUB_TOKEN from environment for gh commands"
# Ensure gh CLI is authenticated with the token
if [ -n "${GITHUB_TOKEN:-}" ]; then
echo "Setting up gh authentication with provided token..."
# Note: Token is passed to gh auth login but not logged for security
echo "${GITHUB_TOKEN}" | gh auth login --with-token 2>/dev/null || {
echo "Failed to authenticate gh with token, but continuing..."
echo "This may cause fork/clone operations to fail"
}
else
echo "❌ No GITHUB_TOKEN provided - fork operations will likely fail"
fi
fi
_stage "fork_repo_start"
echo "Forking repository if needed..."
echo "Attempting to fork: ${GITHUB_REPO_URL}"
echo "Target organization: codeflash-ai"
# Extract repository name for fork checking
REPO_NAME=$(basename "${GITHUB_REPO_URL}" .git)
FORK_REPO_NAME="${REPO_NAME}"
FORK_REPO="codeflash-ai/${FORK_REPO_NAME}"
# Check if fork already exists
echo "Checking if fork already exists: ${FORK_REPO}"
if gh repo view "${FORK_REPO}" >/dev/null 2>&1; then
echo "✅ Fork already exists: ${FORK_REPO}"
echo "Updating fork from upstream before cloning..."
_stage "sync_fork_start" "\"fork_repo\":\"${FORK_REPO}\",\"upstream\":\"${GITHUB_REPO_URL}\""
echo "Syncing fork ${FORK_REPO} with upstream ${GITHUB_REPO_URL}..."
# Sync the fork with its upstream repository
# gh repo sync automatically syncs a fork with its upstream if the repo is a fork
# This updates the fork on GitHub with the latest changes from the upstream repository
# We use --force to perform a hard reset, ensuring the fork matches upstream exactly even if it has diverged
if gh repo sync "${FORK_REPO}" --force 2>&1 | tee -a /tmp/sync_debug.log; then
echo "✅ Successfully synced fork ${FORK_REPO} with upstream"
_stage "sync_fork_complete" "\"fork_repo\":\"${FORK_REPO}\""
else
SYNC_EXIT_CODE=${PIPESTATUS[0]}
echo "⚠️ Fork sync failed with exit code: $SYNC_EXIT_CODE"
echo "Sync debug log:"
cat /tmp/sync_debug.log 2>/dev/null || echo "No sync debug log available"
echo "Note: If the fork doesn't have upstream configured, sync may fail."
echo "This is non-fatal - continuing with clone anyway - fork may still be usable"
_stage "sync_fork_warning" "\"fork_repo\":\"${FORK_REPO}\",\"exit_code\":$SYNC_EXIT_CODE"
fi
else
echo "Fork does not exist, creating new fork..."
# Try to fork with better error handling
if gh repo fork "${GITHUB_REPO_URL}" --org codeflash-ai --clone=false --remote=false 2>&1 | tee -a /tmp/fork_debug.log; then
echo "✅ Fork operation completed successfully"
# Check if GitHub created a numbered fork instead
FORK_OUTPUT=$(cat /tmp/fork_debug.log 2>/dev/null | grep -o 'https://github.com/codeflash-ai/[^[:space:]]*' | tail -1)
if [ -n "$FORK_OUTPUT" ]; then
ACTUAL_FORK_REPO=$(echo "$FORK_OUTPUT" | sed 's#https://github.com/##')
if [ "$ACTUAL_FORK_REPO" != "$FORK_REPO" ]; then
echo "⚠️ GitHub created a numbered fork: ${ACTUAL_FORK_REPO}"
echo "This suggests the original fork name was already taken"
echo "Using the created fork: ${ACTUAL_FORK_REPO}"
FORK_REPO="$ACTUAL_FORK_REPO"
fi
fi
else
FORK_EXIT_CODE=${PIPESTATUS[0]}
echo "❌ Fork operation failed with exit code: $FORK_EXIT_CODE"
echo "Debug information:"
echo "Repository URL: ${GITHUB_REPO_URL}"
echo "GitHub token status: $([ -n "${GITHUB_TOKEN:-}" ] && echo "Set" || echo "Not set")"
echo "gh auth status:"
gh auth status 2>&1 || echo "gh auth status failed"
echo "Fork debug log:"
cat /tmp/fork_debug.log 2>/dev/null || echo "No fork debug log available"
echo "Continuing anyway - fork may already exist or we'll clone the original repo"
fi
fi
# FORK_REPO is already set above based on whether fork exists or was created
_stage "clone_start" "\"repo\":\"${FORK_REPO}\""
echo "Cloning fork ${FORK_REPO}..."
# Working directory (must be writable by current user)
WORK_DIR="${WORK_DIR:-/home/ubuntu/work}"
rm -rf "$WORK_DIR" || true
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
# Retry clone with exponential backoff for GitHub service issues
for attempt in 1 2 3; do
echo "Clone attempt $attempt/3..."
echo "Attempting to clone fork: ${FORK_REPO}"
if gh repo clone "${FORK_REPO}" repo 2>&1 | tee -a /tmp/clone_debug.log; then
echo "✅ Successfully cloned fork: ${FORK_REPO}"
break
else
CLONE_EXIT_CODE=${PIPESTATUS[0]}
echo "❌ Fork clone failed with exit code: $CLONE_EXIT_CODE"
echo "Clone debug log:"
cat /tmp/clone_debug.log 2>/dev/null || echo "No clone debug log available"
if [ $attempt -lt 3 ]; then
echo "Clone failed, retrying in $((attempt * 10)) seconds..."
sleep $((attempt * 10))
else
echo "Fork clone failed after 3 attempts, trying original repo..."
echo "Attempting to clone original repo: ${GITHUB_REPO_URL}"
# Fallback to original repo if fork clone fails
if gh repo clone "${GITHUB_REPO_URL}" repo 2>&1 | tee -a /tmp/clone_debug.log; then
echo "✅ Successfully cloned original repo: ${GITHUB_REPO_URL}"
break
else
echo "❌ Original repo clone also failed"
echo "Final clone debug log:"
cat /tmp/clone_debug.log 2>/dev/null || echo "No clone debug log available"
echo "Failed to clone both fork and original repo"
exit 1
fi
fi
fi
done
cd repo
git remote add upstream "${GITHUB_REPO_URL}" || true
git fetch --all || true
fi # End of GitHub CLI availability check
if [ "${MODULE_ROOT_VALUE}" = "auto" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "Detecting module/tests roots..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then echo "No Python interpreter found for detection"; else $PY_CMD /app/scripts/detect_roots.py > roots.json || true; fi
if [ -f roots.json ]; then
DETECTED_MODULE=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("module_root",""))' || echo "")
DETECTED_TESTS=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("tests_root",""))' || echo "")
if [ "${MODULE_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_MODULE}" ]; then MODULE_ROOT_VALUE="${DETECTED_MODULE}"; fi
if [ "${TESTS_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_TESTS}" ]; then TESTS_ROOT_VALUE="${DETECTED_TESTS}"; fi
fi
fi
if [ -z "${MODULE_ROOT_VALUE}" ] || [ "${MODULE_ROOT_VALUE}" = "auto" ]; then
echo "Failed to detect module-root; please set MODULE_ROOT env."; exit 2
fi
if [ -z "${TESTS_ROOT_VALUE}" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "No tests-root detected; tracing will be skipped."
fi
_stage "write_codeflash_config"
echo "Writing pyproject.toml..."
# Verify we're in the correct working directory
echo "Current working directory: $(pwd)"
if [ ! -d "/home/ubuntu/work/repo" ]; then
echo "Error: Repository directory /home/ubuntu/work/repo not found" >&2
_stage "error" "\"error\":\"repository_directory_not_found\""
exit 1
fi
# Ensure we're in the repository directory
if [ "$(pwd)" != "/home/ubuntu/work/repo" ]; then
echo "Changing to repository directory: /home/ubuntu/work/repo"
cd "/home/ubuntu/work/repo" || {
echo "Error: Failed to change to repository directory" >&2
_stage "error" "\"error\":\"failed_to_change_directory\""
exit 1
}
echo "Now in directory: $(pwd)"
fi
# ROBUST PYPROJECT.TOML HANDLING
echo "=== ANALYZING EXISTING PYPROJECT.TOML STRUCTURE ==="
# Handle custom pyproject.toml location
PYPROJECT_FILE="pyproject.toml"
if [ "$CUSTOM_RUN_MODE" = true ] && [ -n "${CUSTOM_PYPROJECT_LOCATION:-}" ]; then
PYPROJECT_FILE="$CUSTOM_PYPROJECT_LOCATION"
echo "Using custom pyproject.toml location: $PYPROJECT_FILE"
# Verify custom pyproject.toml location is accessible
if [ ! -f "$PYPROJECT_FILE" ] && [ ! -d "$(dirname "$PYPROJECT_FILE")" ]; then
echo "Error: Custom pyproject.toml location not accessible: $PYPROJECT_FILE" >&2
_stage "error" "\"error\":\"custom_pyproject_location_inaccessible\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
fi
# Check if pyproject.toml already exists
EXISTING_PYPROJECT=""
if [ -f "$PYPROJECT_FILE" ]; then
echo "✅ Found existing pyproject.toml at: $PYPROJECT_FILE"
EXISTING_PYPROJECT="$PYPROJECT_FILE"
# Analyze existing pyproject.toml structure
echo "Analyzing existing pyproject.toml structure..."
# Check for different project types
if grep -q "\[tool\.poetry\]" "$PYPROJECT_FILE"; then
echo "📦 Detected Poetry project structure"
PROJECT_TYPE="poetry"
elif grep -q "\[tool\.uv\]" "$PYPROJECT_FILE"; then
echo "📦 Detected UV project structure"
PROJECT_TYPE="uv"
elif grep -q "\[project\]" "$PYPROJECT_FILE"; then
echo "📦 Detected PEP 621 project structure"
PROJECT_TYPE="pep621"
elif grep -q "\[build-system\]" "$PYPROJECT_FILE"; then
echo "📦 Detected build-system configuration"
PROJECT_TYPE="build_system"
else
echo "📦 Detected generic pyproject.toml"
PROJECT_TYPE="generic"
fi
# Check if Codeflash config already exists
if grep -q "\[tool\.codeflash\]" "$PYPROJECT_FILE"; then
echo "⚙️ Found existing [tool.codeflash] section"
HAS_CODEFLASH_CONFIG=true
else
echo "⚙️ No existing [tool.codeflash] section found"
HAS_CODEFLASH_CONFIG=false
fi
# Create backup (capture a single timestamp so the logged name matches the file)
BACKUP_FILE="${PYPROJECT_FILE}.backup.$(date +%Y%m%d_%H%M%S)"
cp "$PYPROJECT_FILE" "$BACKUP_FILE"
echo "📋 Created backup: $BACKUP_FILE"
else
echo "📝 No existing pyproject.toml found at: $PYPROJECT_FILE"
PROJECT_TYPE="none"
HAS_CODEFLASH_CONFIG=false
fi
echo "=== GENERATING CODEFLASH CONFIGURATION ==="
# Generate Codeflash configuration
CODEFLASH_CONFIG="[tool.codeflash]
module-root = \"${MODULE_ROOT_VALUE}\"
tests-root = \"${TESTS_ROOT_VALUE}\"
test-framework = \"${TEST_FRAMEWORK_VALUE}\"
formatter-cmds = ${FORMATTER_CMDS_NORM}"
# Add optional fields if they have custom values
if [ "$CUSTOM_RUN_MODE" = true ]; then
if [ -n "${CUSTOM_BENCHMARKS_ROOT:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
benchmarks-root = \"${CUSTOM_BENCHMARKS_ROOT}\""
fi
if [ -n "${CUSTOM_IGNORE_PATHS:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
ignore-paths = ${CUSTOM_IGNORE_PATHS}"
fi
if [ -n "${CUSTOM_PYTEST_CMD:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
pytest-cmd = \"${CUSTOM_PYTEST_CMD}\""
fi
if [ -n "${CUSTOM_DISABLE_IMPORTS_SORTING:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
disable-imports-sorting = ${CUSTOM_DISABLE_IMPORTS_SORTING}"
fi
if [ -n "${CUSTOM_DISABLE_TELEMETRY:-}" ]; then
CODEFLASH_CONFIG="${CODEFLASH_CONFIG}
disable-telemetry = ${CUSTOM_DISABLE_TELEMETRY}"
fi
fi
echo "=== HANDLING PYPROJECT.TOML BASED ON PROJECT TYPE ==="
# Handle different scenarios based on project type and existing configuration
case "$PROJECT_TYPE" in
"poetry"|"uv"|"pep621"|"build_system"|"generic")
echo "🔄 Merging Codeflash configuration with existing project structure..."
if [ "$HAS_CODEFLASH_CONFIG" = true ]; then
echo "📝 Updating existing [tool.codeflash] section..."
# Remove existing [tool.codeflash] section and add new one
awk '
/^\[tool\.codeflash\]/ { in_codeflash = 1; next }
in_codeflash && /^\[/ { in_codeflash = 0 }
in_codeflash { next }
{ print }
' "$PYPROJECT_FILE" > "${PYPROJECT_FILE}.tmp"
echo "" >> "${PYPROJECT_FILE}.tmp"
echo "$CODEFLASH_CONFIG" >> "${PYPROJECT_FILE}.tmp"
mv "${PYPROJECT_FILE}.tmp" "$PYPROJECT_FILE"
else
echo "📝 Adding new [tool.codeflash] section..."
echo "" >> "$PYPROJECT_FILE"
echo "$CODEFLASH_CONFIG" >> "$PYPROJECT_FILE"
fi
;;
"none")
echo "📝 Creating new pyproject.toml with Codeflash configuration..."
echo "$CODEFLASH_CONFIG" > "$PYPROJECT_FILE"
;;
*)
echo "⚠️ Unknown project type, creating new pyproject.toml..."
echo "$CODEFLASH_CONFIG" > "$PYPROJECT_FILE"
;;
esac
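# Sketch of the merge above (file contents are hypothetical): given an existing
#   [tool.poetry] ... [tool.codeflash] module-root = "old" ... [tool.black] ...
# the awk filter drops only the lines from the "[tool.codeflash]" header up to
# (not including) the next "[" section header, then the freshly generated
# [tool.codeflash] block is appended, leaving [tool.poetry] and [tool.black] untouched.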
echo "Written pyproject.toml to: $PYPROJECT_FILE"
# Verify pyproject.toml was written successfully
if [ ! -f "$PYPROJECT_FILE" ]; then
echo "Error: Failed to write pyproject.toml file" >&2
_stage "error" "\"error\":\"failed_to_write_pyproject_toml\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
# Verify pyproject.toml has content
if [ ! -s "$PYPROJECT_FILE" ]; then
echo "Error: pyproject.toml file is empty" >&2
_stage "error" "\"error\":\"pyproject_toml_empty\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
echo "=== VALIDATING FINAL PYPROJECT.TOML ==="
# Validate the final pyproject.toml structure
echo "Final pyproject.toml content:"
echo "----------------------------------------"
head -20 "$PYPROJECT_FILE"
echo "----------------------------------------"
# Verify Codeflash section exists
if grep -q "\[tool\.codeflash\]" "$PYPROJECT_FILE"; then
echo "✅ [tool.codeflash] section found in final pyproject.toml"
else
echo "❌ [tool.codeflash] section missing from final pyproject.toml" >&2
_stage "error" "\"error\":\"codeflash_section_missing\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
# Verify required Codeflash fields
REQUIRED_FIELDS=("module-root" "tests-root" "test-framework")
for field in "${REQUIRED_FIELDS[@]}"; do
if grep -q "$field" "$PYPROJECT_FILE"; then
echo "✅ Found required field: $field"
else
echo "❌ Missing required field: $field" >&2
_stage "error" "\"error\":\"missing_required_field\",\"field\":\"$field\",\"path\":\"$PYPROJECT_FILE\""
exit 1
fi
done
# Log project type preservation
if [ "$PROJECT_TYPE" != "none" ]; then
echo "✅ Preserved existing project structure: $PROJECT_TYPE"
echo "📋 Backup available at: ${PYPROJECT_FILE}.backup.*"
else
echo "📝 Created new pyproject.toml (no existing project structure)"
fi
echo "pyproject.toml verification successful"
_stage "pyproject_toml_written" "\"path\":\"$PYPROJECT_FILE\",\"size\":$(wc -c < "$PYPROJECT_FILE"),\"project_type\":\"$PROJECT_TYPE\",\"preserved_existing\":$([ "$PROJECT_TYPE" != "none" ] && echo "true" || echo "false")"
# Also write a minimal parent pyproject for Sphinx (docs/conf.py may reference ../pyproject.toml)
if [ -d .. ]; then
echo "Writing parent pyproject.toml for docs..."
cat > ../pyproject.toml <<EOF
[project]
name = "autogenerated-project"
version = "0.0.0"
description = "Autogenerated to satisfy Sphinx config during CI"
authors = [{name = "Auto-generated", email = "noreply@example.com"}]
EOF
fi
if [ -n "${VENV_PATH:-}" ] && [ -d "${VENV_PATH}" ]; then
echo "Using pre-created venv at ${VENV_PATH}"
# shellcheck disable=SC1090
source "${VENV_PATH}/bin/activate"
else
_stage "venv_setup"
echo "Setting up Python venv..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then echo "No Python interpreter found"; exit 1; fi
# Try to create virtual environment with better error handling
echo "Attempting to create virtual environment with $PY_CMD..."
if "$PY_CMD" -m venv .venv 2>/tmp/venv_error.log; then
echo "✅ Virtual environment created successfully"
else
echo "❌ Virtual environment creation failed. Error log:"
cat /tmp/venv_error.log 2>/dev/null || echo "No error log available"
# Try to install missing packages and retry
echo "Attempting to install missing packages and retry..."
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y python3-venv python3.10-venv python3-distutils 2>/dev/null || true
# Retry virtual environment creation
echo "Retrying virtual environment creation..."
if "$PY_CMD" -m venv .venv 2>/tmp/venv_error2.log; then
echo "✅ Virtual environment created successfully on retry"
else
echo "❌ Virtual environment creation failed again. Error log:"
cat /tmp/venv_error2.log 2>/dev/null || echo "No error log available"
# Fallback: try using virtualenv if available
if command -v virtualenv >/dev/null 2>&1; then
echo "Attempting fallback with virtualenv..."
if virtualenv .venv 2>/tmp/venv_error3.log; then
echo "✅ Virtual environment created with virtualenv fallback"
else
echo "❌ All virtual environment creation methods failed"
echo "Error log:"
cat /tmp/venv_error3.log 2>/dev/null || echo "No error log available"
echo "Continuing without virtual environment..."
# Best-effort fallback: resolve the site-packages glob via ls (globs do not
# expand inside assignments) and guard against an unset PYTHONPATH under set -u
SITE_PKGS=$(ls -d "$PWD"/.venv/lib/python*/site-packages 2>/dev/null | head -n1 || true)
export PYTHONPATH="${SITE_PKGS:+$SITE_PKGS:}${PYTHONPATH:-}"
fi
else
echo "❌ All virtual environment creation methods failed"
echo "Continuing without virtual environment..."
# Best-effort fallback: same site-packages resolution as above, safe under set -u
SITE_PKGS=$(ls -d "$PWD"/.venv/lib/python*/site-packages 2>/dev/null | head -n1 || true)
export PYTHONPATH="${SITE_PKGS:+$SITE_PKGS:}${PYTHONPATH:-}"
fi
fi
fi
# Only activate if .venv directory exists
if [ -d ".venv" ] && [ -f ".venv/bin/activate" ]; then
# shellcheck disable=SC1091
source .venv/bin/activate
pip install --upgrade pip >/dev/null 2>&1 || true
else
echo "⚠️ Virtual environment not available, using system Python"
fi
_stage "install_codeflash"
echo "Installing codeflash CLI with asyncio support..."
# Install codeflash with asyncio extra to support --async flag
pip install --upgrade "codeflash[asyncio]" || pip install "codeflash[asyncio]" || true
fi
# Ensure 'python3' resolves to the venv interpreter (some venvs only expose 'python')
if ! command -v python3 >/dev/null 2>&1 && command -v python >/dev/null 2>&1; then
ln -sf "$(command -v python)" "$(dirname "$(command -v python)")/python3" || true
fi
# Make local repo importable first, then utils/ for helper modules like testutils
export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
if [ -d "$PWD/utils" ]; then
export PYTHONPATH="$PWD/utils:$PYTHONPATH"
fi
_stage "pre_test_setup"
# If coverage flags are present in test command, ensure pytest-cov is installed before any test run
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
echo "Detected coverage flags in test command; installing pytest-cov..."
pip install pytest-cov || true
fi
# If reruns flags are present in test command, ensure pytest-rerunfailures is installed
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --reruns"; then
echo "Detected reruns flags in test command; installing pytest-rerunfailures..."
pip install pytest-rerunfailures || true
fi
# Run install commands in the project directory (inside venv)
if [ -n "${PRE_INSTALL_CMDS:-}" ]; then
echo "Running pre-install commands: ${PRE_INSTALL_CMDS}"
bash -lc "${PRE_INSTALL_CMDS}" || echo "Pre-install commands failed, continuing..."
fi
_stage "project_install_start"
if [ -n "${INSTALL_CMDS:-}" ]; then
echo "Running install commands: ${INSTALL_CMDS}"
if bash -lc "${INSTALL_CMDS}"; then
echo "Install commands completed successfully"
else
echo "Install commands failed (exit code: $?), continuing..."
# For repositories with custom install scripts that may fail due to
# non-standard configurations, we continue and rely on pip install fallbacks
fi
_stage "project_install_end"
fi
if [ -n "${POST_INSTALL_CMDS:-}" ]; then
echo "Running post-install commands: ${POST_INSTALL_CMDS}"
bash -lc "${POST_INSTALL_CMDS}" || echo "Post-install commands failed, continuing..."
fi
# Normalize test command for use in two contexts:
# 1) Execution (must use venv's Python)
# 2) Codeflash tracing with -m (must be a Python module, not 'python3 <script>')
PYTEST_CMD_RUN="${PYTEST_CMD_VALUE}"
if [[ "${PYTEST_CMD_RUN}" == python3\ * ]]; then PYTEST_CMD_RUN="python ${PYTEST_CMD_RUN#python3 }"; fi
# Debug: Show the original and normalized test commands
echo "Debug: Original PYTEST_CMD_VALUE: '${PYTEST_CMD_VALUE}'"
echo "Debug: Normalized PYTEST_CMD_RUN: '${PYTEST_CMD_RUN}'"
echo "Debug: TESTS_ROOT_VALUE: '${TESTS_ROOT_VALUE}'"
# Helper: detect if command looks like invoking pytest directly
_is_pytest_runner() {
case "$1" in
pytest\ *|pytest) return 0 ;;
python\ -m\ pytest*) return 0 ;;
python3\ -m\ pytest*) return 0 ;;
py.test\ *|py.test) return 0 ;;
*) return 1 ;;
esac
}
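# Usage sketch (commands are hypothetical): _is_pytest_runner "python -m pytest -q" returns 0,
# while _is_pytest_runner "python run_tests.py" returns 1 and is treated as a custom runner.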
TRACE_CMD="${PYTEST_CMD_VALUE}"
# Convert interpreter-prefix forms to module forms
if [[ "${TRACE_CMD}" == python3\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python3 -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python3\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python3 }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
elif [[ "${TRACE_CMD}" == python\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
fi
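# Example of the conversion above (hypothetical script): "python3 ./bench/run_suite.py --fast"
# becomes "bench.run_suite --fast", which codeflash can launch as a module via 'python -m'.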
# Debug: Show the trace command after processing
echo "Debug: TRACE_CMD for codeflash: '${TRACE_CMD}'"
echo "Installing project dependencies (best-effort)..."
# 1) Install repo requirements first to pin base versions
if [ -f requirements.txt ]; then pip install -r requirements.txt || true; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt || true; fi
if [ -d requirements ]; then
for f in requirements/*.txt; do
[ -f "$f" ] && pip install -r "$f" || true
done
fi
# 2) Only attempt editable install if packaging metadata likely exists
if [ -f pyproject.toml ] || [ -f setup.py ] || [ -f setup.cfg ]; then
EDITABLE_OK=0
if [ -f pyproject.toml ] && grep -qiE "^\s*\[tool\.poetry\]|^\s*\[project\]" pyproject.toml; then
EDITABLE_OK=1
fi
if [ -f setup.py ]; then
EDITABLE_OK=1
fi
if [ -f setup.cfg ] && grep -qiE "^\s*packages\s*=|^\s*package_dir\s*=|^\s*install_requires\s*=" setup.cfg; then
EDITABLE_OK=1
fi
if [ "$EDITABLE_OK" -eq 1 ]; then
echo "Attempting editable install (pip install -e .)..."
if pip install -e .; then
for extra in dev test tests ci all; do
pip install -e ".[${extra}]" || true
done
else
echo "Editable install failed; skipping editable extras and continuing without -e ."
fi
else
echo "Packaging metadata not sufficient; skipping editable install."
fi
fi
# 3) Freeze constraints and then install LLM-specified packages under constraints
if [ -n "${LLM_PIP_PACKAGES:-}" ] && [ "${LLM_PIP_PACKAGES}" != "[]" ]; then
echo "Freezing constraints before LLM package install..."
pip freeze > .cf_constraints.txt || true
echo "Installing LLM-suggested Python packages under constraints: ${LLM_PIP_PACKAGES}"
python - <<'PY'
import os, json, subprocess, sys
pkgs = []
try:
    raw = os.environ.get('LLM_PIP_PACKAGES', '[]')
    pkgs = json.loads(raw)
    if not isinstance(pkgs, list):
        pkgs = []
except Exception:
    pkgs = []
specs = []
for p in pkgs:
    if isinstance(p, str) and p.strip():
        specs.append(p.strip())
    elif isinstance(p, dict) and p.get('name'):
        name = str(p['name']).strip()
        spec = str(p.get('version_spec') or '').strip()
        if name:
            specs.append(name + (spec if spec else ''))
if specs:
    cmd = [sys.executable, '-m', 'pip', 'install', '--disable-pip-version-check', '-c', '.cf_constraints.txt'] + specs
    try:
        subprocess.run(cmd, check=False)
    except Exception:
        pass
PY
fi
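# Shapes accepted by the helper above (values are illustrative):
#   LLM_PIP_PACKAGES='["requests","numpy>=1.24"]'
#   LLM_PIP_PACKAGES='[{"name":"requests"},{"name":"numpy","version_spec":">=1.24"}]'
# Both resolve to roughly: python -m pip install -c .cf_constraints.txt requests numpy>=1.24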
# Ensure formatters/tools exist if referenced (check both normalized and original values)
if [[ "$FORMATTER_CMDS_NORM" == *"black "* ]] || [[ "${LOWER_FMT}" == *"black"* ]]; then
pip install black || true
fi
if [[ "$FORMATTER_CMDS_NORM" == *"ruff "* ]] || [[ "${LOWER_FMT}" == *"ruff"* ]]; then
pip install ruff || true
fi
# Install anthropic if key is present to enable Claude Code CLI
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
pip install --upgrade anthropic || true
fi
python -c "import pytest" 2>/dev/null || pip install pytest || true
# Ensure CodeFlash environment is properly set up before tests
echo "Setting up CodeFlash environment..."
if [ -n "${CODEFLASH_API_KEY:-}" ]; then
export CODEFLASH_API_KEY="${CODEFLASH_API_KEY}"
echo "✅ CodeFlash API key is set and exported"
# Verify CodeFlash is accessible
if command -v codeflash >/dev/null 2>&1; then
echo "✅ CodeFlash CLI is available"
else
echo "⚠️ CodeFlash CLI not found, installing..."
pip install codeflash[asyncio] || pip install codeflash || true
fi
# Test CodeFlash connectivity
if command -v codeflash >/dev/null 2>&1; then
echo "Testing CodeFlash connectivity..."
timeout 10 codeflash --version 2>/dev/null && echo "✅ CodeFlash connectivity verified" || echo "⚠️ CodeFlash connectivity test failed"
fi
else
echo "⚠️ CODEFLASH_API_KEY not set - optimization may fail"
fi
# Optional: preflight test run to detect missing modules (with timeout protection)
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Preflight test run to detect missing modules..."
echo "Using timeout protection (30 minutes) to prevent hanging tests..."
_stage "preflight_tests_start"
set +e
# Use timeout to prevent hanging tests. Note: we already know the tests directory
# exists here (checked above), and there is no pipeline, so plain $? is the right
# way to capture the exit code.
PREFLIGHT_TIMEOUT=1800 # 30 minutes
PRE_RC=124 # Default to timeout
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
echo "Running preflight test: ${PYTEST_CMD_RUN} -q (with timeout ${PREFLIGHT_TIMEOUT}s)"
timeout $PREFLIGHT_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} -q'" >/tmp/preflight.out 2>&1
PRE_RC=$?
else
echo "Running preflight test: ${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/ (with timeout ${PREFLIGHT_TIMEOUT}s)"
timeout $PREFLIGHT_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/'" >/tmp/preflight.out 2>&1
PRE_RC=$?
fi
else
# Non-pytest runner; avoid appending a tests path that may be unsupported
echo "Running preflight test: ${PYTEST_CMD_RUN} -q (with timeout ${PREFLIGHT_TIMEOUT}s)"
timeout $PREFLIGHT_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} -q'" >/tmp/preflight.out 2>&1
PRE_RC=$?
fi
# Log the result
if [ $PRE_RC -eq 124 ]; then
echo "⚠️ Preflight test timed out after ${PREFLIGHT_TIMEOUT} seconds - tests may be hanging or very slow"
echo "Continuing with optimization despite timeout..."
elif [ $PRE_RC -eq 0 ]; then
echo "✅ Preflight test completed successfully"
else
echo "⚠️ Preflight test failed with exit code $PRE_RC - this is expected for some repositories"
fi
_stage "preflight_tests_end" "\"rc\":$PRE_RC"
set -e
# Detect and register unknown pytest marks to avoid collection errors under -Werror
# Why this exists:
# - Some repositories use custom pytest markers (e.g., `@pytest.mark.download`, `@pytest.mark.slow`)
# but forget to register them in their config (pyproject.toml/setup.cfg/pytest.ini).
# - With `-Werror` or strict settings, pytest turns the UnknownMark warning into an error during
# collection, causing the test run to fail before even starting.
#
# What we do:
# 1) We parse the preflight test output (`/tmp/preflight.out`) for lines like:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# 2) We extract the marker name (e.g., `download`) using `sed` with a capturing group, then de-duplicate
# with `sort -u`.
# 3) If any unknown markers are found, we append a minimal `conftest.py` shim at repo root that registers
# each discovered marker via `config.addinivalue_line("markers", ...)`. This is the official mechanism
# to declare custom markers so pytest accepts them.
#
# Example:
# If preflight output contains multiple instances of:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# then MARKS will contain `download`, and this block will append a conftest.py snippet like:
# def pytest_configure(config):
# config.addinivalue_line("markers", "download: auto-registered marker")
# After that, subsequent pytest runs will collect tests without failing on the unknown mark.
#
# Notes:
# - We only add to conftest.py; we do NOT overwrite existing content, keeping it non-destructive.
# - If no unknown markers are detected, nothing is changed.
# - This does not alter test behavior; it simply declares markers so pytest won't error on them.
if [ -s /tmp/preflight.out ]; then
MARKS=$(sed -n "s/.*Unknown pytest\.mark\.\([A-Za-z0-9_][A-Za-z0-9_]*\).*/\1/p" /tmp/preflight.out | sort -u)
if [ -n "${MARKS}" ]; then
echo "Detected unknown pytest marks: ${MARKS}"
echo "Auto-registering markers via conftest.py shim..."
(
echo "# Auto-added by optimizer to register pytest markers"
echo "def pytest_configure(config):"
# For each discovered unknown marker (e.g., download, slow, integration), write a declaration line.
# This is equivalent to having `markers = download: ...` in pytest.ini/pyproject.toml.
for m in ${MARKS}; do
echo " config.addinivalue_line(\"markers\", \"${m}: auto-registered marker\")"
done
) >> conftest.py
fi
fi
if [ $PRE_RC -ne 0 ]; then
echo "Analyzing missing module errors..."
# Use more robust error handling to prevent SIGPIPE
MISSING=""
if [ -f /tmp/preflight.out ]; then
MISSING=$(sed -n "s/.*ModuleNotFoundError: No module named '\([^']\+\)'.*/\1/p" /tmp/preflight.out 2>/dev/null | head -20 || true)
if [ -z "$MISSING" ]; then
MISSING=$(sed -n "s/.*ImportError: No module named \([^ ]\+\).*/\1/p" /tmp/preflight.out 2>/dev/null | head -20 || true)
fi
fi
if [ -n "$MISSING" ]; then
echo "Attempting to install missing modules:"
# Use a more robust approach to avoid SIGPIPE issues
while IFS= read -r mod || [ -n "$mod" ]; do
[ -z "$mod" ] && continue
pkg="$mod"
case "$pkg" in
PIL) pkg="Pillow";;
cv2) pkg="opencv-python";;
yaml) pkg="PyYAML";;
skimage) pkg="scikit-image";;
sklearn) pkg="scikit-learn";;
Crypto) pkg="pycryptodome";;
esac
echo " - pip install $pkg"
pip install "$pkg" || true
done <<< "$MISSING"
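# Example of the mapping above: a log line "ModuleNotFoundError: No module named 'cv2'"
# yields mod=cv2, which the case statement maps to 'pip install opencv-python'.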
echo "Re-running preflight tests after installs..."
set +e
# Use timeout for re-running preflight tests to prevent hanging
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
timeout 300 bash -c "eval '${PYTEST_CMD_RUN} -q'" >/tmp/preflight2.out 2>&1 || true
else
timeout 300 bash -c "eval '${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/'" >/tmp/preflight2.out 2>&1 || true
fi
set -e
fi
fi
fi
# Full tests before optimization (with detailed logging)
TEST_LOG_DIR="${TEST_LOG_DIR:-/home/ubuntu/app/logs}"
mkdir -p "$TEST_LOG_DIR"
TS2=$(date -Is | sed 's/[:+]/-/g')
TEST_LOG_FILE="$TEST_LOG_DIR/tests-$TS2.log"
touch "$TEST_LOG_FILE" && chmod 666 "$TEST_LOG_FILE"
ln -sfn "$TEST_LOG_FILE" "$TEST_LOG_DIR/tests.log" || true
_stage "pre_tests_start"
echo "Running pre-optimization tests: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
echo "Using timeout protection (30 minutes) to prevent hanging tests..." | tee -a "$TEST_LOG_FILE"
set +e
# Use timeout to prevent hanging tests
MAIN_TEST_TIMEOUT=1800 # 30 minutes
TEST_RC=124 # Default to timeout
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
# Check if the command already includes the tests directory
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
# Command already includes tests directory, execute as-is
echo "Debug: Executing command as-is (already includes tests dir): ${PYTEST_CMD_RUN} (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
else
# Command doesn't include tests directory, append it
echo "Debug: Appending tests directory: ${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/ (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
fi
else
# Non-pytest runner, execute as-is
echo "Debug: Non-pytest runner, executing as-is: ${PYTEST_CMD_RUN} (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
fi
else
# No tests directory, execute as-is
echo "Debug: No tests directory, executing as-is: ${PYTEST_CMD_RUN} (with timeout ${MAIN_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE"
timeout $MAIN_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
fi
# Log timeout result
if [ $TEST_RC -eq 124 ]; then
echo "⚠️ Pre-optimization tests timed out after ${MAIN_TEST_TIMEOUT} seconds" | tee -a "$TEST_LOG_FILE"
echo "This may indicate hanging tests or very slow test execution" | tee -a "$TEST_LOG_FILE"
fi
# Dynamic fallback for non-pytest runners emitting argparse errors
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
echo "Pre-optimization tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
_stage "pre_tests_end" "\"rc\":$TEST_RC"
# Persist exit code early if wrapper provided EXIT_FILE
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
# If tests below threshold, run Claude Code CLI setup loop
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
echo "Evaluating test pass ratio for Claude Code CLI setup gate..." | tee -a "$TEST_LOG_FILE"
# Add timeout protection for test parsing to prevent hanging
echo "DEBUG: Starting test parsing with timeout protection..." | tee -a "$TEST_LOG_FILE"
# Use a subshell with timeout to prevent hanging
(
# Attempt to extract passed/failed/errors from log tail (robust to order)
# Debug: Show what we're looking for in the log
echo "DEBUG: Looking for test summary in log file..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Last 10 lines of test log:" | tee -a "$TEST_LOG_FILE"
# Use a temporary file to avoid reading from the same file being written to
TEMP_LOG_TAIL=$(mktemp)
tail -10 "$TEST_LOG_FILE" > "$TEMP_LOG_TAIL" 2>/dev/null || echo "No log content available"
cat "$TEMP_LOG_TAIL" | tee -a "$TEST_LOG_FILE"
rm -f "$TEMP_LOG_TAIL"
# Extract test counts with corrected regex patterns
# Handle different pytest output formats: "X passed, Y failed, Z errors" or "X passed, Y errors"
# Use grep to extract the number immediately before the keyword (handles ANSI codes)
# '|| true' keeps set -e from killing the subshell when a count is absent;
# 'error(s)?' also matches pytest's singular "1 error" summary
PASSED=$(grep -oE '[0-9]+ passed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
FAILED=$(grep -oE '[0-9]+ failed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
ERRORS=$(grep -oE '[0-9]+ error(s)?' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
# Debug: Show what each regex extracted
echo "DEBUG: Raw extracted values - PASSED='$PASSED' FAILED='$FAILED' ERRORS='$ERRORS'" | tee -a "$TEST_LOG_FILE"
# If no summary line found (interrupted test run), count individual test results
if [ -z "$PASSED" ] && [ -z "$FAILED" ] && [ -z "$ERRORS" ]; then
echo "DEBUG: No test summary found, analyzing test execution..." | tee -a "$TEST_LOG_FILE"
# Check if tests actually ran by looking for common test execution indicators
if grep -q "No such file or directory\|command not found\|make: \*\*\*" "$TEST_LOG_FILE"; then
echo "DEBUG: Detected build/test command failure - tests never executed" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat command failure as an error
elif grep -q "PASSED\|FAILED\|SKIPPED" "$TEST_LOG_FILE"; then
echo "DEBUG: Found individual test results, counting them..." | tee -a "$TEST_LOG_FILE"
# grep -c already prints 0 on no match (while exiting 1), so '|| echo 0'
# would capture "0\n0" and break the arithmetic below; use '|| true' instead
PASSED=$(grep -c "PASSED" "$TEST_LOG_FILE" 2>/dev/null || true)
FAILED=$(grep -c "FAILED" "$TEST_LOG_FILE" 2>/dev/null || true)
SKIPPED=$(grep -c "SKIPPED" "$TEST_LOG_FILE" 2>/dev/null || true)
ERRORS=0 # Individual test results don't show "errors" - they show as "FAILED"
echo "DEBUG: Individual test counts - PASSED=$PASSED FAILED=$FAILED SKIPPED=$SKIPPED" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: No test execution detected - treating as setup failure" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat as setup failure
fi
fi
# Set defaults if extraction failed
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
echo "DEBUG: After extraction and defaults - PASSED=$PASSED FAILED=$FAILED ERRORS=$ERRORS" | tee -a "$TEST_LOG_FILE"
# Calculate total and ratio
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
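# Worked example (hypothetical counts): PASSED=30 FAILED=8 ERRORS=2 gives
# TOTAL=40 and RATIO=75, which clears the 50% gate checked below.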
echo "DEBUG: Calculated totals - TOTAL=$TOTAL RATIO=$RATIO%" | tee -a "$TEST_LOG_FILE"
echo "Parsed test summary: passed=$PASSED failed=$FAILED errors=$ERRORS total=$TOTAL ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
# Export variables for use outside the subshell
echo "PASSED=$PASSED" > /tmp/test_parsing_result
echo "FAILED=$FAILED" >> /tmp/test_parsing_result
echo "ERRORS=$ERRORS" >> /tmp/test_parsing_result
echo "TOTAL=$TOTAL" >> /tmp/test_parsing_result
echo "RATIO=$RATIO" >> /tmp/test_parsing_result
) &
PARSE_PID=$!
# Wait for parsing with a timeout. 'timeout 30 wait $PID' cannot work here:
# 'wait' is a shell builtin, not an external command, so poll the PID instead.
PARSE_WAITED=0
while kill -0 "$PARSE_PID" 2>/dev/null && [ "$PARSE_WAITED" -lt 30 ]; do
sleep 1
PARSE_WAITED=$((PARSE_WAITED + 1))
done
if ! kill -0 "$PARSE_PID" 2>/dev/null; then
wait "$PARSE_PID" 2>/dev/null || true
echo "DEBUG: Test parsing completed successfully" | tee -a "$TEST_LOG_FILE"
# Load results from temp file
if [ -f /tmp/test_parsing_result ]; then
source /tmp/test_parsing_result
rm -f /tmp/test_parsing_result
else
echo "DEBUG: No parsing results found, using fallback values" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
else
echo "DEBUG: Test parsing timed out, using fallback values" | tee -a "$TEST_LOG_FILE"
kill "$PARSE_PID" 2>/dev/null || true
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
echo "Final test parsing results: passed=$PASSED failed=$FAILED errors=$ERRORS total=$TOTAL ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -eq 0 ] || [ "$RATIO" -lt 50 ]; then
echo "Tests below threshold; invoking Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to start Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Current working directory: $(pwd)" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Environment check - ANTHROPIC_API_KEY: $([ -n "${ANTHROPIC_API_KEY:-}" ] && echo "SET" || echo "NOT SET")" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Environment check - GITHUB_TOKEN: $([ -n "${GITHUB_TOKEN:-}" ] && echo "SET" || echo "NOT SET")" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Environment check - CODEFLASH_API_KEY: $([ -n "${CODEFLASH_API_KEY:-}" ] && echo "SET" || echo "NOT SET")" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to call _stage claude_round0_start..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round0_start"
echo "DEBUG: Successfully called _stage claude_round0_start" | tee -a "$TEST_LOG_FILE"
# ============================================================================
# CLAUDE CODE CLI SETUP INTEGRATION
# ============================================================================
# This section replaces the previous Python-based LLM setup helper with
# Claude Code CLI, which provides a more robust and interactive approach to
# repository setup. Claude Code CLI can:
#
# 1. Analyze repository structure and dependencies
# 2. Install missing packages and fix import issues
# 3. Handle custom test runners and build systems
# 4. Iteratively debug and resolve setup problems
# 5. Work directly in the terminal with full context
#
# The integration includes:
# - Comprehensive setup prompts with project context
# - Automatic CLI installation (npm or pip fallback)
# - Timeout protection (30 min initial, 20 min additional rounds)
# - Detailed logging of all Claude actions
# - Graceful fallback if Claude Code CLI is unavailable
# ============================================================================
echo "DEBUG: About to create Claude setup prompt..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Checking if /tmp is writable..." | tee -a "$TEST_LOG_FILE"
if [ -w "/tmp" ]; then
echo "DEBUG: /tmp is writable" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: ERROR - /tmp is not writable!" | tee -a "$TEST_LOG_FILE"
fi
# Create comprehensive prompt for Claude Code CLI
echo "DEBUG: Creating prompt file..." | tee -a "$TEST_LOG_FILE"
cat > /tmp/claude_setup_prompt.md << 'EOF'
# Repository Setup Assistant
You are an expert Python developer tasked with setting up a repository for testing. Your goal is to analyze the repository, install missing dependencies, and ensure tests can run successfully.
## Your Mission
1. **Analyze the repository structure** to understand the project layout
2. **Identify and install missing dependencies** that are causing test failures
3. **Fix common setup issues** like import path problems, missing packages, or configuration issues
4. **Run tests** to verify the setup is working
5. **Achieve at least 50% test pass rate** (you don't need 100% - some tests may legitimately fail)
## Available Tools & Capabilities
- **Bash**: Full terminal access in the repository directory with sudo privileges
- **Edit**: Modify any file in the repository (Python, config files, setup files, etc.)
- **FileManager**: Browse, read, and analyze repository structure and files
- **WebSearch**: Look up documentation, package information, and solutions online
- **Python Environment**: Virtual environment activated at `.venv/` with `.venv/bin/python` and `.venv/bin/pip`
- **System Access**: Can install system packages with `sudo apt-get install`
- **Package Management**: Can install Python packages with pip, conda, or other package managers
- **File Operations**: Can create, modify, delete files and directories
- **Process Management**: Can run tests, scripts, and other processes
## Key Guidelines
- **Focus on missing dependencies**: Look for ImportError, ModuleNotFoundError in test outputs
- **Use project's own install scripts** when available (like `devscripts/install_deps.py`, `setup.py`, etc.)
- **Install from requirements files** if they exist (`requirements.txt`, `requirements-dev.txt`, etc.)
- **Check pyproject.toml** for project dependencies and optional dependencies
- **Handle custom test runners**: Some projects use custom test scripts instead of pytest
- **Fix import path issues**: Add PYTHONPATH exports if needed
- **Install test-specific dependencies**: pytest plugins, coverage tools, etc.
## Common Patterns to Handle
1. **Custom dependency installers**: `python devscripts/install_deps.py`, `pip install -e .`
2. **Test runners with special args**: Projects may have `run_tests.py` or similar
3. **Missing test dependencies**: pytest plugins, mock libraries, etc.
4. **Path issues**: Repository modules not in PYTHONPATH
5. **Optional dependencies**: Install extras like `pip install -e .[test]`
## Effective Tool Usage Strategy
- **Start with FileManager**: Explore repository structure to understand the project layout
- **Use WebSearch**: Look up package documentation, installation guides, and common issues
- **Use Edit strategically**: Modify configuration files, fix import paths, update dependencies
- **Use Bash systematically**: Run commands step by step, check outputs, iterate based on results
- **Combine tools**: Use FileManager to find files, WebSearch to understand them, Edit to fix them, Bash to test
## Advanced Troubleshooting Techniques
- **Version conflicts**: Use `pip install --upgrade` or specific versions
- **Environment issues**: Check Python version, virtual environment activation
- **Permission problems**: Use `sudo` for system packages, check file permissions
- **Network issues**: Use `--timeout` flags, check proxy settings
- **Build failures**: Install build tools like `build-essential`, `python3-dev`
- **Missing system libraries**: `sudo apt-get install -y libffi-dev libssl-dev libxml2-dev`
## Success Criteria
- Tests run without ImportError/ModuleNotFoundError
- At least 50% of tests pass (some failures are acceptable)
- No critical setup errors that prevent test execution
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Current Context
- Repository: {REPO_URL}
- Tests directory: {TESTS_ROOT}
- Test command: {PYTEST_CMD}
- Previous test output shows dependency/setup issues
## Custom Run Configuration (if applicable)
{CUSTOM_RUN_CONTEXT}
## Recent Test Errors (Summary)
{TEST_ERRORS_SUMMARY}
Start by examining the repository structure and recent test failures, then systematically address the issues.
EOF
echo "DEBUG: Prompt file created successfully" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Prompt file size: $(wc -c < /tmp/claude_setup_prompt.md) bytes" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to replace placeholders in prompt..." | tee -a "$TEST_LOG_FILE"
# Replace placeholders in prompt
sed -i "s|{REPO_URL}|${GITHUB_REPO_URL:-unknown}|g" /tmp/claude_setup_prompt.md
sed -i "s|{TESTS_ROOT}|${TESTS_ROOT_VALUE:-}|g" /tmp/claude_setup_prompt.md
sed -i "s|{PYTEST_CMD}|${PYTEST_CMD_VALUE:-}|g" /tmp/claude_setup_prompt.md
# Add custom run context if this is a custom optimization
CUSTOM_RUN_CONTEXT=""
if [ "$CUSTOM_RUN_MODE" = true ]; then
echo "DEBUG: Adding custom run context to Claude prompt..." | tee -a "$TEST_LOG_FILE"
CUSTOM_RUN_CONTEXT="
### Custom Optimization Configuration
- **Optimization Mode**: ${OPTIMIZATION_MODE:-'not specified'}
- **Custom Module Root**: ${CUSTOM_MODULE_ROOT:-'using default'}
- **Custom Tests Root**: ${CUSTOM_TESTS_ROOT:-'using default'}
- **Async Mode**: ${CUSTOM_ASYNC_MODE:-'default'}
- **Verbose Output**: ${CUSTOM_VERBOSE:-'default'}
- **No Pull Request**: ${CUSTOM_NO_PR:-'default'}
- **Benchmark Mode**: ${CUSTOM_BENCHMARK:-'default'}
### Mode-Specific Targets"
case "$OPTIMIZATION_MODE" in
"single_function")
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
- **Target File**: ${CUSTOM_FILE_PATH:-'not specified'}
- **Target Function**: ${CUSTOM_FUNCTION_NAME:-'not specified'}
- **Focus**: Ensure the target function's dependencies are properly installed
- **Note**: This is a single function optimization targeting a specific function in a specific file"
;;
"trace_and_optimize")
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
- **Target Script**: ${CUSTOM_SCRIPT_PATH:-'not specified'}
- **Trace File**: ${CUSTOM_TRACE_FILE:-'default location'}
- **Tracer Timeout**: ${CUSTOM_TRACER_TIMEOUT:-'default'}
- **Trace Only**: ${CUSTOM_TRACE_ONLY:-'false'}
- **Focus**: Ensure the script/entry point and its dependencies are ready
- **Note**: This will trace execution of a specific script and optimize all functions it calls"
;;
"optimize_all")
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
- **Target Directory**: ${CUSTOM_TARGET_DIRECTORY:-'entire codebase'}
- **Focus**: Ensure all project dependencies and test infrastructure are ready
- **Note**: This will analyze and optimize all functions in the project"
;;
esac
CUSTOM_RUN_CONTEXT="${CUSTOM_RUN_CONTEXT}
### Custom Run Setup Requirements
- Pay special attention to any custom configuration requirements for this optimization mode
- Ensure codeflash[asyncio] is installed if async mode is enabled
- Verify that all target files/scripts exist and are accessible
- Install any additional dependencies needed for the specific optimization mode
- Ensure the custom module root and tests root are properly configured"
else
CUSTOM_RUN_CONTEXT="No custom run configuration - using standard optimization setup"
fi
echo "DEBUG: Custom run context prepared, length: ${#CUSTOM_RUN_CONTEXT}" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to build error summary..." | tee -a "$TEST_LOG_FILE"
# Build a short error summary from the current test log (first 30 error lines) - simplified approach
echo "DEBUG: Reading test log file for errors..." | tee -a "$TEST_LOG_FILE"
TEST_ERRORS_SUMMARY=$(head -n 400 "$TEST_LOG_FILE" 2>/dev/null | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30 | sed 's/|/\|/g' | sed 's/\\/\\\\/g' || echo "No errors found")
echo "DEBUG: Error summary extracted, length: ${#TEST_ERRORS_SUMMARY}" | tee -a "$TEST_LOG_FILE"
TEST_ERRORS_SUMMARY=${TEST_ERRORS_SUMMARY:-"No error summary available"}
echo "DEBUG: About to escape newlines..." | tee -a "$TEST_LOG_FILE"
# Escape newlines for sed replacement (simplified approach)
TEST_ERRORS_SUMMARY=$(echo "$TEST_ERRORS_SUMMARY" | tr '\n' '|' | sed 's/|/\\n/g')
echo "DEBUG: About to replace TEST_ERRORS_SUMMARY placeholder..." | tee -a "$TEST_LOG_FILE"
sed -i "s|{TEST_ERRORS_SUMMARY}|${TEST_ERRORS_SUMMARY}|g" /tmp/claude_setup_prompt.md
echo "DEBUG: About to replace CUSTOM_RUN_CONTEXT placeholder..." | tee -a "$TEST_LOG_FILE"
# Escape newlines for sed replacement
CUSTOM_RUN_CONTEXT_ESCAPED=$(echo "$CUSTOM_RUN_CONTEXT" | tr '\n' '|' | sed 's/|/\\n/g')
sed -i "s|{CUSTOM_RUN_CONTEXT}|${CUSTOM_RUN_CONTEXT_ESCAPED}|g" /tmp/claude_setup_prompt.md
echo "DEBUG: Placeholder replacement completed" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to snapshot environment..." | tee -a "$TEST_LOG_FILE"
# Snapshot current environment into constraints to avoid breaking pinned deps
pip freeze > .cf_constraints.txt || true
echo "DEBUG: Environment snapshot completed" | tee -a "$TEST_LOG_FILE"
export PIP_CONSTRAINT="$(pwd)/.cf_constraints.txt" # pip reads PIP_CONSTRAINT (singular)
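# Illustration (hypothetical pins): if .cf_constraints.txt contains "numpy==1.26.4",
# a later 'pip install numpy' resolves to 1.26.4 rather than upgrading it.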
# If common system deps are missing based on errors, try lightweight installs (best-effort)
if grep -q "libGL.so.1" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing libGL.so.1; installing headless OpenGL libs (libgl1, libglib2.0-0, libsm6, libxrender1, libxext6)..." | tee -a "$TEST_LOG_FILE"
# Handle APT lock issues by waiting and retrying
APT_RETRY_COUNT=0
APT_MAX_RETRIES=3
while [ $APT_RETRY_COUNT -lt $APT_MAX_RETRIES ]; do
if sudo apt-get update -y >/dev/null 2>&1; then
echo "APT update succeeded on attempt $((APT_RETRY_COUNT + 1))" | tee -a "$TEST_LOG_FILE"
break
else
APT_RETRY_COUNT=$((APT_RETRY_COUNT + 1))
echo "APT update failed on attempt $APT_RETRY_COUNT, retrying in 10 seconds..." | tee -a "$TEST_LOG_FILE"
sleep 10
fi
done
# Install packages with retry logic
APT_RETRY_COUNT=0
while [ $APT_RETRY_COUNT -lt $APT_MAX_RETRIES ]; do
if sudo apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 >/dev/null 2>&1; then
echo "OpenGL libraries installed successfully on attempt $((APT_RETRY_COUNT + 1))" | tee -a "$TEST_LOG_FILE"
break
else
APT_RETRY_COUNT=$((APT_RETRY_COUNT + 1))
echo "OpenGL library installation failed on attempt $APT_RETRY_COUNT, retrying in 10 seconds..." | tee -a "$TEST_LOG_FILE"
sleep 10
fi
done
# As a fallback in headless environments, prefer opencv-python-headless to avoid GUI backends
if pip show opencv-python >/dev/null 2>&1; then
echo "Installing opencv-python-headless as fallback for headless environment" | tee -a "$TEST_LOG_FILE"
pip install --upgrade opencv-python-headless || true
fi
fi
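# A reusable sketch of the apt retry pattern above; this hypothetical helper is defined for
# illustration only and is not wired into the flow below.
_apt_retry() {
    local tries=0 max="${APT_MAX_RETRIES:-3}"
    while [ "$tries" -lt "$max" ]; do
        if sudo apt-get "$@" >/dev/null 2>&1; then return 0; fi
        tries=$((tries + 1))
        sleep 10
    done
    return 1
}
# Example usage: _apt_retry update -y && _apt_retry install -y --no-install-recommends libgl1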
if grep -q "No module named 'tkinter'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing tkinter; installing Python Tk..." | tee -a "$TEST_LOG_FILE"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y --no-install-recommends python3-tk || true
fi
if grep -q "cannot import name 'Aer' from 'qiskit'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing qiskit-aer; installing..." | tee -a "$TEST_LOG_FILE"
pip install qiskit-aer || true
fi
# Run Claude Code CLI with the setup prompt
echo "DEBUG: About to start Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
echo "Starting Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to call _stage claude_setup_init..." | tee -a "$TEST_LOG_FILE"
_stage "claude_setup_init"
echo "DEBUG: Successfully called _stage claude_setup_init" | tee -a "$TEST_LOG_FILE"
# Prepare a session log regardless of availability so FE always finds a file
echo "DEBUG: About to create Claude log file..." | tee -a "$TEST_LOG_FILE"
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-$(date -u +%Y-%m-%dT%H-%M-%S).log"
echo "DEBUG: Claude log path: $CLAUDE_LOG" | tee -a "$TEST_LOG_FILE"
touch "$CLAUDE_LOG" 2>/dev/null || true
echo "DEBUG: Claude log file created" | tee -a "$TEST_LOG_FILE"
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
echo "DEBUG: Claude log file permissions set" | tee -a "$TEST_LOG_FILE"
echo "[claude] initializing setup session" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Initial log message written" | tee -a "$TEST_LOG_FILE"
# Ensure common local bin directory is in PATH (curl installer often writes here)
echo "DEBUG: About to update PATH..." | tee -a "$TEST_LOG_FILE"
export PATH="$HOME/.local/bin:$PATH"
echo "DEBUG: PATH updated, current PATH: $PATH" | tee -a "$TEST_LOG_FILE"
# Check if claude (Claude Code CLI) is available, or fallback to npx runner
echo "DEBUG: About to check for Claude Code CLI availability..." | tee -a "$TEST_LOG_FILE"
CLAUDE_CMD=""
echo "Checking for Claude Code CLI availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for global claude command first (should be pre-installed)
echo "DEBUG: Checking for global claude command..." | tee -a "$TEST_LOG_FILE"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found global claude CLI at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Found global claude command" | tee -a "$TEST_LOG_FILE"
# Test if it's actually Claude Code CLI
echo "DEBUG: Testing if global claude is Claude Code CLI..." | tee -a "$TEST_LOG_FILE"
if timeout 10 claude --version 2>&1 | grep -q "Claude Code"; then
echo "Confirmed: Global claude is Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Confirmed global claude is Claude Code CLI" | tee -a "$TEST_LOG_FILE"
else
echo "Warning: Global claude may not be Claude Code CLI, will try npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Global claude is not Claude Code CLI, clearing CLAUDE_CMD" | tee -a "$TEST_LOG_FILE"
CLAUDE_CMD=""
fi
else
echo "DEBUG: Global claude command not found" | tee -a "$TEST_LOG_FILE"
fi
# Check for npx if claude not found or not confirmed
echo "DEBUG: Checking for npx command..." | tee -a "$TEST_LOG_FILE"
if [ -z "$CLAUDE_CMD" ] && command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Found npx command, set CLAUDE_CMD to npx" | tee -a "$TEST_LOG_FILE"
# Test npx availability
echo "DEBUG: Testing npx availability..." | tee -a "$TEST_LOG_FILE"
if timeout 10 npx --version >/dev/null 2>&1; then
echo "npx is available and working" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: npx is working correctly" | tee -a "$TEST_LOG_FILE"
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: npx test failed, clearing CLAUDE_CMD" | tee -a "$TEST_LOG_FILE"
CLAUDE_CMD=""
fi
else
echo "DEBUG: npx not found or CLAUDE_CMD already set" | tee -a "$TEST_LOG_FILE"
fi
if [ -z "$CLAUDE_CMD" ]; then
echo "DEBUG: CLAUDE_CMD is empty, attempting installation..." | tee -a "$TEST_LOG_FILE"
echo "Claude Code CLI not found; attempting installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm global install first (with proper permissions)
echo "DEBUG: Checking for npm command..." | tee -a "$TEST_LOG_FILE"
if command -v npm >/dev/null 2>&1; then
echo "DEBUG: Found npm command" | tee -a "$TEST_LOG_FILE"
echo "Installing @anthropic-ai/claude-code via npm..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first (for system-wide install)
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed, trying user-level install..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install (no sudo)
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after user-level install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed, will rely on npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# If still no claude, try installing Node.js
if [ -z "$CLAUDE_CMD" ] && command -v apt-get >/dev/null 2>&1; then
echo "Installing Node.js LTS to enable Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
sudo apt-get update -y >/dev/null 2>&1 || true
# Install Node.js repository
if command -v curl >/dev/null 2>&1; then
echo "Adding Node.js repository..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - 2>&1 | tee -a "$CLAUDE_LOG" || true
fi
# Install Node.js
echo "Installing Node.js..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if sudo apt-get install -y nodejs 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Node.js installation succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm install again with proper permission handling
if command -v npm >/dev/null 2>&1; then
echo "Retrying npm install after Node.js installation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed after Node.js install, trying user-level..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + user-level npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed after Node.js install, will use npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
else
echo "Node.js installation failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Final check for available commands
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after installation at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI after installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test npx availability with a simple command
echo "Testing npx availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 30 npx --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test if we can actually run the Claude Code CLI via npx
echo "Testing Claude Code CLI via npx..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 60 npx -y @anthropic-ai/claude-code --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude Code CLI via npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: Claude Code CLI via npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This could be due to network issues or package availability" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# Guard: if still unavailable, skip gracefully
echo "DEBUG: Final check - CLAUDE_CMD value: '${CLAUDE_CMD:-EMPTY}'" | tee -a "$TEST_LOG_FILE"
if [ -z "$CLAUDE_CMD" ]; then
echo "DEBUG: CLAUDE_CMD is still empty, skipping setup assistance" | tee -a "$TEST_LOG_FILE"
echo "❌ Claude Code CLI unavailable; skipping setup assistance" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "All installation attempts failed:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Global npm install failed (permission issues)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - User-level npm install failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - npx fallback not available" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing without Claude Code CLI assistance..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to call _stage claude_round0_end with unavailable flag..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round0_end" "\"rc\":127,\"unavailable\":true"
echo "DEBUG: Successfully called _stage claude_round0_end" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: CLAUDE_CMD is set, proceeding with Claude Code CLI execution" | tee -a "$TEST_LOG_FILE"
echo "✅ Claude Code CLI is available and ready to use" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Using command: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Ensure Claude Code CLI is authenticated (headless)
echo "DEBUG: About to check ANTHROPIC_API_KEY..." | tee -a "$TEST_LOG_FILE"
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
echo "DEBUG: ANTHROPIC_API_KEY is not set" | tee -a "$TEST_LOG_FILE"
echo "ANTHROPIC_API_KEY not set; Claude CLI may fail to authenticate" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Setting ANTHROPIC_API_KEY environment variable for Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
echo "DEBUG: Set ANTHROPIC_API_KEY to empty value" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: ANTHROPIC_API_KEY is set" | tee -a "$TEST_LOG_FILE"
echo "ANTHROPIC_API_KEY is set; configuring Claude CLI authentication..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Best-effort non-interactive auth via config (ignore failures)
echo "Attempting to set API key via Claude CLI config..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to run Claude CLI config command..." | tee -a "$TEST_LOG_FILE"
# Note: API key is passed to Claude CLI config but not logged for security
if timeout 10 $CLAUDE_CMD config set api_key "${ANTHROPIC_API_KEY}" 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI API key configuration succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Claude CLI config command succeeded" | tee -a "$TEST_LOG_FILE"
else
echo "Claude CLI API key configuration failed, will rely on environment variable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Claude CLI config command failed" | tee -a "$TEST_LOG_FILE"
fi
# Also set as environment variable as backup
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY}"
echo "DEBUG: Exported ANTHROPIC_API_KEY as environment variable" | tee -a "$TEST_LOG_FILE"
fi
# Run Claude Code CLI with the setup prompt using print mode for automation
echo "DEBUG: About to run Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
echo "Running Claude Code CLI setup session using: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to display prompt content..." | tee -a "$TEST_LOG_FILE"
cat /tmp/claude_setup_prompt.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Prompt content displayed successfully" | tee -a "$TEST_LOG_FILE"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
echo "DEBUG: Set REPO_DIR to: $REPO_DIR" | tee -a "$TEST_LOG_FILE"
# Pre-flight checks for Claude Code CLI
echo "DEBUG: About to perform pre-flight checks..." | tee -a "$TEST_LOG_FILE"
echo "Performing pre-flight checks for Claude Code CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check if ANTHROPIC_API_KEY is set
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
echo "❌ ANTHROPIC_API_KEY is not set - Claude Code CLI will fail" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Setting a placeholder API key to prevent immediate crash..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Note: Placeholder key is set but not logged for security
export ANTHROPIC_API_KEY="placeholder-key-for-testing"
else
echo "✅ ANTHROPIC_API_KEY is set" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Check if repository directory exists and is writable
if [ -d "$REPO_DIR" ]; then
if [ -w "$REPO_DIR" ]; then
echo "✅ Repository directory $REPO_DIR exists and is writable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "⚠️ Repository directory $REPO_DIR exists but is not writable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Attempting to fix permissions..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
chmod -R 755 "$REPO_DIR" 2>/dev/null || true
fi
else
echo "❌ Repository directory $REPO_DIR does not exist" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Creating directory..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
mkdir -p "$REPO_DIR" 2>/dev/null || true
fi
# Check Claude Code CLI installation
echo "Testing Claude Code CLI installation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if command -v "$CLAUDE_CMD" >/dev/null 2>&1; then
echo "✅ Claude Code CLI command found: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI command not found: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This will likely cause immediate failure" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Build Claude Code CLI command with proper arguments
# Note: Claude Code CLI uses different argument structure than expected
CLAUDE_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for different CLI versions
CLAUDE_FLAGS_PERM="$CLAUDE_BASE_ARGS --print --output-format text --max-turns 60 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit,FileManager,WebSearch"
CLAUDE_FLAGS_MIN="$CLAUDE_BASE_ARGS --print --output-format text --max-turns 60"
CLAUDE_FLAGS_BASIC="$CLAUDE_BASE_ARGS --print --output-format text"
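# Illustrative sketch of the graduated fallback these three flag sets implement (the actual
# retries below also vary timeouts and log destinations):
#   for flags in "$CLAUDE_FLAGS_PERM" "$CLAUDE_FLAGS_MIN" "$CLAUDE_FLAGS_BASIC"; do
#       timeout "$TIMEOUT_DURATION" $CLAUDE_CMD $flags < /tmp/claude_setup_prompt.md && break
#   done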
echo "Executing: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test Claude CLI is working with a simple command first
echo "Testing Claude CLI availability (version)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Handle npx differently as it may take longer to download and run
if [[ "$CLAUDE_CMD" == npx* ]]; then
echo "Testing npx-based Claude CLI (may take longer for first run)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 120 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI via npx version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI via npx version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is common for npx on first run due to package download time" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
if timeout 30 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Use timeout to enforce a hard limit; wait synchronously so tests run after it finishes/timeout
echo "DEBUG: About to set error handling flags..." | tee -a "$TEST_LOG_FILE"
set +e # Don't exit on failure
set -o pipefail
echo "DEBUG: Error handling flags set" | tee -a "$TEST_LOG_FILE"
# Try with full permissions first (pipe prompt via stdin to avoid argument parsing issues)
echo "DEBUG: About to attempt Claude CLI execution..." | tee -a "$TEST_LOG_FILE"
echo "Attempting Claude CLI execution with elevated permissions (non-interactive)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_EXIT_CODE=1
echo "DEBUG: Set CLAUDE_EXIT_CODE to 1" | tee -a "$TEST_LOG_FILE"
# First attempt: Try with full permissions
# Use longer timeout for npx as it may need to download packages
echo "DEBUG: Setting timeout duration..." | tee -a "$TEST_LOG_FILE"
TIMEOUT_DURATION=2700
if [[ "$CLAUDE_CMD" == npx* ]]; then
TIMEOUT_DURATION=3600 # 60 minutes for npx (includes download time)
echo "Using extended timeout (60 min) for npx-based Claude CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Set extended timeout for npx" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: Using standard timeout (45 min)" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: Final timeout duration: $TIMEOUT_DURATION seconds" | tee -a "$TEST_LOG_FILE"
# Create a debug log file for Claude Code CLI
echo "DEBUG: About to create Claude debug log..." | tee -a "$TEST_LOG_FILE"
CLAUDE_DEBUG_LOG="/tmp/claude_debug_$(date +%s).log"
echo "Creating Claude Code CLI debug log: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: Claude debug log path: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE"
# Pre-execution environment validation
echo "🔍 Pre-execution environment validation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Working directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Python version: $(python --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Pip version: $(pip --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Virtualenv version: $(virtualenv --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI version: $($CLAUDE_CMD --version 2>&1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Available disk space: $(df -h . | tail -1)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Memory usage: $(free -h | grep Mem)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Repository size: $(du -sh $REPO_DIR 2>/dev/null || echo 'Unknown')" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
echo "DEBUG: Repository directory exists, executing Claude CLI in repo dir..." | tee -a "$TEST_LOG_FILE"
echo "Executing Claude CLI in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Command: ( cd \"$REPO_DIR\" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md )" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute Claude CLI command..." | tee -a "$TEST_LOG_FILE"
# Execute with detailed error capture
{
echo "=== CLAUDE CODE CLI EXECUTION START ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Working directory: $REPO_DIR" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Command: $CLAUDE_CMD $CLAUDE_FLAGS_PERM" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Environment variables:" | tee -a "$CLAUDE_DEBUG_LOG"
_safe_log_env "(ANTHROPIC|PATH|NODE)" "$CLAUDE_DEBUG_LOG"
echo "=== EXECUTION OUTPUT ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "DEBUG: Starting Claude CLI execution..." | tee -a "$TEST_LOG_FILE"
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
# This { ... } block is itself piped to tee and therefore runs in a subshell; persist the
# status to a file so the parent shell can recover it after the block
echo "$CLI_STATUS" > /tmp/claude_cli_status
echo "DEBUG: Claude CLI execution completed with exit code: $CLI_STATUS" | tee -a "$TEST_LOG_FILE"
echo "=== EXECUTION END ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Exit code: $CLI_STATUS" | tee -a "$CLAUDE_DEBUG_LOG"
} | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "DEBUG: Repository directory does not exist, executing Claude CLI in current directory..." | tee -a "$TEST_LOG_FILE"
echo "Executing Claude CLI in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Command: timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute Claude CLI command in current directory..." | tee -a "$TEST_LOG_FILE"
# Execute with detailed error capture
{
echo "=== CLAUDE CODE CLI EXECUTION START ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Working directory: $(pwd)" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Command: $CLAUDE_CMD $CLAUDE_FLAGS_PERM" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Environment variables:" | tee -a "$CLAUDE_DEBUG_LOG"
_safe_log_env "(ANTHROPIC|PATH|NODE)" "$CLAUDE_DEBUG_LOG"
echo "=== EXECUTION OUTPUT ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "DEBUG: Starting Claude CLI execution in current directory..." | tee -a "$TEST_LOG_FILE"
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
# As above, the enclosing block runs in a pipeline subshell; persist the status to a file
echo "$CLI_STATUS" > /tmp/claude_cli_status
echo "DEBUG: Claude CLI execution completed with exit code: $CLI_STATUS" | tee -a "$TEST_LOG_FILE"
echo "=== EXECUTION END ===" | tee -a "$CLAUDE_DEBUG_LOG"
echo "Exit code: $CLI_STATUS" | tee -a "$CLAUDE_DEBUG_LOG"
} | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# The { ... } | tee blocks above ran in pipeline subshells, so CLI_STATUS assigned inside them
# is lost here; recover the real exit code from the status file, defaulting to 1 if absent
echo "DEBUG: About to recover CLI_STATUS from status file..." | tee -a "$TEST_LOG_FILE"
CLI_STATUS=$(cat /tmp/claude_cli_status 2>/dev/null || echo 1)
rm -f /tmp/claude_cli_status
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "DEBUG: CLI_STATUS set to: $CLI_STATUS" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: CLAUDE_EXIT_CODE set to: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE"
echo "Claude CLI attempt 1 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Enhanced error handling and retry logic
echo "DEBUG: About to check Claude CLI exit code..." | tee -a "$TEST_LOG_FILE"
if [ $CLAUDE_EXIT_CODE -ne 0 ]; then
echo "DEBUG: Claude CLI failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE"
echo "❌ Claude CLI attempt 1 failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Analyze failure and apply fixes
echo "🔍 Analyzing failure and applying fixes..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for common failure patterns and fix them
if grep -q "Permission denied" "$CLAUDE_DEBUG_LOG"; then
echo "🔧 Fixing permission issues..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
chmod -R 755 "$REPO_DIR" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
if grep -q "No space left" "$CLAUDE_DEBUG_LOG"; then
echo "🔧 Cleaning up disk space..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip cache purge 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
rm -rf /tmp/* 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
# Retry with different approach
echo "🔄 Retrying Claude CLI with alternative configuration..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt with reduced complexity
CLAUDE_FLAGS_RETRY="--model claude-3-5-haiku-20241022 --add-dir \"$REPO_DIR\" --print --output-format text --max-turns 40 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit,FileManager"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_RETRY < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_RETRY < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 2 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ $CLAUDE_EXIT_CODE -ne 0 ]; then
echo "❌ Claude CLI attempt 2 also failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Final retry with minimal configuration
echo "🔄 Final retry with minimal Claude CLI configuration..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_FLAGS_MINIMAL="--model claude-3-5-haiku-20241022 --add-dir \"$REPO_DIR\" --print --output-format text --max-turns 20 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 900 $CLAUDE_CMD $CLAUDE_FLAGS_MINIMAL < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 900 $CLAUDE_CMD $CLAUDE_FLAGS_MINIMAL < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$CLAUDE_DEBUG_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 3 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ $CLAUDE_EXIT_CODE -ne 0 ]; then
echo "❌ All Claude CLI attempts failed. Final exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Provide detailed failure analysis
echo "📊 Claude CLI Failure Analysis:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Repository: $GITHUB_REPO_URL" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Test command: $PYTEST_CMD_VALUE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Working directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude command: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Debug log: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "✅ Claude Code CLI session finished successfully on final retry" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "✅ Claude Code CLI session finished successfully on retry" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "✅ Claude Code CLI session finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 2 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 3 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round0_end" "\"rc\":$CLAUDE_EXIT_CODE"
set -e
trap - ERR
if [ $CLAUDE_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI session finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_EXIT_CODE -eq 124 ]; then
TIMEOUT_MINUTES=$((TIMEOUT_DURATION / 60))
echo "⏰ Claude Code CLI session timed out after $TIMEOUT_DURATION seconds ($TIMEOUT_MINUTES minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with post-CLAUDE tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI session failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Debug log available at: $CLAUDE_DEBUG_LOG" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Virtualenv version conflicts (like the one detected earlier)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Attempt to fix virtualenv issue if detected
if grep -q "virtualenv.*missing.*requires" "$TEST_LOG_FILE" || grep -q "virtualenv.*20\." "$TEST_LOG_FILE"; then
echo "🔧 Detected virtualenv version conflict - attempting to fix..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Installing compatible virtualenv version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try multiple approaches to fix virtualenv
echo "Attempting to downgrade virtualenv to compatible version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install "virtualenv<20.26.3" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try uninstalling and reinstalling
echo "Attempting to reinstall virtualenv..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip uninstall -y virtualenv 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
pip install "virtualenv<20.26.3" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try installing specific version that works with tox
echo "Attempting to install specific virtualenv version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install "virtualenv==20.26.2" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
echo "Virtualenv fix attempt completed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Attempt to fix missing command issues
if grep -q "command not found" "$TEST_LOG_FILE"; then
echo "🔧 Detected missing command - attempting to fix..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for specific missing commands and install them
if grep -q "bench: command not found" "$TEST_LOG_FILE"; then
echo "Installing frappe-bench..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install frappe-bench 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Also try installing frappe-bench via system package manager
echo "Attempting to install frappe-bench via system package manager..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y frappe-bench 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try installing frappe-bench via pip with specific version
echo "Attempting to install frappe-bench with specific version..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install "frappe-bench>=5.0.0" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
if grep -q "tox: command not found" "$TEST_LOG_FILE"; then
echo "Installing tox..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
pip install tox 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
# Generic missing command handler
MISSING_CMD=$(grep "command not found" "$TEST_LOG_FILE" | head -1 | awk '{print $1}' | sed 's/://')
if [ -n "$MISSING_CMD" ] && [ "$MISSING_CMD" != "bench" ] && [ "$MISSING_CMD" != "tox" ]; then
echo "Attempting to install missing command: $MISSING_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try pip first
pip install "$MISSING_CMD" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
# Try system package manager
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y "$MISSING_CMD" 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG" || true
fi
echo "Missing command fix attempt completed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
echo "Continuing with post-CLAUDE tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
echo "DEBUG: About to clean up prompt file..." | tee -a "$TEST_LOG_FILE"
# Clean up prompt file
rm -f /tmp/claude_setup_prompt.md
echo "DEBUG: Prompt file cleaned up" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to start post-LLM tests..." | tee -a "$TEST_LOG_FILE"
_stage "post_llm_tests_start"
echo "Re-running full tests after Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Using timeout protection (30 minutes) to prevent hanging tests..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to set error handling for post-LLM tests..." | tee -a "$TEST_LOG_FILE"
set +e
# Use timeout to prevent hanging tests
echo "DEBUG: Setting post-LLM test timeout..." | tee -a "$TEST_LOG_FILE"
POST_LLM_TEST_TIMEOUT=1800 # 30 minutes
TEST_RC=124 # Default to timeout
echo "DEBUG: Post-LLM test timeout: $POST_LLM_TEST_TIMEOUT seconds" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to check tests directory..." | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "DEBUG: Tests directory exists: ${TESTS_ROOT_VALUE}" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to check if command is pytest runner..." | tee -a "$TEST_LOG_FILE"
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
echo "DEBUG: Command is pytest runner" | tee -a "$TEST_LOG_FILE"
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
echo "DEBUG: Command already includes tests directory" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: Command does not include tests directory, appending it" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/ (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test with tests directory..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
fi
else
echo "DEBUG: Command is not pytest runner" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test as-is..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
fi
else
echo "DEBUG: Tests directory does not exist" | tee -a "$TEST_LOG_FILE"
echo "Running post-LLM test: ${PYTEST_CMD_RUN} (with timeout ${POST_LLM_TEST_TIMEOUT}s)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "DEBUG: About to execute post-LLM test without tests directory..." | tee -a "$TEST_LOG_FILE"
timeout $POST_LLM_TEST_TIMEOUT bash -c "eval '${PYTEST_CMD_RUN}'" | tee -a "$TEST_LOG_FILE"
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Post-LLM test completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
fi
# Log timeout result
echo "DEBUG: About to check for timeout..." | tee -a "$TEST_LOG_FILE"
if [ $TEST_RC -eq 124 ]; then
echo "DEBUG: Post-LLM tests timed out" | tee -a "$TEST_LOG_FILE"
echo "⚠️ Post-LLM tests timed out after ${POST_LLM_TEST_TIMEOUT} seconds" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This may indicate hanging tests or very slow test execution" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "DEBUG: Post-LLM tests did not timeout" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: About to check for argparse errors..." | tee -a "$TEST_LOG_FILE"
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "DEBUG: Detected argparse error, falling back to pytest" | tee -a "$TEST_LOG_FILE"
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
echo "DEBUG: Fallback pytest completed with exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: No argparse errors detected" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: About to set error handling..." | tee -a "$TEST_LOG_FILE"
set -e
echo "DEBUG: Error handling set" | tee -a "$TEST_LOG_FILE"
echo "Post-LLM tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: About to call _stage post_llm_tests_end..." | tee -a "$TEST_LOG_FILE"
_stage "post_llm_tests_end" "\"rc\":$TEST_RC"
echo "DEBUG: Successfully called _stage post_llm_tests_end" | tee -a "$TEST_LOG_FILE"
# Persist exit code after post-LLM run
echo "DEBUG: About to persist exit code..." | tee -a "$TEST_LOG_FILE"
if [ -n "${EXIT_FILE:-}" ]; then
echo "DEBUG: EXIT_FILE is set, writing exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true
else
echo "DEBUG: EXIT_FILE is not set" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: Exit code persistence completed" | tee -a "$TEST_LOG_FILE"
# If conftest import path mismatch detected, enable importlib mode for next runs
if grep -q "ImportPathMismatchError: ('.*conftest'" "$TEST_LOG_FILE"; then
echo "Detected conftest import path mismatch; enabling --import-mode=importlib for subsequent pytest runs" | tee -a "$TEST_LOG_FILE"
export PYTEST_ADDOPTS="--import-mode=importlib ${PYTEST_ADDOPTS:-}"
fi
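# pytest reads PYTEST_ADDOPTS as extra command-line options prepended to the user's invocation,
# so after the export above a plain `pytest -q tests/` behaves like
# `pytest --import-mode=importlib -q tests/`.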
# Re-evaluate pass ratio; if still below threshold, run additional setup rounds
echo "DEBUG: About to re-evaluate pass ratio..." | tee -a "$TEST_LOG_FILE"
SETUP_MAX_ROUNDS=${LLM_SETUP_MAX_ROUNDS:-2}
ROUND=0
echo "DEBUG: SETUP_MAX_ROUNDS: $SETUP_MAX_ROUNDS" | tee -a "$TEST_LOG_FILE"
echo "DEBUG: Starting additional rounds loop..." | tee -a "$TEST_LOG_FILE"
while : ; do
echo "DEBUG: Evaluating pass ratio for round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Add timeout protection for test parsing to prevent hanging
echo "DEBUG: Starting test parsing with timeout protection for round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Use a subshell with timeout to prevent hanging
(
# Extract test counts with corrected regex patterns
# Use grep to extract the number immediately before the keyword (handles ANSI codes)
# `|| true` keeps the subshell alive under set -e when no summary line exists, so the
# no-summary fallback below is actually reachable; `errors?` also matches the singular "1 error"
PASSED=$(grep -oE '[0-9]+ passed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
FAILED=$(grep -oE '[0-9]+ failed' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
ERRORS=$(grep -oE '[0-9]+ errors?' "$TEST_LOG_FILE" | tail -n1 | grep -oE '[0-9]+' || true)
# Debug: Show what each regex extracted
echo "DEBUG: Raw extracted values for round $ROUND - PASSED='$PASSED' FAILED='$FAILED' ERRORS='$ERRORS'" | tee -a "$TEST_LOG_FILE"
# If no summary line found (interrupted test run), count individual test results
if [ -z "$PASSED" ] && [ -z "$FAILED" ] && [ -z "$ERRORS" ]; then
echo "DEBUG: No test summary found for round $ROUND, analyzing test execution..." | tee -a "$TEST_LOG_FILE"
# Check if tests actually ran by looking for common test execution indicators
if grep -q "No such file or directory\|command not found\|make: \*\*\*\|ERROR collecting\|Interrupted:" "$TEST_LOG_FILE"; then
echo "DEBUG: Detected build/test command failure for round $ROUND - tests never executed properly" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat command failure as an error
elif grep -q "PASSED\|FAILED\|SKIPPED" "$TEST_LOG_FILE"; then
echo "DEBUG: Found individual test results for round $ROUND, counting them..." | tee -a "$TEST_LOG_FILE"
PASSED=$(grep -c "PASSED" "$TEST_LOG_FILE" 2>/dev/null || echo "0")
FAILED=$(grep -c "FAILED" "$TEST_LOG_FILE" 2>/dev/null || echo "0")
SKIPPED=$(grep -c "SKIPPED" "$TEST_LOG_FILE" 2>/dev/null || echo "0")
ERRORS=0 # Individual test results don't show "errors" - they show as "FAILED"
echo "DEBUG: Individual test counts for round $ROUND - PASSED=$PASSED FAILED=$FAILED SKIPPED=$SKIPPED" | tee -a "$TEST_LOG_FILE"
else
echo "DEBUG: No test execution detected for round $ROUND - treating as setup failure" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1 # Treat as setup failure
fi
fi
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
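# Worked example: PASSED=30 FAILED=10 ERRORS=0 gives TOTAL=40 and RATIO=100*30/40=75 (integer division)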
echo "DEBUG: Calculated totals for round $ROUND - TOTAL=$TOTAL RATIO=$RATIO%" | tee -a "$TEST_LOG_FILE"
# Export variables for use outside the subshell
echo "PASSED=$PASSED" > /tmp/test_parsing_result_round_$ROUND
echo "FAILED=$FAILED" >> /tmp/test_parsing_result_round_$ROUND
echo "ERRORS=$ERRORS" >> /tmp/test_parsing_result_round_$ROUND
echo "TOTAL=$TOTAL" >> /tmp/test_parsing_result_round_$ROUND
echo "RATIO=$RATIO" >> /tmp/test_parsing_result_round_$ROUND
) &
PARSE_PID=$!
# Wait for parsing with a timeout; `wait` is a shell builtin and cannot run under the external
# `timeout` command, so poll the background job instead
PARSE_WAITED=0
while kill -0 "$PARSE_PID" 2>/dev/null && [ "$PARSE_WAITED" -lt 30 ]; do
sleep 1
PARSE_WAITED=$((PARSE_WAITED + 1))
done
if ! kill -0 "$PARSE_PID" 2>/dev/null; then
wait "$PARSE_PID" 2>/dev/null || true
echo "DEBUG: Test parsing completed successfully for round $ROUND" | tee -a "$TEST_LOG_FILE"
# Load results from temp file
if [ -f /tmp/test_parsing_result_round_$ROUND ]; then
source /tmp/test_parsing_result_round_$ROUND
rm -f /tmp/test_parsing_result_round_$ROUND
else
echo "DEBUG: No parsing results found for round $ROUND, using fallback values" | tee -a "$TEST_LOG_FILE"
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
else
echo "DEBUG: Test parsing timed out for round $ROUND, using fallback values" | tee -a "$TEST_LOG_FILE"
kill $PARSE_PID 2>/dev/null || true
PASSED=0
FAILED=0
ERRORS=1
TOTAL=1
RATIO=0
fi
echo "DEBUG: Pass ratio evaluation - passed=$PASSED failed=$FAILED errors=$ERRORS total=$TOTAL ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
echo "Post-LLM summary: passed=$PASSED failed=$FAILED errors=$ERRORS ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -gt 0 ] && [ "$RATIO" -ge 50 ]; then
echo "DEBUG: Pass ratio is above threshold (${RATIO}% >= 50%), breaking loop" | tee -a "$TEST_LOG_FILE"
break
fi
if [ "$ROUND" -ge "$SETUP_MAX_ROUNDS" ]; then
echo "DEBUG: Max rounds reached ($ROUND >= $SETUP_MAX_ROUNDS), exiting" | tee -a "$TEST_LOG_FILE"
echo "Tests still below threshold after $ROUND additional rounds. Skipping optimization." | tee -a "$TEST_LOG_FILE"
exit 4
fi
ROUND=$((ROUND + 1))
echo "DEBUG: Starting round $ROUND..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round_start" "\"round\":$ROUND"
echo "Starting additional Claude Code CLI setup round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Create focused prompt for additional round
cat > /tmp/claude_setup_round_${ROUND}.md << EOF
# Repository Setup Assistant - Round $ROUND
You are fixing repository setup issues. Previous attempts have been made but tests are still failing.
## Current Situation
- This is setup round $ROUND of maximum $SETUP_MAX_ROUNDS
- Previous rounds have attempted to fix dependencies and setup issues
- Tests are still below 50% pass rate
## Your Focus This Round
1. **Analyze recent test failures** - look at the latest test output for new clues
2. **Try different approaches** - if pip installs didn't work, try other methods
3. **Check for version conflicts** - some packages might need specific versions
4. **Look for missing system dependencies** - some Python packages need system libs
5. **Consider alternative test commands** - the project might use a different test runner
6. **Use advanced troubleshooting** - check logs, try different Python versions, modify config files
7. **Leverage all available tools** - FileManager to explore, WebSearch for solutions, Edit to fix files
## Available Information
- Repository: ${GITHUB_REPO_URL:-unknown}
- Tests directory: ${TESTS_ROOT_VALUE:-test}
- Test command: ${PYTEST_CMD_VALUE:-pytest}
- Round: $ROUND/$SETUP_MAX_ROUNDS
## Strategies to Try
- **Explore with FileManager**: Check setup.py, pyproject.toml, requirements files, tox.ini
- **WebSearch for solutions**: Look up specific error messages, package installation guides
- **Edit configuration files**: Modify setup files, fix import paths, update dependencies
- **Try alternative installation methods**: conda, mamba, system packages, different pip flags
- **Install development/test extras**: `pip install -e .[dev,test]`, `pip install -e .[all]`
- **Check for version conflicts**: Use specific package versions, upgrade/downgrade packages
- **System-level fixes**: Install missing system libraries, fix permissions, environment variables
- **Test isolation**: Run individual test files to identify specific failing components
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Recent Test Errors (Summary)
$(sed -n '1,400p' "$TEST_LOG_FILE" | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30)
Focus on getting tests to run successfully, even if not all pass.
EOF
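# Note: the EOF delimiter above is unquoted, so the $(...) and ${...} inside the heredoc expand
# when the prompt file is written; the error summary is baked in at this point, not at read time.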
# Run Claude Code CLI for additional round
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-round-${ROUND}-$(date -u +%Y-%m-%dT%H-%M-%S).log"
touch "$CLAUDE_LOG" 2>/dev/null || true
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
if [ -n "$CLAUDE_CMD" ]; then
echo "=== CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
cat /tmp/claude_setup_round_${ROUND}.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
# Build Claude Code CLI command for additional rounds
CLAUDE_ROUND_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for additional rounds
CLAUDE_FLAGS_ROUND_PERM="$CLAUDE_ROUND_BASE_ARGS --print --output-format text --max-turns 50 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit,FileManager,WebSearch"
CLAUDE_FLAGS_ROUND_MIN="$CLAUDE_ROUND_BASE_ARGS --print --output-format text --max-turns 50"
CLAUDE_FLAGS_ROUND_BASIC="$CLAUDE_ROUND_BASE_ARGS --print --output-format text"
echo "Executing round $ROUND: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
set +e # Don't exit on failure
set -o pipefail
# Try with full permissions first
echo "Attempting Claude CLI round $ROUND with elevated permissions..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_ROUND_EXIT_CODE=1
# First attempt: Try with full permissions
if [ -d "$REPO_DIR" ]; then
echo "Executing Claude CLI round $ROUND in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
echo "Executing Claude CLI round $ROUND in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 1 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 2 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 3 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round_end" "\"round\":$ROUND,\"rc\":$CLAUDE_ROUND_EXIT_CODE"
set -e
if [ $CLAUDE_ROUND_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI round $ROUND finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI round $ROUND completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_ROUND_EXIT_CODE -eq 124 ]; then
echo "⏰ Claude Code CLI round $ROUND timed out after 1800 seconds (30 minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI round $ROUND failed with exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes for round $ROUND:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing with tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Claude Code CLI not available in round $ROUND; skipping" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Clean up round prompt file
rm -f /tmp/claude_setup_round_${ROUND}.md
_stage "round_tests_start" "\"round\":$ROUND"
echo "Re-running full tests (round $ROUND)..." | tee -a "$TEST_LOG_FILE"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
_stage "round_tests_end" "\"round\":$ROUND,\"rc\":$TEST_RC"
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
# Persist exit code on each round
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
done
fi
fi
# Handle custom optimization modes
if [ "$CUSTOM_RUN_MODE" = true ]; then
echo "=== Executing Custom Optimization Mode ==="
_stage "custom_optimization_start" "\"mode\":\"$OPTIMIZATION_MODE\""
# Log custom optimization parameters
echo "Custom optimization parameters:"
echo " Mode: $OPTIMIZATION_MODE"
echo " Module root: ${CUSTOM_MODULE_ROOT:-'default'}"
echo " Tests root: ${CUSTOM_TESTS_ROOT:-'default'}"
echo " Async mode: ${CUSTOM_ASYNC_MODE:-'default'}"
echo " Verbose: ${CUSTOM_VERBOSE:-'default'}"
echo " No PR: ${CUSTOM_NO_PR:-'default'}"
case "$OPTIMIZATION_MODE" in
"single_function")
echo "Running single function optimization..."
echo "Target file: ${CUSTOM_FILE_PATH:-'not specified'}"
echo "Target function: ${CUSTOM_FUNCTION_NAME:-'not specified'}"
if [ -z "${CUSTOM_FILE_PATH:-}" ] || [ -z "${CUSTOM_FUNCTION_NAME:-}" ]; then
echo "Error: file_path and function_name are required for single function optimization" >&2
_stage "custom_optimization_error" "\"error\":\"missing_target_parameters\""
exit 1
fi
if [ ! -f "${CUSTOM_FILE_PATH}" ]; then
echo "Error: Target file not found: ${CUSTOM_FILE_PATH}" >&2
_stage "custom_optimization_error" "\"error\":\"target_file_not_found\",\"file\":\"${CUSTOM_FILE_PATH}\""
exit 3
fi
# Build codeflash command
CODEFLASH_CMD="codeflash --file \"${CUSTOM_FILE_PATH}\" --function \"${CUSTOM_FUNCTION_NAME}\""
_stage "custom_optimization_command_built" "\"mode\":\"single_function\",\"file\":\"${CUSTOM_FILE_PATH}\",\"function\":\"${CUSTOM_FUNCTION_NAME}\""
# Add flags
if [ "${CUSTOM_VERBOSE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --verbose"
fi
if [ "${CUSTOM_ASYNC_MODE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --async"
fi
if [ "${CUSTOM_NO_PR:-false}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --no-pr"
fi
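# Example of a fully assembled command with the defaults above (hypothetical file and function names):
#   codeflash --file "src/app/utils.py" --function "slow_parse" --verbose --async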
echo "Executing: $CODEFLASH_CMD"
eval "$CODEFLASH_CMD"
;;
"trace_and_optimize")
echo "Running trace and optimize workflow..."
echo "Target script: ${CUSTOM_SCRIPT_PATH:-'not specified'}"
echo "Trace file: ${CUSTOM_TRACE_FILE:-'default'}"
echo "Tracer timeout: ${CUSTOM_TRACER_TIMEOUT:-'default'}"
echo "Trace only: ${CUSTOM_TRACE_ONLY:-'default'}"
if [ -z "${CUSTOM_SCRIPT_PATH:-}" ]; then
echo "Error: script_path is required for trace and optimize" >&2
_stage "custom_optimization_error" "\"error\":\"missing_script_path\""
exit 1
fi
if [ ! -f "${CUSTOM_SCRIPT_PATH}" ]; then
echo "Error: Target script not found: ${CUSTOM_SCRIPT_PATH}" >&2
_stage "custom_optimization_error" "\"error\":\"target_script_not_found\",\"script\":\"${CUSTOM_SCRIPT_PATH}\""
exit 3
fi
# Build codeflash optimize command
CODEFLASH_CMD="codeflash optimize \"${CUSTOM_SCRIPT_PATH}\""
_stage "custom_optimization_command_built" "\"mode\":\"trace_and_optimize\",\"script\":\"${CUSTOM_SCRIPT_PATH}\""
# Add trace file if specified
if [ -n "${CUSTOM_TRACE_FILE:-}" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} -o \"${CUSTOM_TRACE_FILE}\""
fi
# Add tracer timeout if specified
if [ -n "${CUSTOM_TRACER_TIMEOUT:-}" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --tracer-timeout ${CUSTOM_TRACER_TIMEOUT}"
fi
# Add trace-only flag if specified
if [ "${CUSTOM_TRACE_ONLY:-false}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --trace-only"
fi
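# Example of an assembled command (script path, trace file, and timeout values
# are hypothetical):
#   codeflash optimize "scripts/train.py" -o "codeflash.trace" --tracer-timeout 600 --trace-only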
echo "Executing: $CODEFLASH_CMD"
_stage "custom_optimization_executing" "\"mode\":\"single_function\",\"command\":\"$CODEFLASH_CMD\""
eval "$CODEFLASH_CMD"
CODEFLASH_RC=$?
_stage "custom_optimization_executed" "\"mode\":\"single_function\",\"rc\":$CODEFLASH_RC"
;;
"trace_and_optimize")
echo "Executing: $CODEFLASH_CMD"
_stage "custom_optimization_executing" "\"mode\":\"trace_and_optimize\",\"command\":\"$CODEFLASH_CMD\""
eval "$CODEFLASH_CMD"
CODEFLASH_RC=$?
_stage "custom_optimization_executed" "\"mode\":\"trace_and_optimize\",\"rc\":$CODEFLASH_RC"
;;
"optimize_all")
echo "Running optimize all codebase..."
echo "Target directory: ${CUSTOM_TARGET_DIRECTORY:-'entire codebase'}"
echo "Benchmark mode: ${CUSTOM_BENCHMARK:-'default'}"
echo "Async mode: ${CUSTOM_ASYNC_MODE:-'default'}"
echo "Verbose: ${CUSTOM_VERBOSE:-'default'}"
# Build codeflash --all command
CODEFLASH_CMD="codeflash --all"
_stage "custom_optimization_command_built" "\"mode\":\"optimize_all\",\"directory\":\"${CUSTOM_TARGET_DIRECTORY:-'entire'}\""
# Add target directory if specified
if [ -n "${CUSTOM_TARGET_DIRECTORY:-}" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} \"${CUSTOM_TARGET_DIRECTORY}\""
fi
# Add benchmark flag if specified
if [ "${CUSTOM_BENCHMARK:-false}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --benchmark"
fi
# Add async flag if specified
if [ "${CUSTOM_ASYNC_MODE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --async"
fi
# Add verbose flag if specified
if [ "${CUSTOM_VERBOSE:-true}" = "true" ]; then
CODEFLASH_CMD="${CODEFLASH_CMD} --verbose"
fi
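# Example of an assembled command with a hypothetical target directory "src"
# and the default flags: codeflash --all "src" --async --verbose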
echo "Executing: $CODEFLASH_CMD"
_stage "custom_optimization_executing" "\"mode\":\"$OPTIMIZATION_MODE\",\"command\":\"$CODEFLASH_CMD\""
eval "$CODEFLASH_CMD"
CODEFLASH_RC=$?
_stage "custom_optimization_executed" "\"mode\":\"$OPTIMIZATION_MODE\",\"rc\":$CODEFLASH_RC"
;;
*)
echo "Error: Unknown optimization mode: $OPTIMIZATION_MODE" >&2
echo "Valid modes: single_function, trace_and_optimize, optimize_all" >&2
_stage "custom_optimization_error" "\"error\":\"unknown_mode\",\"mode\":\"$OPTIMIZATION_MODE\""
exit 1
;;
esac
_stage "custom_optimization_end" "\"mode\":\"$OPTIMIZATION_MODE\""
echo "=== Custom Optimization Completed ==="
else
# Original optimization logic for non-custom runs
if [ -z "${CF_TARGET_FILE:-}" ]; then
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Trace-first: ${TRACE_CMD} ${TESTS_ROOT_VALUE}/"
# Ensure pytest-cov if coverage flags present
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
pip install pytest-cov || true
fi
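# Tracing is best-effort: set +e plus the trailing || true keep a tracer
# failure from aborting the whole run under set -e.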
set +e
# If TRACE_CMD is pytest, pass the tests-root as args so tracer gets a non-empty split
if [[ "${TRACE_CMD}" == pytest* ]]; then
codeflash optimize --trace-only -m pytest -- "${TESTS_ROOT_VALUE}/" --async || true
else
codeflash optimize --trace-only -m "${TRACE_CMD}" --async || true
fi
set -e
else
echo "Skipping trace: tests root not found."
fi
fi
if [ -n "${CF_TARGET_FILE:-}" ]; then
echo "Running Codeflash single-file: ${CF_TARGET_FILE} ${CF_TARGET_FUNCTION:-}"
if [ ! -f "${CF_TARGET_FILE}" ]; then
echo "Target file not found: ${CF_TARGET_FILE}" >&2
exit 3
fi
if [ -n "${CF_TARGET_FUNCTION:-}" ]; then
codeflash --file "${CF_TARGET_FILE}" --function "${CF_TARGET_FUNCTION}" --verbose --async
else
codeflash --file "${CF_TARGET_FILE}" --verbose --async
fi
else
echo "Running Codeflash --all with --async without staging-review flag..."
codeflash --all --async --verbose
fi
fi
# If we reach here normally, ensure EXIT_FILE reflects last known code (0 if unset)
echo "DEBUG: About to check final EXIT_FILE status..." | tee -a "$TEST_LOG_FILE"
if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then
echo "DEBUG: EXIT_FILE is empty, setting to 0" | tee -a "$TEST_LOG_FILE"
echo "0" > "${EXIT_FILE}" 2>/dev/null || true
else
echo "DEBUG: EXIT_FILE is already set or not configured" | tee -a "$TEST_LOG_FILE"
fi
echo "DEBUG: About to finish optimization..." | tee -a "$TEST_LOG_FILE"
echo "--- Finished Codeflash Optimization ---"
# Earlier skip/failure paths may exit non-zero; the EXIT trap persists EXIT_FILE there as well
echo "DEBUG: About to exit with code 0..." | tee -a "$TEST_LOG_FILE"
exit 0