#!/bin/bash
set -e
set -u
# Enable pipefail when supported (works under bash; safely ignored under sh)
if (set -o 2>/dev/null | grep -q 'pipefail') 2>/dev/null; then
set -o pipefail
fi
echo "--- Starting Codeflash Optimization ---"
# Helper to record stage transitions for BE tracking
_stage() {
local name="$1"; shift || true
local extra="$*"
if [ -n "${STAGE_FILE:-}" ]; then
printf '{"ts":"%s","stage":"%s"%s}\n' "$(date -Is)" "$name" "${extra:+,$extra}" >> "$STAGE_FILE" 2>/dev/null || true
fi
}
_stage "start"
# Ensure we always record final exit and persist EXIT_FILE if not already set
trap '_rc=$?; _stage "runner_exit" "\"rc\":$_rc"; if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then echo "$_rc" > "${EXIT_FILE}" 2>/dev/null || true; fi; exit $_rc' EXIT
if [ -z "${GITHUB_TOKEN:-}" ]; then echo "GITHUB_TOKEN is required"; exit 1; fi
if [ -z "${CODEFLASH_API_KEY:-}" ]; then echo "CODEFLASH_API_KEY is required"; exit 1; fi
if [ -z "${GITHUB_REPO_URL:-}" ]; then echo "GITHUB_REPO_URL is required"; exit 1; fi
# Prefer LLM-provided overrides if present; fall back to CSV/env; then to auto
MODULE_ROOT_VALUE="${LLM_MODULE_ROOT:-${MODULE_ROOT:-auto}}"
TESTS_ROOT_VALUE="${LLM_TESTS_ROOT:-${TESTS_ROOT:-auto}}"
PYTEST_CMD_VALUE="${LLM_PYTEST_CMD:-${PYTEST_CMD:-pytest}}"
FORMATTER_CMDS_VALUE="${LLM_FORMATTER_CMDS:-${FORMATTER_CMDS:-[\"disabled\"]}}"
# Normalize pytest command: drop a leading 'poetry run ' (case-insensitive)
LOWER_PYTEST=$(echo "$PYTEST_CMD_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ "$LOWER_PYTEST" == poetry\ run* ]]; then
PYTEST_CMD_VALUE="$(echo "$PYTEST_CMD_VALUE" | sed 's/^[Pp]oetry[[:space:]]\+[Rr]un[[:space:]]\+//')"
fi
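# e.g. 'poetry run pytest -x' is normalized to 'pytest -x' so it runs inside the venv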
# Normalize formatter cmds to Codeflash-per-file style per docs
# See https://docs.codeflash.ai/configuration
FORMATTER_CMDS_NORM="$FORMATTER_CMDS_VALUE"
LOWER_FMT=$(echo "$FORMATTER_CMDS_VALUE" | tr '[:upper:]' '[:lower:]')
if [[ -z "$LOWER_FMT" || "$LOWER_FMT" == "[]" || "$LOWER_FMT" == "[\"disabled\"]" ]]; then
FORMATTER_CMDS_NORM='["disabled"]'
elif [[ "$LOWER_FMT" == *"ruff"* ]]; then
FORMATTER_CMDS_NORM='["ruff check --exit-zero --fix $file","ruff format $file"]'
elif [[ "$LOWER_FMT" == *"black"* ]]; then
FORMATTER_CMDS_NORM='["black $file"]'
fi
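# e.g. an analyzer value mentioning ruff, such as '["ruff format ."]', is normalized to the
# per-file form '["ruff check --exit-zero --fix $file","ruff format $file"]' that Codeflash expects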
# Summary of analyzed/exported config (no secrets)
echo "=== Configuration Summary (analyzer + effective) ==="
echo "Repo URL: ${GITHUB_REPO_URL}"
echo "CSV/ENV defaults: MODULE_ROOT='${MODULE_ROOT:-}', TESTS_ROOT='${TESTS_ROOT:-}', PYTEST_CMD='${PYTEST_CMD:-}'"
echo "Analyzer: LLM_MODULE_ROOT='${LLM_MODULE_ROOT:-}', LLM_TESTS_ROOT='${LLM_TESTS_ROOT:-}', LLM_PYTEST_CMD='${LLM_PYTEST_CMD:-}'"
echo "Analyzer: LLM_FORMATTER_CMDS='${LLM_FORMATTER_CMDS:-}', LLM_PIP_PACKAGES='${LLM_PIP_PACKAGES:-}'"
echo "Derived: MODULE_ROOT_VALUE='${MODULE_ROOT_VALUE}', TESTS_ROOT_VALUE='${TESTS_ROOT_VALUE}', PYTEST_CMD_VALUE='${PYTEST_CMD_VALUE}'"
echo "Derived: FORMATTER_CMDS_NORM=${FORMATTER_CMDS_NORM}"
echo "=== End Configuration Summary ==="
# Derive test framework for Codeflash config from the test command
TEST_FRAMEWORK_VALUE="pytest"
LOWER_CMD=$(echo "${PYTEST_CMD_VALUE}" | tr '[:upper:]' '[:lower:]')
if [[ "${LOWER_CMD}" =~ (^|[[:space:]])pytest([[:space:]]|$) ]]; then
TEST_FRAMEWORK_VALUE="pytest"
elif [[ "${LOWER_CMD}" == *"unittest"* ]]; then
TEST_FRAMEWORK_VALUE="unittest"
elif [[ "${LOWER_CMD}" == *"nose"* ]] || [[ "${LOWER_CMD}" == *"nosetests"* ]]; then
TEST_FRAMEWORK_VALUE="nose"
fi
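# e.g. 'python -m pytest -q' -> pytest; 'python -m unittest discover' -> unittest; 'nosetests -v' -> nose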
_stage "auth_gh_start"
echo "Authenticating gh..."
if gh auth status -h github.com >/dev/null 2>&1; then
echo "gh auth status OK"
else
echo "Using GITHUB_TOKEN from environment for gh commands"
fi
_stage "fork_repo_start"
echo "Forking repository if needed..."
gh repo fork "${GITHUB_REPO_URL}" --clone=false --remote=false || true
REPO_PATH=$(echo "${GITHUB_REPO_URL}" | sed 's#https://github.com/##')
FORK_OWNER=$(gh api user --jq .login)
FORK_REPO="${FORK_OWNER}/$(basename "${REPO_PATH}")"
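# e.g. GITHUB_REPO_URL=https://github.com/pallets/flask with authenticated user 'octocat'
# yields REPO_PATH='pallets/flask' and FORK_REPO='octocat/flask' (illustrative names)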
_stage "clone_start" "\"repo\":\"${FORK_REPO}\""
echo "Cloning fork ${FORK_REPO}..."
# Working directory (must be writable by current user)
WORK_DIR="${WORK_DIR:-/home/ubuntu/work}"
rm -rf "$WORK_DIR" || true
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
# Retry clone with exponential backoff for GitHub service issues
for attempt in 1 2 3; do
echo "Clone attempt $attempt/3..."
if gh repo clone "${FORK_REPO}" repo; then
echo "Clone successful"
break
else
if [ $attempt -lt 3 ]; then
echo "Clone failed, retrying in $((attempt * 10)) seconds..."
sleep $((attempt * 10))
else
echo "Clone failed after 3 attempts, continuing with original repo..."
# Fallback to original repo if fork clone fails
gh repo clone "${GITHUB_REPO_URL}" repo || {
echo "Failed to clone both fork and original repo"
exit 1
}
fi
fi
done
cd repo
git remote add upstream "${GITHUB_REPO_URL}" || true
git fetch --all || true
if [ "${MODULE_ROOT_VALUE}" = "auto" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "Detecting module/tests roots..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then
echo "No Python interpreter found for detection"
else
$PY_CMD /app/scripts/detect_roots.py > roots.json || true
fi
if [ -f roots.json ]; then
DETECTED_MODULE=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("module_root",""))' || echo "")
DETECTED_TESTS=$($PY_CMD -c 'import json;print(json.load(open("roots.json")).get("tests_root",""))' || echo "")
if [ "${MODULE_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_MODULE}" ]; then MODULE_ROOT_VALUE="${DETECTED_MODULE}"; fi
if [ "${TESTS_ROOT_VALUE}" = "auto" ] && [ -n "${DETECTED_TESTS}" ]; then TESTS_ROOT_VALUE="${DETECTED_TESTS}"; fi
fi
fi
if [ -z "${MODULE_ROOT_VALUE}" ] || [ "${MODULE_ROOT_VALUE}" = "auto" ]; then
echo "Failed to detect module-root; please set MODULE_ROOT env."; exit 2
fi
if [ -z "${TESTS_ROOT_VALUE}" ] || [ "${TESTS_ROOT_VALUE}" = "auto" ]; then
echo "No tests-root detected; tracing will be skipped."
fi
_stage "write_codeflash_config"
echo "Writing pyproject.toml..."
cat > pyproject.toml <<EOF
[tool.codeflash]
module-root = "${MODULE_ROOT_VALUE}"
tests-root = "${TESTS_ROOT_VALUE}"
test-framework = "${TEST_FRAMEWORK_VALUE}"
formatter-cmds = ${FORMATTER_CMDS_NORM}
disable-telemetry = false
EOF
# Also write a minimal parent pyproject for Sphinx (docs/conf.py may reference ../pyproject.toml)
if [ -d .. ]; then
echo "Writing parent pyproject.toml for docs..."
cat > ../pyproject.toml <<EOF
[project]
name = "autogenerated-project"
version = "0.0.0"
description = "Autogenerated to satisfy Sphinx config during CI"
authors = [{name = "Auto-generated", email = "noreply@example.com"}]
EOF
fi
if [ -n "${VENV_PATH:-}" ] && [ -d "${VENV_PATH}" ]; then
echo "Using pre-created venv at ${VENV_PATH}"
# shellcheck disable=SC1090
source "${VENV_PATH}/bin/activate"
else
_stage "venv_setup"
echo "Setting up Python venv..."
PY_CMD=$(command -v python3 || command -v python || echo "")
if [ -z "$PY_CMD" ]; then echo "No Python interpreter found"; exit 1; fi
"$PY_CMD" -m venv .venv
# shellcheck disable=SC1091
source .venv/bin/activate
pip install --upgrade pip >/dev/null 2>&1 || true
_stage "install_codeflash"
echo "Installing codeflash CLI..."
pip install --upgrade codeflash || pip install codeflash || true
fi
# Ensure 'python3' resolves to the venv interpreter (some venvs only expose 'python')
if ! command -v python3 >/dev/null 2>&1 && command -v python >/dev/null 2>&1; then
ln -sf "$(command -v python)" "$(dirname "$(command -v python)")/python3" || true
fi
# Make local repo importable first, then utils/ for helper modules like testutils
export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
if [ -d "$PWD/utils" ]; then
export PYTHONPATH="$PWD/utils:$PYTHONPATH"
fi
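# e.g. with WORK_DIR=/home/ubuntu/work and a utils/ dir present, imports resolve via
# PYTHONPATH=/home/ubuntu/work/repo/utils:/home/ubuntu/work/repo[:previous PYTHONPATH]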
_stage "pre_test_setup"
# If coverage flags are present in test command, ensure pytest-cov is installed before any test run
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
echo "Detected coverage flags in test command; installing pytest-cov..."
pip install pytest-cov || true
fi
# If reruns flags are present in test command, ensure pytest-rerunfailures is installed
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --reruns"; then
echo "Detected reruns flags in test command; installing pytest-rerunfailures..."
pip install pytest-rerunfailures || true
fi
# Run install commands in the project directory (inside venv)
if [ -n "${PRE_INSTALL_CMDS:-}" ]; then
echo "Running pre-install commands: ${PRE_INSTALL_CMDS}"
bash -lc "${PRE_INSTALL_CMDS}" || echo "Pre-install commands failed, continuing..."
fi
_stage "project_install_start"
if [ -n "${INSTALL_CMDS:-}" ]; then
echo "Running install commands: ${INSTALL_CMDS}"
if bash -lc "${INSTALL_CMDS}"; then
echo "Install commands completed successfully"
else
echo "Install commands failed (exit code: $?), continuing..."
# For repositories with custom install scripts that may fail due to
# non-standard configurations, we continue and rely on pip install fallbacks
fi
_stage "project_install_end"
fi
if [ -n "${POST_INSTALL_CMDS:-}" ]; then
echo "Running post-install commands: ${POST_INSTALL_CMDS}"
bash -lc "${POST_INSTALL_CMDS}" || echo "Post-install commands failed, continuing..."
fi
# Normalize test command for use in two contexts:
# 1) Execution (must use venv's Python)
# 2) Codeflash tracing with -m (must be a Python module, not 'python3 <script>')
PYTEST_CMD_RUN="${PYTEST_CMD_VALUE}"
if [[ "${PYTEST_CMD_RUN}" == python3\ * ]]; then PYTEST_CMD_RUN="python ${PYTEST_CMD_RUN#python3 }"; fi
# Debug: Show the original and normalized test commands
echo "Debug: Original PYTEST_CMD_VALUE: '${PYTEST_CMD_VALUE}'"
echo "Debug: Normalized PYTEST_CMD_RUN: '${PYTEST_CMD_RUN}'"
echo "Debug: TESTS_ROOT_VALUE: '${TESTS_ROOT_VALUE}'"
# Helper: detect if command looks like invoking pytest directly
_is_pytest_runner() {
case "$1" in
pytest\ *|pytest) return 0 ;;
python\ -m\ pytest*) return 0 ;;
python3\ -m\ pytest*) return 0 ;;
py.test\ *|py.test) return 0 ;;
*) return 1 ;;
esac
}
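# e.g. _is_pytest_runner "python -m pytest -q" returns 0 (pytest-style),
# while _is_pytest_runner "python run_tests.py" returns 1 (custom runner)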
TRACE_CMD="${PYTEST_CMD_VALUE}"
# Convert interpreter-prefix forms to module forms
if [[ "${TRACE_CMD}" == python3\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python3 -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python\ -m\ * ]]; then
TRACE_CMD="${TRACE_CMD#python -m }"
# For pytest commands, extract just the pytest part and handle args separately
if [[ "${TRACE_CMD}" == pytest\ * ]]; then
TRACE_CMD="pytest"
fi
fi
if [[ "${TRACE_CMD}" == python3\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python3 }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
elif [[ "${TRACE_CMD}" == python\ ./*.py* ]]; then
SCRIPT_PATH="${TRACE_CMD#python }"
SCRIPT_FILE="${SCRIPT_PATH%% *}"
REST="${SCRIPT_PATH#${SCRIPT_FILE}}"
MOD="${SCRIPT_FILE#./}"
MOD="${MOD%.py}"
MOD="${MOD//\//.}"
TRACE_CMD="${MOD}${REST}"
fi
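# e.g. TRACE_CMD 'python3 ./scripts/run_tests.py -v' becomes 'scripts.run_tests -v',
# which codeflash can execute as a module via '-m' (illustrative path)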
# Debug: Show the trace command after processing
echo "Debug: TRACE_CMD for codeflash: '${TRACE_CMD}'"
echo "Installing project dependencies (best-effort)..."
# 1) Install repo requirements first to pin base versions
if [ -f requirements.txt ]; then pip install -r requirements.txt || true; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt || true; fi
if [ -d requirements ]; then
for f in requirements/*.txt; do
[ -f "$f" ] && pip install -r "$f" || true
done
fi
# 2) Only attempt editable install if packaging metadata likely exists
if [ -f pyproject.toml ] || [ -f setup.py ] || [ -f setup.cfg ]; then
EDITABLE_OK=0
if [ -f pyproject.toml ] && grep -qiE "^\s*\[tool\.poetry\]|^\s*\[project\]" pyproject.toml; then
EDITABLE_OK=1
fi
if [ -f setup.py ]; then
EDITABLE_OK=1
fi
if [ -f setup.cfg ] && grep -qiE "^\s*packages\s*=|^\s*package_dir\s*=|^\s*install_requires\s*=" setup.cfg; then
EDITABLE_OK=1
fi
if [ "$EDITABLE_OK" -eq 1 ]; then
echo "Attempting editable install (pip install -e .)..."
if pip install -e .; then
for extra in dev test tests ci all; do
pip install -e ".[${extra}]" || true
done
else
echo "Editable install failed; skipping editable extras and continuing without -e ."
fi
else
echo "Packaging metadata not sufficient; skipping editable install."
fi
fi
# 3) Freeze constraints and then install LLM-specified packages under constraints
if [ -n "${LLM_PIP_PACKAGES:-}" ] && [ "${LLM_PIP_PACKAGES}" != "[]" ]; then
echo "Freezing constraints before LLM package install..."
pip freeze > .cf_constraints.txt || true
echo "Installing LLM-suggested Python packages under constraints: ${LLM_PIP_PACKAGES}"
python - <<'PY'
import os, json, subprocess, sys
pkgs = []
try:
raw = os.environ.get('LLM_PIP_PACKAGES','[]')
pkgs = json.loads(raw)
if not isinstance(pkgs, list):
pkgs = []
except Exception:
pkgs = []
specs = []
for p in pkgs:
if isinstance(p, str) and p.strip():
specs.append(p.strip())
elif isinstance(p, dict) and p.get('name'):
name = str(p['name']).strip()
spec = str(p.get('version_spec') or '').strip()
if name:
specs.append(name + (spec if spec else ''))
if specs:
cmd = [sys.executable, '-m', 'pip', 'install', '--disable-pip-version-check', '-c', '.cf_constraints.txt'] + specs
try:
subprocess.run(cmd, check=False)
except Exception:
pass
PY
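# LLM_PIP_PACKAGES accepts plain requirement strings or {name, version_spec} objects, e.g.
# '["numpy>=1.24", {"name": "requests", "version_spec": ">=2.31"}]' (illustrative packages)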
fi
# Ensure formatters/tools exist if referenced (check both normalized and original values)
if [[ "$FORMATTER_CMDS_NORM" == *"black "* ]] || [[ "${LOWER_FMT}" == *"black"* ]]; then
pip install black || true
fi
if [[ "$FORMATTER_CMDS_NORM" == *"ruff "* ]] || [[ "${LOWER_FMT}" == *"ruff"* ]]; then
pip install ruff || true
fi
# Install anthropic if key is present to enable Claude Code CLI
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
pip install --upgrade anthropic || true
fi
python -c "import pytest" 2>/dev/null || pip install pytest || true
# Optional: preflight test run to detect missing modules
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Preflight test run to detect missing modules..."
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight.out 2>&1
else
eval "${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/" >/tmp/preflight.out 2>&1
fi
else
# Non-pytest runner; avoid appending tests path that may be unsupported
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight.out 2>&1
fi
else
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight.out 2>&1
fi
PRE_RC=$?
set -e
# Detect and register unknown pytest marks to avoid collection errors under -Werror
# Why this exists:
# - Some repositories use custom pytest markers (e.g., `@pytest.mark.download`, `@pytest.mark.slow`)
# but forget to register them in their config (pyproject.toml/setup.cfg/pytest.ini).
# - With `-Werror` or strict settings, pytest turns the UnknownMark warning into an error during
# collection, causing the test run to fail before even starting.
#
# What we do:
# 1) We parse the preflight test output (`/tmp/preflight.out`) for lines like:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# 2) We extract the marker name (e.g., `download`) using `sed` with a capturing group, then de-duplicate
# with `sort -u`.
# 3) If any unknown markers are found, we append a minimal `conftest.py` shim at repo root that registers
# each discovered marker via `config.addinivalue_line("markers", ...)`. This is the official mechanism
# to declare custom markers so pytest accepts them.
#
# Example:
# If preflight output contains multiple instances of:
# "PytestUnknownMarkWarning: Unknown pytest.mark.download ..."
# then MARKS will contain `download`, and this block will append a conftest.py snippet like:
# def pytest_configure(config):
# config.addinivalue_line("markers", "download: auto-registered marker")
# After that, subsequent pytest runs will collect tests without failing on the unknown mark.
#
# Notes:
# - We only add to conftest.py; we do NOT overwrite existing content, keeping it non-destructive.
# - If no unknown markers are detected, nothing is changed.
# - This does not alter test behavior; it simply declares markers so pytest won't error on them.
if [ -s /tmp/preflight.out ]; then
MARKS=$(sed -n "s/.*Unknown pytest\.mark\.\([A-Za-z0-9_][A-Za-z0-9_]*\).*/\1/p" /tmp/preflight.out | sort -u)
if [ -n "${MARKS}" ]; then
echo "Detected unknown pytest marks: ${MARKS}" | tee -a "$TEST_LOG_FILE"
echo "Auto-registering markers via conftest.py shim..." | tee -a "$TEST_LOG_FILE"
(
echo "# Auto-added by optimizer to register pytest markers"
echo "def pytest_configure(config):"
# For each discovered unknown marker (e.g., download, slow, integration), write a declaration line.
# This is equivalent to having `markers = download: ...` in pytest.ini/pyproject.toml.
for m in ${MARKS}; do
echo " config.addinivalue_line(\"markers\", \"${m}: auto-registered marker\")"
done
) >> conftest.py
fi
fi
if [ $PRE_RC -ne 0 ]; then
echo "Analyzing missing module errors..."
MISSING=$(sed -n "s/.*ModuleNotFoundError: No module named '\([^']\+\)'.*/\1/p" /tmp/preflight.out | head -20)
if [ -z "$MISSING" ]; then
MISSING=$(sed -n "s/.*ImportError: No module named \([^ ]\+\).*/\1/p" /tmp/preflight.out | head -20)
fi
if [ -n "$MISSING" ]; then
echo "Attempting to install missing modules:"
echo "$MISSING" | while read -r mod; do
[ -z "$mod" ] && continue
pkg="$mod"
case "$pkg" in
PIL) pkg="Pillow";;
cv2) pkg="opencv-python";;
yaml) pkg="PyYAML";;
skimage) pkg="scikit-image";;
sklearn) pkg="scikit-learn";;
Crypto) pkg="pycryptodome";;
esac
echo " - pip install $pkg"
pip install "$pkg" || true
done
echo "Re-running preflight tests after installs..."
set +e
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN} -q" >/tmp/preflight2.out 2>&1
else
eval "${PYTEST_CMD_RUN} -q ${TESTS_ROOT_VALUE}/" >/tmp/preflight2.out 2>&1
fi
set -e
fi
fi
fi
# Full tests before optimization (with detailed logging)
TEST_LOG_DIR="${TEST_LOG_DIR:-/home/ubuntu/app/logs}"
mkdir -p "$TEST_LOG_DIR"
TS2=$(date -Is | sed 's/[:+]/-/g')
TEST_LOG_FILE="$TEST_LOG_DIR/tests-$TS2.log"
touch "$TEST_LOG_FILE" && chmod 666 "$TEST_LOG_FILE"
ln -sfn "$TEST_LOG_FILE" "$TEST_LOG_DIR/tests.log" || true
_stage "pre_tests_start"
echo "Running pre-optimization tests: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
# Check if the command already includes the tests directory
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
# Command already includes tests directory, execute as-is
echo "Debug: Executing command as-is (already includes tests dir): ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
# Command doesn't include tests directory, append it
echo "Debug: Appending tests directory: ${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
# Non-pytest runner, execute as-is
echo "Debug: Non-pytest runner, executing as-is: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
# No tests directory, execute as-is
echo "Debug: No tests directory, executing as-is: ${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
# Dynamic fallback for non-pytest runners emitting argparse errors
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
echo "Pre-optimization tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
_stage "pre_tests_end" "\"rc\":$TEST_RC"
# Persist exit code early if wrapper provided EXIT_FILE
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
# If tests below threshold, run Claude Code CLI setup loop
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
echo "Evaluating test pass ratio for Claude Code CLI setup gate..." | tee -a "$TEST_LOG_FILE"
# Attempt to extract passed/failed/errors from log tail (robust to order)
PASSED=$(sed -n "s/.* \([0-9]\+\) passed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
FAILED=$(sed -n "s/.* \([0-9]\+\) failed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
ERRORS=$(sed -n "s/.* \([0-9]\+\) errors\{0,1\}.*/\1/p" "$TEST_LOG_FILE" | tail -n1) # match '1 error' and 'N errors'
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
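# e.g. 45 passed, 5 failed, 0 errors -> TOTAL=50, RATIO=90 (integer percentage)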
echo "Parsed test summary: passed=$PASSED failed=$FAILED errors=$ERRORS ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -eq 0 ] || [ "$RATIO" -lt 50 ]; then
echo "Tests below threshold; invoking Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE"
_stage "claude_round0_start"
# ============================================================================
# CLAUDE CODE CLI SETUP INTEGRATION
# ============================================================================
# This section replaces the previous Python-based LLM setup helper with
# Claude Code CLI, which provides a more robust and interactive approach to
# repository setup. Claude Code CLI can:
#
# 1. Analyze repository structure and dependencies
# 2. Install missing packages and fix import issues
# 3. Handle custom test runners and build systems
# 4. Iteratively debug and resolve setup problems
# 5. Work directly in the terminal with full context
#
# The integration includes:
# - Comprehensive setup prompts with project context
# - Automatic CLI installation (npm or pip fallback)
# - Timeout protection (30 min initial, 20 min additional rounds)
# - Detailed logging of all Claude actions
# - Graceful fallback if Claude Code CLI is unavailable
# ============================================================================
# Create comprehensive prompt for Claude Code CLI
cat > /tmp/claude_setup_prompt.md << 'EOF'
# Repository Setup Assistant
You are an expert Python developer tasked with setting up a repository for testing. Your goal is to analyze the repository, install missing dependencies, and ensure tests can run successfully.
## Your Mission
1. **Analyze the repository structure** to understand the project layout
2. **Identify and install missing dependencies** that are causing test failures
3. **Fix common setup issues** like import path problems, missing packages, or configuration issues
4. **Run tests** to verify the setup is working
5. **Achieve at least 50% test pass rate** (you don't need 100% - some tests may legitimately fail)
## Available Tools
- You have full access to the terminal in the repository directory
- Python virtual environment is already activated at `.venv/`
- Use `.venv/bin/python` and `.venv/bin/pip` for Python operations
- You can read any file in the repository
- You can install packages using pip
## Key Guidelines
- **Focus on missing dependencies**: Look for ImportError, ModuleNotFoundError in test outputs
- **Use project's own install scripts** when available (like `devscripts/install_deps.py`, `setup.py`, etc.)
- **Install from requirements files** if they exist (`requirements.txt`, `requirements-dev.txt`, etc.)
- **Check pyproject.toml** for project dependencies and optional dependencies
- **Handle custom test runners**: Some projects use custom test scripts instead of pytest
- **Fix import path issues**: Add PYTHONPATH exports if needed
- **Install test-specific dependencies**: pytest plugins, coverage tools, etc.
## Common Patterns to Handle
1. **Custom dependency installers**: `python devscripts/install_deps.py`, `pip install -e .`
2. **Test runners with special args**: Projects may have `run_tests.py` or similar
3. **Missing test dependencies**: pytest plugins, mock libraries, etc.
4. **Path issues**: Repository modules not in PYTHONPATH
5. **Optional dependencies**: Install extras like `pip install -e .[test]`
## Success Criteria
- Tests run without ImportError/ModuleNotFoundError
- At least 50% of tests pass (some failures are acceptable)
- No critical setup errors that prevent test execution
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Current Context
- Repository: {REPO_URL}
- Tests directory: {TESTS_ROOT}
- Test command: {PYTEST_CMD}
- Previous test output shows dependency/setup issues
## Recent Test Errors (Summary)
{TEST_ERRORS_SUMMARY}
Start by examining the repository structure and recent test failures, then systematically address the issues.
EOF
# Replace placeholders in prompt
sed -i "s|{REPO_URL}|${GITHUB_REPO_URL:-unknown}|g" /tmp/claude_setup_prompt.md
sed -i "s|{TESTS_ROOT}|${TESTS_ROOT_VALUE:-test}|g" /tmp/claude_setup_prompt.md
sed -i "s|{PYTEST_CMD}|${PYTEST_CMD_VALUE:-pytest}|g" /tmp/claude_setup_prompt.md
# Build a short error summary from the current test log (first 30 error lines)
# Escape backslashes first, then '|' and '&', so the summary cannot break the sed replacement below
TEST_ERRORS_SUMMARY=$(sed -n '1,400p' "$TEST_LOG_FILE" | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30 | sed 's/\\/\\\\/g; s/|/\\|/g; s/&/\\\&/g')
TEST_ERRORS_SUMMARY=${TEST_ERRORS_SUMMARY:-"No error summary available"}
# Escape newlines for sed replacement
TEST_ERRORS_SUMMARY=$(printf "%s" "$TEST_ERRORS_SUMMARY" | sed ':a;N;$!ba;s/\n/\\n/g')
sed -i "s|{TEST_ERRORS_SUMMARY}|${TEST_ERRORS_SUMMARY}|g" /tmp/claude_setup_prompt.md
# Snapshot current environment into constraints to avoid breaking pinned deps
pip freeze > .cf_constraints.txt || true
export PIP_CONSTRAINTS="$(pwd)/.cf_constraints.txt"
# If common system deps are missing based on errors, try lightweight installs (best-effort)
if grep -q "libGL.so.1" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing libGL.so.1; installing headless OpenGL libs (libgl1, libglib2.0-0, libsm6, libxrender1, libxext6)..." | tee -a "$TEST_LOG_FILE"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 || true
# As a fallback in headless environments, prefer opencv-python-headless to avoid GUI backends
if pip show opencv-python >/dev/null 2>&1; then
echo "Installing opencv-python-headless as fallback for headless environment" | tee -a "$TEST_LOG_FILE"
pip install --upgrade opencv-python-headless || true
fi
fi
if grep -q "No module named 'tkinter'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing tkinter; installing Python Tk..." | tee -a "$TEST_LOG_FILE"
sudo apt-get update -y >/dev/null 2>&1 || true
sudo apt-get install -y --no-install-recommends python3-tk || true
fi
if grep -q "cannot import name 'Aer' from 'qiskit'" "$TEST_LOG_FILE" 2>/dev/null; then
echo "Detected missing qiskit-aer; installing..." | tee -a "$TEST_LOG_FILE"
pip install qiskit-aer || true
fi
# Run Claude Code CLI with the setup prompt
echo "Starting Claude Code CLI setup session..." | tee -a "$TEST_LOG_FILE"
_stage "claude_setup_init"
# Prepare a session log regardless of availability so FE always finds a file
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-$(date -u +%Y-%m-%dT%H-%M-%S).log"
touch "$CLAUDE_LOG" 2>/dev/null || true
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
echo "[claude] initializing setup session" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Ensure common local bin directory is in PATH (curl installer often writes here)
export PATH="$HOME/.local/bin:$PATH"
# Check if claude (Claude Code CLI) is available, or fallback to npx runner
CLAUDE_CMD=""
echo "Checking for Claude Code CLI availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Check for global claude command first
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found global claude CLI at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test if it's actually Claude Code CLI
if timeout 10 claude --version 2>&1 | grep -q "Claude Code"; then
echo "Confirmed: Global claude is Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: Global claude may not be Claude Code CLI, will try npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_CMD=""
fi
fi
# Check for npx if claude not found or not confirmed
if [ -z "$CLAUDE_CMD" ] && command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test npx availability
if timeout 10 npx --version >/dev/null 2>&1; then
echo "npx is available and working" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_CMD=""
fi
fi
if [ -z "$CLAUDE_CMD" ]; then
echo "Claude Code CLI not found; attempting installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm global install first (with proper permissions)
if command -v npm >/dev/null 2>&1; then
echo "Installing @anthropic-ai/claude-code via npm..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first (for system-wide install)
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed, trying user-level install..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install (no sudo)
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after user-level install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed, will rely on npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# If still no claude, try installing Node.js
if [ -z "$CLAUDE_CMD" ] && command -v apt-get >/dev/null 2>&1; then
echo "Installing Node.js LTS to enable Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
sudo apt-get update -y >/dev/null 2>&1 || true
# Install Node.js repository
if command -v curl >/dev/null 2>&1; then
echo "Adding Node.js repository..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - 2>&1 | tee -a "$CLAUDE_LOG" || true
fi
# Install Node.js
echo "Installing Node.js..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if sudo apt-get install -y nodejs 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Node.js installation succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try npm install again with proper permission handling
if command -v npm >/dev/null 2>&1; then
echo "Retrying npm install after Node.js installation..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try with sudo first
if sudo npm install -g @anthropic-ai/claude-code 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm install with sudo succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm install with sudo failed after Node.js install, trying user-level..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Try user-level install
if npm install -g @anthropic-ai/claude-code --prefix ~/.local 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npm user-level install succeeded after Node.js install" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after Node.js + user-level npm install at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "npm user-level install also failed after Node.js install, will use npx fallback" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
else
echo "Node.js installation failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Final check for available commands
export PATH="$HOME/.local/bin:$PATH"
if command -v claude >/dev/null 2>&1; then
CLAUDE_CMD="claude"
echo "Found claude CLI after installation at: $(which claude)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif command -v npx >/dev/null 2>&1; then
CLAUDE_CMD="npx -y @anthropic-ai/claude-code"
echo "Will use npx runner for Claude Code CLI after installation" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test npx availability with a simple command
echo "Testing npx availability..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 30 npx --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test if we can actually run the Claude Code CLI via npx
echo "Testing Claude Code CLI via npx..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 60 npx -y @anthropic-ai/claude-code --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude Code CLI via npx is working correctly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Warning: Claude Code CLI via npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This could be due to network issues or package availability" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Warning: npx may not be working properly" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
fi
# Guard: if still unavailable, skip gracefully
if [ -z "$CLAUDE_CMD" ]; then
echo "❌ Claude Code CLI unavailable; skipping setup assistance" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "All installation attempts failed:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Global npm install failed (permission issues)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - User-level npm install failed" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - npx fallback not available" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing without Claude Code CLI assistance..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
_stage "claude_round0_end" "\"rc\":127,\"unavailable\":true"
else
echo "✅ Claude Code CLI is available and ready to use" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Using command: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Ensure Claude Code CLI is authenticated (headless)
if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
echo "ANTHROPIC_API_KEY not set; Claude CLI may fail to authenticate" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Setting ANTHROPIC_API_KEY environment variable for Claude CLI..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
else
echo "ANTHROPIC_API_KEY is set; configuring Claude CLI authentication..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Best-effort non-interactive auth via config (ignore failures)
echo "Attempting to set API key via Claude CLI config..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 10 $CLAUDE_CMD config set api_key "${ANTHROPIC_API_KEY}" 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI API key configuration succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI API key configuration failed, will rely on environment variable" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Also set as environment variable as backup
export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY}"
fi
# Run Claude Code CLI with the setup prompt using print mode for automation
echo "Running Claude Code CLI setup session using: $CLAUDE_CMD" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
cat /tmp/claude_setup_prompt.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
# Build Claude Code CLI command with proper arguments
# Note: Claude Code CLI uses different argument structure than expected
CLAUDE_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_BASE_ARGS="$CLAUDE_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for different CLI versions
CLAUDE_FLAGS_PERM="$CLAUDE_BASE_ARGS --print --max-turns 40 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit"
CLAUDE_FLAGS_MIN="$CLAUDE_BASE_ARGS --print --max-turns 40"
CLAUDE_FLAGS_BASIC="$CLAUDE_BASE_ARGS --print"
echo "Executing: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Test Claude CLI is working with a simple command first
echo "Testing Claude CLI availability (version)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Handle npx differently as it may take longer to download and run
if [[ "$CLAUDE_CMD" == npx* ]]; then
echo "Testing npx-based Claude CLI (may take longer for first run)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if timeout 120 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI via npx version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI via npx version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is common for npx on first run due to package download time" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
if timeout 30 $CLAUDE_CMD --version 2>&1 | tee -a "$CLAUDE_LOG"; then
echo "Claude CLI version check succeeded" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "Claude CLI version check failed; continuing anyway" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Use timeout to enforce a hard limit; wait synchronously so tests run after it finishes/timeout
set +e # Don't exit on failure
set -o pipefail
# Try with full permissions first (pipe prompt via stdin to avoid argument parsing issues)
echo "Attempting Claude CLI execution with elevated permissions (non-interactive)..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_EXIT_CODE=1
# First attempt: Try with full permissions
# Use longer timeout for npx as it may need to download packages
TIMEOUT_DURATION=2700
if [[ "$CLAUDE_CMD" == npx* ]]; then
TIMEOUT_DURATION=3600 # 60 minutes for npx (includes download time)
echo "Using extended timeout (60 min) for npx-based Claude CLI" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
if [ -d "$REPO_DIR" ]; then
echo "Executing Claude CLI in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
echo "Executing Claude CLI in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_PERM < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 1 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_MIN < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 2 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_EXIT_CODE -ne 0 ] && [ $CLAUDE_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout $TIMEOUT_DURATION $CLAUDE_CMD $CLAUDE_FLAGS_BASIC < /tmp/claude_setup_prompt.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_EXIT_CODE=$CLI_STATUS
echo "Claude CLI attempt 3 exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round0_end" "\"rc\":$CLAUDE_EXIT_CODE"
set -e
if [ $CLAUDE_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI session finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_EXIT_CODE -eq 124 ]; then
TIMEOUT_MINUTES=$((TIMEOUT_DURATION / 60))
echo "⏰ Claude Code CLI session timed out after $TIMEOUT_DURATION seconds ($TIMEOUT_MINUTES minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with post-CLAUDE tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI session failed with exit code: $CLAUDE_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing with post-CLAUDE tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
fi
# Clean up prompt file
rm -f /tmp/claude_setup_prompt.md
_stage "post_llm_tests_start"
echo "Re-running full tests after Claude Code CLI setup..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
echo "Post-LLM tests exit code: $TEST_RC" | tee -a "$TEST_LOG_FILE"
_stage "post_llm_tests_end" "\"rc\":$TEST_RC"
# Persist exit code after post-LLM run
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
# If conftest import path mismatch detected, enable importlib mode for next runs
if grep -q "ImportPathMismatchError: ('.*conftest'" "$TEST_LOG_FILE"; then
echo "Detected conftest import path mismatch; enabling --import-mode=importlib for subsequent pytest runs" | tee -a "$TEST_LOG_FILE"
export PYTEST_ADDOPTS="--import-mode=importlib ${PYTEST_ADDOPTS:-}"
fi
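# PYTEST_ADDOPTS is read by pytest itself, so every subsequent pytest invocation
# (including the retry rounds below) picks up --import-mode=importlib automatically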
# Re-evaluate pass ratio; if still below threshold, run additional setup rounds
SETUP_MAX_ROUNDS=${LLM_SETUP_MAX_ROUNDS:-2}
ROUND=0
while : ; do
PASSED=$(sed -n "s/.* \([0-9]\+\) passed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
FAILED=$(sed -n "s/.* \([0-9]\+\) failed.*/\1/p" "$TEST_LOG_FILE" | tail -n1)
ERRORS=$(sed -n "s/.* \([0-9]\+\) errors\{0,1\}.*/\1/p" "$TEST_LOG_FILE" | tail -n1) # match '1 error' and 'N errors'
PASSED=${PASSED:-0}
FAILED=${FAILED:-0}
ERRORS=${ERRORS:-0}
TOTAL=$((PASSED + FAILED + ERRORS))
RATIO=0
if [ "$TOTAL" -gt 0 ]; then
RATIO=$(( 100 * PASSED / TOTAL ))
fi
echo "Post-LLM summary: passed=$PASSED failed=$FAILED errors=$ERRORS ratio=${RATIO}%" | tee -a "$TEST_LOG_FILE"
if [ "$TOTAL" -gt 0 ] && [ "$RATIO" -ge 50 ]; then
break
fi
if [ "$ROUND" -ge "$SETUP_MAX_ROUNDS" ]; then
echo "Tests still below threshold after $ROUND additional rounds. Skipping optimization." | tee -a "$TEST_LOG_FILE"
exit 4
fi
ROUND=$((ROUND + 1))
_stage "claude_round_start" "\"round\":$ROUND"
echo "Starting additional Claude Code CLI setup round $ROUND..." | tee -a "$TEST_LOG_FILE"
# Create focused prompt for additional round
cat > /tmp/claude_setup_round_${ROUND}.md << EOF
# Repository Setup Assistant - Round $ROUND
You are continuing to fix repository setup issues. Previous attempts have been made but tests are still failing.
## Current Situation
- This is setup round $ROUND of maximum $SETUP_MAX_ROUNDS
- Previous rounds have attempted to fix dependencies and setup issues
- Tests are still below 50% pass rate
## Your Focus This Round
1. **Analyze recent test failures** - look at the latest test output for new clues
2. **Try different approaches** - if pip installs didn't work, try other methods
3. **Check for version conflicts** - some packages might need specific versions
4. **Look for missing system dependencies** - some Python packages need system libs
5. **Consider alternative test commands** - the project might use a different test runner
## Available Information
- Repository: ${GITHUB_REPO_URL:-unknown}
- Tests directory: ${TESTS_ROOT_VALUE:-test}
- Test command: ${PYTEST_CMD_VALUE:-pytest}
- Round: $ROUND/$SETUP_MAX_ROUNDS
## Strategies to Try
- Check if there are alternative dependency installation methods
- Look for version pinning in setup files
- Try installing development/test extras: \`pip install -e .[dev,test]\`
- Check for conda/mamba environment files
- Look for Docker setup if available
- Try running individual test files to isolate issues
## Non-Interactive Mode
- Do not ask questions or request confirmations
- Do not prompt the user; instead, choose the most reasonable next action and execute it
- Prefer concrete commands (pip/system installs, edits) over suggestions
## Recent Test Errors (Summary)
$(sed -n '1,400p' "$TEST_LOG_FILE" | grep -E "(ImportError|ModuleNotFoundError|BadConfigError|FileNotFoundError|ERROR collecting)" | head -n 30)
Focus on getting tests to run successfully, even if not all pass.
EOF
# Run Claude Code CLI for additional round
CLAUDE_LOG="/home/ubuntu/app/logs/claude-setup-round-${ROUND}-$(date -u +%Y-%m-%dT%H-%M-%S).log"
touch "$CLAUDE_LOG" 2>/dev/null || true
chmod 666 "$CLAUDE_LOG" 2>/dev/null || true
if [ -n "$CLAUDE_CMD" ]; then
echo "=== CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
cat /tmp/claude_setup_round_${ROUND}.md | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "=== END CLAUDE SETUP ROUND $ROUND PROMPT ===" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
REPO_DIR="${WORK_DIR:-/home/ubuntu/work}/repo"
# Build Claude Code CLI command for additional rounds
CLAUDE_ROUND_BASE_ARGS=""
# Add model if specified
if [ -n "${ANTHROPIC_MODEL:-}" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --model ${ANTHROPIC_MODEL}"
fi
# Add directory if it exists
if [ -d "$REPO_DIR" ]; then
CLAUDE_ROUND_BASE_ARGS="$CLAUDE_ROUND_BASE_ARGS --add-dir $REPO_DIR"
fi
# Set up different flag combinations for additional rounds
CLAUDE_FLAGS_ROUND_PERM="$CLAUDE_ROUND_BASE_ARGS --print --max-turns 25 --dangerously-skip-permissions --permission-mode bypassPermissions --allowed-tools Bash,Edit"
CLAUDE_FLAGS_ROUND_MIN="$CLAUDE_ROUND_BASE_ARGS --print --max-turns 25"
CLAUDE_FLAGS_ROUND_BASIC="$CLAUDE_ROUND_BASE_ARGS --print"
echo "Executing round $ROUND: (cd $REPO_DIR) $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md" | sed 's/ */ /g' | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
set +e # Don't exit on failure
set -o pipefail
# Try with full permissions first
echo "Attempting Claude CLI round $ROUND with elevated permissions..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLAUDE_ROUND_EXIT_CODE=1
# First attempt: Try with full permissions
if [ -d "$REPO_DIR" ]; then
echo "Executing Claude CLI round $ROUND in repository directory: $REPO_DIR" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
echo "Executing Claude CLI round $ROUND in current directory" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_PERM < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 1 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
# Second attempt: Try with fewer flags if first attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with basic flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_MIN < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 2 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Third attempt: Try with minimal flags if second attempt failed
if [ $CLAUDE_ROUND_EXIT_CODE -ne 0 ] && [ $CLAUDE_ROUND_EXIT_CODE -ne 124 ]; then
echo "Retrying Claude CLI round $ROUND with minimal flags..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
if [ -d "$REPO_DIR" ]; then
( cd "$REPO_DIR" && timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md ) 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
else
timeout 1800 $CLAUDE_CMD $CLAUDE_FLAGS_ROUND_BASIC < /tmp/claude_setup_round_${ROUND}.md 2>&1 | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
CLI_STATUS=${PIPESTATUS[0]}
fi
CLAUDE_ROUND_EXIT_CODE=$CLI_STATUS
echo "Claude CLI round $ROUND attempt 3 exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
_stage "claude_round_end" "\"round\":$ROUND,\"rc\":$CLAUDE_ROUND_EXIT_CODE"
set -e
if [ $CLAUDE_ROUND_EXIT_CODE -eq 0 ]; then
echo "✅ Claude Code CLI round $ROUND finished successfully" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Claude CLI round $ROUND completed setup tasks without errors" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
elif [ $CLAUDE_ROUND_EXIT_CODE -eq 124 ]; then
echo "⏰ Claude Code CLI round $ROUND timed out after 1800 seconds (30 minutes)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "This is normal for complex setup tasks; continuing with tests" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
else
echo "❌ Claude Code CLI round $ROUND failed with exit code: $CLAUDE_ROUND_EXIT_CODE" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Possible causes for round $ROUND:" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Authentication issues (check ANTHROPIC_API_KEY)" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Network connectivity problems" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Unsupported command line arguments" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo " - Claude CLI version compatibility issues" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
echo "Continuing with tests to see if any improvements were made..." | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
else
echo "Claude Code CLI not available in round $ROUND; skipping" | tee -a "$TEST_LOG_FILE" "$CLAUDE_LOG"
fi
# Clean up round prompt file
rm -f /tmp/claude_setup_round_${ROUND}.md
_stage "round_tests_start" "\"round\":$ROUND"
echo "Re-running full tests (round $ROUND)..." | tee -a "$TEST_LOG_FILE"
set +e
if [ -d "${TESTS_ROOT_VALUE}" ]; then
if _is_pytest_runner "${PYTEST_CMD_RUN}"; then
if echo " ${PYTEST_CMD_RUN} " | grep -q " ${TESTS_ROOT_VALUE}\(/\| \|$\)"; then
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
else
eval "${PYTEST_CMD_RUN} ${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
else
eval "${PYTEST_CMD_RUN}" | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
_stage "round_tests_end" "\"round\":$ROUND,\"rc\":$TEST_RC"
# Fallback if run_tests.py argparse error persists
if grep -q "run_tests\.py: error: unrecognized arguments" "$TEST_LOG_FILE"; then
echo "Detected run_tests.py argparse error; falling back to pytest runner" | tee -a "$TEST_LOG_FILE"
if [ -d "${TESTS_ROOT_VALUE}" ]; then
pytest -q "${TESTS_ROOT_VALUE}/" | tee -a "$TEST_LOG_FILE"
else
pytest -q | tee -a "$TEST_LOG_FILE"
fi
TEST_RC=${PIPESTATUS[0]}
fi
set -e
# Persist exit code on each round
if [ -n "${EXIT_FILE:-}" ]; then echo "$TEST_RC" > "$EXIT_FILE" 2>/dev/null || true; fi
done
fi
fi
if [ -z "${CF_TARGET_FILE:-}" ]; then
if [ -d "${TESTS_ROOT_VALUE}" ]; then
echo "Trace-first: ${TRACE_CMD} ${TESTS_ROOT_VALUE}/"
# Ensure pytest-cov if coverage flags present
if echo " ${PYTEST_CMD_VALUE} " | grep -q " --cov"; then
pip install pytest-cov || true
fi
set +e
# If TRACE_CMD is pytest, pass the tests-root as args so tracer gets a non-empty split
if [[ "${TRACE_CMD}" == pytest* ]]; then
codeflash optimize --trace-only -m pytest -- "${TESTS_ROOT_VALUE}/" || true
else
codeflash optimize --trace-only -m "${TRACE_CMD}" || true
fi
set -e
else
echo "Skipping trace: tests root not found."
fi
fi
if [ -n "${CF_TARGET_FILE:-}" ]; then
echo "Running Codeflash single-file: ${CF_TARGET_FILE} ${CF_TARGET_FUNCTION:-}"
if [ ! -f "${CF_TARGET_FILE}" ]; then
echo "Target file not found: ${CF_TARGET_FILE}" >&2
exit 3
fi
if [ -n "${CF_TARGET_FUNCTION:-}" ]; then
codeflash --file "${CF_TARGET_FILE}" --function "${CF_TARGET_FUNCTION}" --verbose
else
codeflash --file "${CF_TARGET_FILE}" --verbose
fi
else
echo "Running Codeflash --all without staging-review flag..."
codeflash --all --verbose
fi
# If we reach here normally, ensure EXIT_FILE reflects last known code (0 if unset)
if [ -n "${EXIT_FILE:-}" ] && [ ! -s "${EXIT_FILE}" ]; then echo "0" > "${EXIT_FILE}" 2>/dev/null || true; fi
echo "--- Finished Codeflash Optimization ---"
# Skip/failure paths above exit non-zero; the EXIT trap ensures EXIT_FILE is set there as well
exit 0