From 7c1933180ae9825b09852d982b28b711406b5f77 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 17 Nov 2025 12:35:09 -0800 Subject: [PATCH] local setup (#1898) Signed-off-by: Saurabh Misra Co-authored-by: saga4 Co-authored-by: Sarthak Agarwal Co-authored-by: Mohamed Ashraf Co-authored-by: Aseem Saxena --- .dockerignore | 136 +++++ .github/workflows/django-unit-tests.yaml | 2 + ...-to-end-test-bubblesort-pytest-no-git.yaml | 2 + .../end-to-end-test-bubblesort-unittest.yaml | 2 + .../workflows/end-to-end-test-coverage.yaml | 2 + .../end-to-end-test-futurehouse.yaml | 2 + .../workflows/end-to-end-test-init-optim.yaml | 2 + .../end-to-end-test-tracer-replay.yaml | 2 + .../end-to-end-topological-sort-test.yaml | 2 + .github/workflows/mypy_aiservice.yml | 2 + .../onprem-simple/Dockerfile.unifiedall | 171 ++++++ deployment/onprem-simple/README.md | 341 +++++++++++ deployment/onprem-simple/TESTING.md | 550 ++++++++++++++++++ .../old-compose/docker-compose-simple.yml | 108 ++++ .../archive/old-compose/docker-compose.yml | 57 ++ .../old-dockerfiles/Dockerfile.aiservice | 30 + .../archive/old-dockerfiles/Dockerfile.cfapi | 51 ++ .../old-dockerfiles/Dockerfile.unified | 93 +++ .../old-dockerfiles/Dockerfile.unified-simple | 46 ++ .../archive/old-dockerfiles/Dockerfile.webapp | 27 + .../archive/old-scripts/REBUILD-AND-TEST.sh | 66 +++ .../archive/old-scripts/build-images.sh | 50 ++ .../archive/old-scripts/build.sh | 61 ++ .../archive/old-scripts/run-fresh.sh | 128 ++++ deployment/onprem-simple/init-db.sh | 138 +++++ deployment/onprem-simple/startup.sh | 90 +++ deployment/onprem-simple/supervisord.conf | 62 ++ django/.dockerignore | 81 +++ django/aiservice/aiservice/env_specific.py | 57 +- django/aiservice/aiservice/models/aimodels.py | 84 ++- django/aiservice/explanations/explanations.py | 22 +- .../optimization_review.py | 19 +- django/aiservice/optimizer/optimizer.py | 20 +- .../optimizer/optimizer_line_profiler.py | 29 +- django/aiservice/optimizer/refinement.py | 27 +- django/aiservice/ranker/ranker.py | 13 +- django/aiservice/testgen/testgen.py | 27 +- js/cf-api/Dockerfile | 10 + js/cf-api/github/github-app.ts | 18 +- js/cf-webapp/src/lib/stripe.ts | 10 +- 40 files changed, 2513 insertions(+), 127 deletions(-) create mode 100644 .dockerignore create mode 100644 deployment/onprem-simple/Dockerfile.unifiedall create mode 100644 deployment/onprem-simple/README.md create mode 100644 deployment/onprem-simple/TESTING.md create mode 100644 deployment/onprem-simple/archive/old-compose/docker-compose-simple.yml create mode 100644 deployment/onprem-simple/archive/old-compose/docker-compose.yml create mode 100644 deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.aiservice create mode 100644 deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.cfapi create mode 100644 deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified create mode 100644 deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified-simple create mode 100644 deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.webapp create mode 100755 deployment/onprem-simple/archive/old-scripts/REBUILD-AND-TEST.sh create mode 100755 deployment/onprem-simple/archive/old-scripts/build-images.sh create mode 100644 deployment/onprem-simple/archive/old-scripts/build.sh create mode 100644 deployment/onprem-simple/archive/old-scripts/run-fresh.sh create mode 100644 deployment/onprem-simple/init-db.sh create mode 100644 deployment/onprem-simple/startup.sh create mode 100644 deployment/onprem-simple/supervisord.conf 
create mode 100644 django/.dockerignore create mode 100644 js/cf-api/Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..6f9d4c51c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,136 @@ +# Python virtual environments +.venv/ +venv/ +env/ +ENV/ +django/aiservice/.venv/ +django/aiservice/venv/ +django/aiservice/__pycache__/ + +# Node.js dependencies +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Build outputs +dist/ +build/ +.next/ +.nuxt/ +.vuepress/dist/ +.serverless/ +.fusebox/ +.dynamodb/ +.tern-port/ +.vscode-test/ + +# Cache directories +.cache/ +.parcel-cache/ +.eslintcache/ +.stylelintcache/ +.ruff_cache/ + +# Environment files +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Git +.git/ +.gitignore + +# Logs +logs/ +*.log + +# Coverage +coverage/ +.nyc_output/ + +# Testing +.tox/ +.coverage +.coverage.* +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Temporary files +tmp/ +temp/ +.tmp/ + +# Yarn +.yarn/cache/ +.yarn/unplugged/ +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# Python cache files +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Database files +*.db +*.sqlite3 + +# Large directories that shouldn't be in build context +experiments/ +node_modules/ +js/node_modules/ +js/cf-api/node_modules/ +js/cf-webapp/node_modules/ +js/common/node_modules/ +django/aiservice/venv/ +django/aiservice/.venv/ + +# Additional large files +*.zip +*.tar.gz +*.rar +*.7z +*.iso +*.dmg + +# Lock files (keep package-lock.json but exclude others) +yarn.lock +pnpm-lock.yaml \ No newline at end of file diff --git a/.github/workflows/django-unit-tests.yaml b/.github/workflows/django-unit-tests.yaml index ad2045bc2..1aabbc398 100644 --- a/.github/workflows/django-unit-tests.yaml +++ b/.github/workflows/django-unit-tests.yaml @@ -51,6 +51,8 @@ jobs: SECRET_KEY: ${{ secrets.SECRET_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} steps: - name: Checkout code diff --git a/.github/workflows/end-to-end-test-bubblesort-pytest-no-git.yaml b/.github/workflows/end-to-end-test-bubblesort-pytest-no-git.yaml index 2c15fc848..8e8d89cb9 100644 --- a/.github/workflows/end-to-end-test-bubblesort-pytest-no-git.yaml +++ b/.github/workflows/end-to-end-test-bubblesort-pytest-no-git.yaml @@ -46,6 +46,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/end-to-end-test-bubblesort-unittest.yaml b/.github/workflows/end-to-end-test-bubblesort-unittest.yaml index a828bdfe8..ce6c74d6f 100644 --- a/.github/workflows/end-to-end-test-bubblesort-unittest.yaml +++ b/.github/workflows/end-to-end-test-bubblesort-unittest.yaml @@ -46,6 +46,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ 
secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/end-to-end-test-coverage.yaml b/.github/workflows/end-to-end-test-coverage.yaml index a99502d98..19e164487 100644 --- a/.github/workflows/end-to-end-test-coverage.yaml +++ b/.github/workflows/end-to-end-test-coverage.yaml @@ -12,6 +12,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/end-to-end-test-futurehouse.yaml b/.github/workflows/end-to-end-test-futurehouse.yaml index 7f1569966..34725ff3e 100644 --- a/.github/workflows/end-to-end-test-futurehouse.yaml +++ b/.github/workflows/end-to-end-test-futurehouse.yaml @@ -46,6 +46,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/end-to-end-test-init-optim.yaml b/.github/workflows/end-to-end-test-init-optim.yaml index 886d3c55a..f9956d39e 100644 --- a/.github/workflows/end-to-end-test-init-optim.yaml +++ b/.github/workflows/end-to-end-test-init-optim.yaml @@ -12,6 +12,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/end-to-end-test-tracer-replay.yaml b/.github/workflows/end-to-end-test-tracer-replay.yaml index 4484c3e9a..9d7d17b5d 100644 --- a/.github/workflows/end-to-end-test-tracer-replay.yaml +++ b/.github/workflows/end-to-end-test-tracer-replay.yaml @@ -46,6 +46,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/end-to-end-topological-sort-test.yaml b/.github/workflows/end-to-end-topological-sort-test.yaml index 155ed9b7a..1052e1f92 100644 --- a/.github/workflows/end-to-end-topological-sort-test.yaml +++ b/.github/workflows/end-to-end-topological-sort-test.yaml @@ -46,6 +46,8 @@ jobs: env: CODEFLASH_AIS_SERVER: local AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} diff --git a/.github/workflows/mypy_aiservice.yml b/.github/workflows/mypy_aiservice.yml index 27a6dbd9d..44bb91916 100644 --- 
a/.github/workflows/mypy_aiservice.yml +++ b/.github/workflows/mypy_aiservice.yml @@ -19,6 +19,8 @@ jobs: SECRET_KEY: ${{ secrets.SECRET_KEY }} DATABASE_URL: ${{ secrets.DATABASE_URL }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + OPENAI_API_TYPE: ${{ secrets.OPENAI_API_TYPE }} + OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} steps: - name: Checkout code diff --git a/deployment/onprem-simple/Dockerfile.unifiedall b/deployment/onprem-simple/Dockerfile.unifiedall new file mode 100644 index 000000000..de8c4cb0b --- /dev/null +++ b/deployment/onprem-simple/Dockerfile.unifiedall @@ -0,0 +1,171 @@ +# ============================================================================ +# Codeflash Unified Container - Self-Contained Build +# ============================================================================ +# This Dockerfile builds everything from source without depending on pre-built +# images. Use this for client deployments and teammate onboarding. +# +# Build command: +# docker build -f deployment/onprem-simple/Dockerfile.unified-selfcontained \ +# -t codeflash/unified:latest . +# +# ============================================================================ + +# ============================================================================ +# Stage 1: Build aiservice +# ============================================================================ +FROM --platform=$BUILDPLATFORM python:3.12-slim AS aiservice-builder + +WORKDIR /app + +# Install system dependencies for aiservice +RUN apt-get update && apt-get install -y \ + build-essential \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install uv +RUN pip install uv + +# Copy aiservice source +COPY django/aiservice ./ + +# Install dependencies +RUN uv sync + +# ============================================================================ +# Stage 2: Build cf-api +# ============================================================================ +FROM node:20-alpine AS cfapi-builder + +WORKDIR /build + +# Copy cf-api and common source +COPY js/cf-api ./cf-api +COPY js/common ./common + +# Build common package first (cf-api depends on it) +WORKDIR /build/common +RUN npm ci && npm run build + +# Install ALL dependencies for cf-api (matching Dockerfile.cfapi exactly) +WORKDIR /build/cf-api +RUN npm ci + +# Replace the common package from registry with local build +RUN rm -rf node_modules/@codeflash-ai/common && \ + cp -r /build/common node_modules/@codeflash-ai/ + +# Clean dist folder before build +RUN rm -rf dist + +# Build TypeScript (npm run build does: npm install && prisma generate && tsc && copy-assets) +RUN npm run build + +# ============================================================================ +# Stage 3: Build cf-webapp +# ============================================================================ +FROM node:20-alpine AS webapp-builder + +WORKDIR /build + +# Copy cf-webapp and common source +COPY js/cf-webapp ./cf-webapp +COPY js/common ./common + +# Build common package first (webapp depends on it) +WORKDIR /build/common +RUN npm ci && npm run build + +# Install ALL dependencies for webapp +WORKDIR /build/cf-webapp +RUN npm ci + +# Replace the common package from registry with local build +RUN rm -rf node_modules/@codeflash-ai/common && \ + cp -r /build/common node_modules/@codeflash-ai/ + +# Build Next.js application +RUN npm run build + +# ============================================================================ +# Stage 4: Final unified container +# 
============================================================================ +FROM node:20-alpine + +# Install system dependencies +RUN apk add --no-cache \ + python3 \ + py3-pip \ + postgresql15 \ + postgresql15-client \ + supervisor \ + bash \ + openssl \ + build-base \ + libpq-dev \ + && rm -rf /var/cache/apk/* + +# Install Python uv +RUN pip3 install --break-system-packages uv + +WORKDIR /app + +# ============================================================================ +# Copy cf-api from builder +# ============================================================================ +COPY --from=cfapi-builder /build/cf-api/dist ./cf-api/dist +COPY --from=cfapi-builder /build/cf-api/package.json ./cf-api/package.json +COPY --from=cfapi-builder /build/cf-api/package-lock.json ./cf-api/package-lock.json +COPY --from=cfapi-builder /build/cf-api/resend ./cf-api/resend +COPY --from=cfapi-builder /build/cf-api/github ./cf-api/github +COPY --from=cfapi-builder /build/cf-api/node_modules ./cf-api/node_modules +COPY --from=cfapi-builder /build/common /common + +# Copy node_modules into dist (Azure deployment structure) +RUN cp -rL /app/cf-api/node_modules /app/cf-api/dist/ 2>/dev/null || cp -r /app/cf-api/node_modules /app/cf-api/dist/ + +# ============================================================================ +# Copy aiservice from builder +# ============================================================================ +COPY --from=aiservice-builder /app ./aiservice + +# ============================================================================ +# Copy cf-webapp from builder +# ============================================================================ +COPY --from=webapp-builder /build/cf-webapp/.next ./cf-webapp/.next +COPY --from=webapp-builder /build/cf-webapp/public ./cf-webapp/public +COPY --from=webapp-builder /build/cf-webapp/package.json ./cf-webapp/package.json +COPY --from=webapp-builder /build/cf-webapp/package-lock.json ./cf-webapp/package-lock.json +COPY --from=webapp-builder /build/cf-webapp/next.config.mjs ./cf-webapp/next.config.mjs +COPY --from=webapp-builder /build/cf-webapp/node_modules ./cf-webapp/node_modules + +# ============================================================================ +# Configure PostgreSQL +# ============================================================================ +RUN mkdir -p /var/lib/postgresql/data /var/run/postgresql && \ + chown -R postgres:postgres /var/lib/postgresql /var/run/postgresql + +# ============================================================================ +# Copy configuration files +# ============================================================================ +COPY deployment/onprem-simple/supervisord.conf /etc/supervisord.conf +COPY deployment/onprem-simple/init-db.sh /app/init-db.sh +COPY deployment/onprem-simple/startup.sh /app/startup.sh + +RUN chmod +x /app/init-db.sh /app/startup.sh + +# ============================================================================ +# Expose ports +# ============================================================================ +EXPOSE 5432 8000 3001 3000 + +# ============================================================================ +# Environment variables +# ============================================================================ +ENV PGDATA=/var/lib/postgresql/data +ENV PATH="/var/lib/postgresql/bin:${PATH}" + +# ============================================================================ +# Start services +# 
============================================================================ +CMD ["/app/startup.sh"] \ No newline at end of file diff --git a/deployment/onprem-simple/README.md b/deployment/onprem-simple/README.md new file mode 100644 index 000000000..730c1ca56 --- /dev/null +++ b/deployment/onprem-simple/README.md @@ -0,0 +1,341 @@ +# Codeflash On-Premise Deployment + +A single Docker container that runs all Codeflash services for on-premise deployments. + +## What's Inside + +The unified container includes: +- **PostgreSQL 15** - Database server (port 5432) +- **aiservice** - Python Django optimization service (port 8000) +- **cf-api** - Node.js API server (port 3001) +- **cf-webapp** - Next.js web interface (port 3000) +- **Supervisord** - Process manager for all services + +## Quick Start + +### Prerequisites + +- Docker installed (version 20.10 or higher) +- An AI provider API key (Azure OpenAI, OpenAI, or Anthropic) + +### Step 1: Build the Docker Image + +```bash +git clone https://github.com/codeflash-ai/codeflash +cd codeflash +docker build -f deployment/onprem-simple/Dockerfile.unifiedall -t codeflash/unified:latest . +``` + +**Build time:** ~5-10 minutes + +### Step 2: Run the Container + +The simplest way to run Codeflash (only 1 required environment variable!): + +```bash +docker run -d --name codeflash \ + -e AZURE_OPENAI_API_KEY=your-azure-api-key \ + -p 5432:5432 \ + -p 8000:8000 \ + -p 3001:3001 \ + -p 3000:3000 \ + -v codeflash-data:/var/lib/postgresql/data \ + codeflash/unified:latest +``` + +**What happens automatically:** +- ✅ DATABASE_URL defaults to built-in PostgreSQL +- ✅ SECRET_KEY auto-generated +- ✅ URLs default to localhost +- ✅ API key auto-generated on first run + +### Step 3: Get Your API Key + +After the container starts (~15 seconds), retrieve your API key: + +```bash +# View logs to see the API key +docker logs codeflash + +# Or get it from the saved file +docker exec codeflash cat /app/API_KEY.txt +``` + +You'll see output like: +``` +====================================== + CODEFLASH SETUP COMPLETE! +====================================== + +Your API Key: cf-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + +Save this API key! You'll need it to configure the Codeflash CLI. +====================================== +``` + +**Save this API key** - you'll need it for the CLI! + +### Step 4: Install the CLI + +```bash +pip install codeflash +``` + +### Step 5: Configure the CLI + +```bash +export CODEFLASH_API_KEY=cf-your-api-key-from-step-3 +export CODEFLASH_AIS_SERVER=local +export CODEFLASH_CFAPI_SERVER=local +``` + +Or create a `.env` file in your project: +```bash +CODEFLASH_API_KEY=cf-your-api-key-from-step-3 +CODEFLASH_AIS_SERVER=local +CODEFLASH_CFAPI_SERVER=local +``` + +### Step 6: Optimize Your Code! 
+ +```bash +cd your-python-project +codeflash --file path/to/file.py --function function_name --no-pr +``` + +## Configuration Options + +### Minimal Configuration (Recommended) + +Only provide your AI provider key: + +```bash +docker run -d --name codeflash \ + -e OPENAI_API_TYPE=azure \ + -e OPENAI_API_BASE=your-azure-openai-base-url \ + -e AZURE_OPENAI_API_KEY=your-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-data:/var/lib/postgresql/data \ + codeflash/unified:latest +``` + +### Full Configuration (Optional) + +You can customize all settings if needed: + +```bash +docker run -d --name codeflash \ + -e OPENAI_API_TYPE=azure \ + -e OPENAI_API_BASE=your-azure-openai-base-url \ + -e AZURE_OPENAI_API_KEY=your-azure-key \ + -e ANTHROPIC_API_KEY=your-anthropic-key \ + -e SECRET_KEY=your-custom-secret \ + -e DATABASE_URL=postgresql://user:pass@host:5432/db \ + -e NEXT_PUBLIC_APP_URL=http://your-domain:3000 \ + -e WEBAPP_URL=http://your-domain:3000 \ + -e CODEFLASH_CFAPI_URL=http://your-domain:3001 \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-data:/var/lib/postgresql/data \ + codeflash/unified:latest +``` + +See `.env.onprem.minimal` for all available options. + +## Container Management + +### Check Status + +```bash +# Check if container is running +docker ps | grep codeflash + +# View logs +docker logs codeflash + +# Follow logs in real-time +docker logs -f codeflash + +# Check service status inside container +docker exec codeflash supervisorctl status +``` + +Expected output: +``` +postgres RUNNING pid 40, uptime 0:10:23 +aiservice RUNNING pid 41, uptime 0:10:23 +cf-api RUNNING pid 42, uptime 0:10:23 +cf-webapp RUNNING pid 43, uptime 0:10:23 +``` + +### Stop/Start/Restart + +```bash +# Stop container (data persists in volume) +docker stop codeflash + +# Start container +docker start codeflash + +# Restart container +docker restart codeflash +``` + +### Remove Container + +```bash +# Remove container (keeps data volume) +docker stop codeflash +docker rm codeflash + +# Remove container AND data (⚠️ deletes all data!) +docker stop codeflash +docker rm codeflash +docker volume rm codeflash-data +``` + +### Upgrade to New Version + +```bash +# Pull or build new image +docker pull codeflash/unified:latest +# OR +docker build -f deployment/onprem-simple/Dockerfile.unifiedall -t codeflash/unified:latest . 
+ +# Stop and remove old container +docker stop codeflash +docker rm codeflash + +# Start new container (data persists in volume) +docker run -d --name codeflash \ + -e AZURE_OPENAI_API_KEY=your-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-data:/var/lib/postgresql/data \ + codeflash/unified:latest +``` + +## Accessing Services + +Once running, you can access: + +- **cf-api**: http://localhost:3001 +- **aiservice**: http://localhost:8000 +- **cf-webapp**: http://localhost:3000 +- **PostgreSQL**: localhost:5432 (username: `codeflash`, password: `codeflash`, database: `codeflash`) + +## Troubleshooting + +### Container won't start + +```bash +# Check logs for errors +docker logs codeflash + +# Verify ports are available +lsof -i :5432 +lsof -i :8000 +lsof -i :3001 +lsof -i :3000 +``` + +### Services not responding + +```bash +# Check service status +docker exec codeflash supervisorctl status + +# Restart a specific service +docker exec codeflash supervisorctl restart cf-api +docker exec codeflash supervisorctl restart aiservice +docker exec codeflash supervisorctl restart cf-webapp +``` + +### CLI can't connect + +```bash +# Test service endpoints +curl http://localhost:3001/cfapi/healthcheck +curl http://localhost:8000/health + +# Verify environment variables +echo $CODEFLASH_API_KEY +echo $CODEFLASH_AIS_SERVER +echo $CODEFLASH_CFAPI_SERVER +``` + +### Database issues + +```bash +# Check if PostgreSQL is ready +docker exec codeflash pg_isready -h localhost -p 5432 -U codeflash + +# Access database +docker exec -it codeflash psql postgresql://codeflash:codeflash@localhost:5432/codeflash + +# Check API keys in database +docker exec codeflash psql postgresql://codeflash:codeflash@localhost:5432/codeflash \ + -c "SELECT key, suffix FROM cf_api_keys;" +``` + +## FAQ + +**Q: Do I need GitHub App configuration?** +A: No, not if you use `--no-pr` mode. GitHub integration is optional. + +**Q: Do I need Stripe configuration?** +A: No, billing features are not required for on-premise deployments. + +**Q: What AI providers are supported?** +A: Azure OpenAI, OpenAI, and Anthropic Claude. You only need one. + +**Q: Can I use my own PostgreSQL database?** +A: Yes, set the `DATABASE_URL` environment variable. + +**Q: What ports need to be accessible?** +A: For CLI usage, only ports 3001 (cf-api) and 8000 (aiservice) are required. Port 3000 (webapp) is for the web interface, and 5432 (PostgreSQL) is only if you want direct database access. + +**Q: How much disk space is needed?** +A: ~5GB for the image, plus storage for your data (depends on usage). + +**Q: How much memory is needed?** +A: Minimum 2GB RAM, recommended 4GB+ for optimal performance. 
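+
+As noted in the FAQ above, you can point the container at your own PostgreSQL by setting `DATABASE_URL`. A minimal sketch (hostname, credentials, and database name are placeholders for your own instance):
+
+```bash
+docker run -d --name codeflash \
+  -e AZURE_OPENAI_API_KEY=your-azure-api-key \
+  -e DATABASE_URL=postgresql://db_user:db_password@db.example.internal:5432/codeflash \
+  -p 8000:8000 -p 3001:3001 -p 3000:3000 \
+  codeflash/unified:latest
+```
+
+With an external database you likely do not need to publish port 5432 or mount the `codeflash-data` volume, since the built-in PostgreSQL is no longer storing your data.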
+ +## Performance Notes + +- **Container size:** ~5GB (includes all services and dependencies) +- **Startup time:** ~15-20 seconds for all services +- **Memory usage:** ~500MB-2GB (depending on workload) +- **CPU:** Works on both x86_64 and ARM64 (Apple Silicon) + +## File Structure + +``` +deployment/onprem-simple/ +├── Dockerfile.unifiedall # Main unified Docker image +├── supervisord.conf # Process manager configuration +├── startup.sh # Container startup script +├── init-db.sh # Database initialization script +├── .env.onprem.minimal # Minimal environment variables template +├── .dockerignore # Docker build exclusions +├── README.md # This file +├── TESTING.md # Testing guide +└── archive/ # Old/experimental files + ├── old-dockerfiles/ # Previous Dockerfile attempts + ├── old-compose/ # Old docker-compose files + └── old-scripts/ # Previous build scripts +``` + +## Next Steps + +- ✅ Container built and running +- ✅ Database initialized +- ✅ API key generated +- ✅ CLI configured +- 🚀 **Ready to optimize code!** + +See `TESTING.md` for a complete testing guide with example workflows. + +## Support + +For issues or questions: +- GitHub Issues: https://github.com/codeflash-ai/codeflash/issues +- Documentation: https://docs.codeflash.ai \ No newline at end of file diff --git a/deployment/onprem-simple/TESTING.md b/deployment/onprem-simple/TESTING.md new file mode 100644 index 000000000..0433f7953 --- /dev/null +++ b/deployment/onprem-simple/TESTING.md @@ -0,0 +1,550 @@ +# Codeflash On-Premise Testing Guide + +This guide walks through testing the Codeflash on-premise deployment step-by-step. + +## Quick Test (5 minutes) + +This is the fastest way to verify everything works: + +### 1. Build and Run + +```bash +# Build the image (from repository root) +docker build -f deployment/onprem-simple/Dockerfile.unifiedall -t codeflash/unified:latest . + +# Run the container +docker run -d --name codeflash-test \ + -e AZURE_OPENAI_API_KEY=your-azure-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-test-data:/var/lib/postgresql/data \ + codeflash/unified:latest + +# Wait ~15 seconds for services to start +sleep 15 +``` + +### 2. Verify All Services Are Running + +```bash +# Check container status +docker ps | grep codeflash-test + +# Check all services +docker exec codeflash-test supervisorctl status +``` + +Expected output: +``` +postgres RUNNING pid 40, uptime 0:00:15 +aiservice RUNNING pid 41, uptime 0:00:15 +cf-api RUNNING pid 42, uptime 0:00:15 +cf-webapp RUNNING pid 43, uptime 0:00:15 +``` + +### 3. Test Service Endpoints + +```bash +# Test cf-api +curl http://localhost:3001/cfapi/healthcheck +# Expected: {"status":"ok"} + +# Test aiservice +curl http://localhost:8000/health +# Expected: {"status":"healthy"} + +# Test webapp +curl http://localhost:3000 +# Expected: HTML content +``` + +### 4. Get API Key + +```bash +docker logs codeflash-test | grep "Your API Key" +# Or +docker exec codeflash-test cat /app/API_KEY.txt +``` + +### 5. Test CLI + +```bash +# Install CLI +pip install codeflash + +# Configure environment +export CODEFLASH_API_KEY=cf-your-key-from-step-4 +export CODEFLASH_AIS_SERVER=local +export CODEFLASH_CFAPI_SERVER=local + +# Test connection +codeflash --help +``` + +### 6. Test Optimization (Optional) + +If you have a Python project to test: + +```bash +cd your-python-project +codeflash --file path/to/file.py --function function_name --no-pr -v +``` + +### 7. 
Cleanup + +```bash +docker stop codeflash-test +docker rm codeflash-test +docker volume rm codeflash-test-data +``` + +--- + +## Detailed Testing Scenarios + +### Scenario 1: Fresh Installation Test + +Tests a brand new installation from scratch. + +```bash +# Clean slate +docker stop codeflash-test 2>/dev/null || true +docker rm codeflash-test 2>/dev/null || true +docker volume rm codeflash-test-data 2>/dev/null || true + +# Build +docker build -f deployment/onprem-simple/Dockerfile.unifiedall -t codeflash/unified:latest . + +# Run +docker run -d --name codeflash-test \ + -e AZURE_OPENAI_API_KEY=your-azure-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-test-data:/var/lib/postgresql/data \ + codeflash/unified:latest + +# Wait for startup +sleep 20 + +# Verify +docker logs codeflash-test +docker exec codeflash-test supervisorctl status +docker exec codeflash-test pg_isready -h localhost -p 5432 -U codeflash +``` + +**Success criteria:** +- ✅ All 4 services show "RUNNING" +- ✅ PostgreSQL is ready +- ✅ API key generated and saved +- ✅ No error messages in logs + +### Scenario 2: Persistent Data Test + +Tests that data persists across container restarts. + +```bash +# Get initial API key +INITIAL_KEY=$(docker exec codeflash-test cat /app/API_KEY.txt) +echo "Initial API key: $INITIAL_KEY" + +# Restart container +docker restart codeflash-test +sleep 15 + +# Get API key after restart +NEW_KEY=$(docker exec codeflash-test cat /app/API_KEY.txt) +echo "API key after restart: $NEW_KEY" + +# Compare +if [ "$INITIAL_KEY" = "$NEW_KEY" ]; then + echo "✅ Data persisted correctly" +else + echo "❌ Data did not persist" +fi +``` + +**Success criteria:** +- ✅ API key remains the same after restart +- ✅ All services restart successfully +- ✅ Database data is intact + +### Scenario 3: Service Restart Test + +Tests individual service restarts. + +```bash +# Restart cf-api +docker exec codeflash-test supervisorctl restart cf-api +sleep 5 +curl http://localhost:3001/cfapi/healthcheck + +# Restart aiservice +docker exec codeflash-test supervisorctl restart aiservice +sleep 5 +curl http://localhost:8000/health + +# Restart cf-webapp +docker exec codeflash-test supervisorctl restart cf-webapp +sleep 5 +curl http://localhost:3000 + +# Check all services +docker exec codeflash-test supervisorctl status +``` + +**Success criteria:** +- ✅ Each service restarts without errors +- ✅ Endpoints respond after restart +- ✅ No cascading failures + +### Scenario 4: Database Test + +Tests database connectivity and data. + +```bash +# Check PostgreSQL is running +docker exec codeflash-test pg_isready -h localhost -p 5432 -U codeflash + +# Check database exists +docker exec codeflash-test psql postgresql://codeflash:codeflash@localhost:5432/codeflash \ + -c "\l" | grep codeflash + +# Check tables exist +docker exec codeflash-test psql postgresql://codeflash:codeflash@localhost:5432/codeflash \ + -c "\dt" | head -10 + +# Check user exists +docker exec codeflash-test psql postgresql://codeflash:codeflash@localhost:5432/codeflash \ + -c "SELECT * FROM users;" + +# Check API key exists +docker exec codeflash-test psql postgresql://codeflash:codeflash@localhost:5432/codeflash \ + -c "SELECT key, suffix FROM cf_api_keys;" +``` + +**Success criteria:** +- ✅ Database `codeflash` exists +- ✅ All migrations applied (36+ tables) +- ✅ User created +- ✅ API key stored + +### Scenario 5: CLI Integration Test + +Full end-to-end test with a real Python file. 
+ +```bash +# Create test project +mkdir -p /tmp/codeflash-test-project +cd /tmp/codeflash-test-project + +# Create a simple Python file +cat > test.py << 'EOF' +def fibonacci(n): + """Calculate fibonacci number recursively (slow).""" + if n <= 1: + return n + return fibonacci(n - 1) + fibonacci(n - 2) + +def main(): + print(fibonacci(10)) + +if __name__ == "__main__": + main() +EOF + +# Get API key +export CODEFLASH_API_KEY=$(docker exec codeflash-test cat /app/API_KEY.txt) +export CODEFLASH_AIS_SERVER=local +export CODEFLASH_CFAPI_SERVER=local + +# Install CLI +pip install codeflash + +# Run optimization +codeflash --file test.py --function fibonacci --no-pr -v + +# Check if file was modified +git diff test.py 2>/dev/null || echo "File modified (no git)" +``` + +**Success criteria:** +- ✅ CLI connects successfully +- ✅ Optimization completes without errors +- ✅ Function is optimized (or determined not optimizable) +- ✅ Test file shows improvements + +### Scenario 6: Port Conflict Test + +Tests behavior when ports are already in use. + +```bash +# Try to run second container (should fail) +docker run -d --name codeflash-test-2 \ + -e AZURE_OPENAI_API_KEY=your-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + codeflash/unified:latest 2>&1 | grep "address already in use" + +# Run with different ports (should succeed) +docker run -d --name codeflash-test-2 \ + -e AZURE_OPENAI_API_KEY=your-key \ + -p 15432:5432 -p 18000:8000 -p 13001:3001 -p 13000:3000 \ + -v codeflash-test-2-data:/var/lib/postgresql/data \ + codeflash/unified:latest + +sleep 15 +docker exec codeflash-test-2 supervisorctl status + +# Cleanup +docker stop codeflash-test-2 +docker rm codeflash-test-2 +docker volume rm codeflash-test-2-data +``` + +**Success criteria:** +- ✅ Second container with same ports fails gracefully +- ✅ Second container with different ports works +- ✅ Both containers can run simultaneously + +### Scenario 7: Environment Variable Test + +Tests various environment variable configurations. + +```bash +# Test with minimal config +docker run -d --name codeflash-minimal \ + -e AZURE_OPENAI_API_KEY=test-key \ + -p 25432:5432 -p 28000:8000 -p 23001:3001 -p 23000:3000 \ + codeflash/unified:latest + +sleep 20 +docker logs codeflash-minimal | grep "Generated SECRET_KEY" +docker logs codeflash-minimal | grep "API Key:" + +# Test with full config +docker run -d --name codeflash-full \ + -e AZURE_OPENAI_API_KEY=test-key \ + -e SECRET_KEY=my-custom-secret \ + -e NEXT_PUBLIC_APP_URL=http://custom.domain:3000 \ + -e WEBAPP_URL=http://custom.domain:3000 \ + -p 35432:5432 -p 38000:8000 -p 33001:3001 -p 33000:3000 \ + codeflash/unified:latest + +sleep 20 +docker logs codeflash-full | grep -v "Generated SECRET_KEY" + +# Cleanup +docker stop codeflash-minimal codeflash-full +docker rm codeflash-minimal codeflash-full +``` + +**Success criteria:** +- ✅ Minimal config works with auto-generated values +- ✅ Full config respects custom values +- ✅ No required variables missing + +### Scenario 8: Upgrade Test + +Tests upgrading from one version to another. 
+ +```bash +# Run old version +docker run -d --name codeflash-old \ + -e AZURE_OPENAI_API_KEY=test-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-upgrade-test:/var/lib/postgresql/data \ + codeflash/unified:latest + +sleep 20 +OLD_KEY=$(docker exec codeflash-old cat /app/API_KEY.txt) + +# Stop old container +docker stop codeflash-old +docker rm codeflash-old + +# Rebuild image (simulating new version) +docker build -f deployment/onprem-simple/Dockerfile.unifiedall -t codeflash/unified:latest . + +# Run new version with same volume +docker run -d --name codeflash-new \ + -e AZURE_OPENAI_API_KEY=test-key \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-upgrade-test:/var/lib/postgresql/data \ + codeflash/unified:latest + +sleep 20 +NEW_KEY=$(docker exec codeflash-new cat /app/API_KEY.txt) + +# Compare +if [ "$OLD_KEY" = "$NEW_KEY" ]; then + echo "✅ Upgrade successful - data preserved" +else + echo "❌ Upgrade failed - data lost" +fi + +# Cleanup +docker stop codeflash-new +docker rm codeflash-new +docker volume rm codeflash-upgrade-test +``` + +**Success criteria:** +- ✅ New version starts successfully +- ✅ Data from old version is preserved +- ✅ API key remains the same +- ✅ No data migration issues + +--- + +## Monitoring and Debugging + +### Watch All Logs + +```bash +# Follow all logs +docker logs -f codeflash-test + +# Filter specific service +docker logs codeflash-test | grep "cf-api" +docker logs codeflash-test | grep "aiservice" +docker logs codeflash-test | grep "postgres" +docker logs codeflash-test | grep "webapp" +``` + +### Check Resource Usage + +```bash +# Container stats +docker stats codeflash-test + +# Disk usage +docker system df +du -sh /var/lib/docker/volumes/codeflash-test-data +``` + +### Interactive Debugging + +```bash +# Access container shell +docker exec -it codeflash-test bash + +# Check service logs inside container +docker exec codeflash-test tail -f /var/log/supervisor/cf-api-stdout.log +docker exec codeflash-test tail -f /var/log/supervisor/aiservice-stdout.log +docker exec codeflash-test tail -f /var/log/supervisor/postgres-stdout.log +docker exec codeflash-test tail -f /var/log/supervisor/webapp-stdout.log + +# Check process tree +docker exec codeflash-test ps auxf +``` + +--- + +## Common Test Failures + +### Container won't start +```bash +docker logs codeflash-test | grep -i error +docker logs codeflash-test | grep -i fail +``` + +### Service shows FATAL +```bash +docker exec codeflash-test supervisorctl tail cf-api +docker exec codeflash-test supervisorctl tail aiservice +``` + +### PostgreSQL not ready +```bash +docker exec codeflash-test pg_isready +docker logs codeflash-test | grep postgres +``` + +### API key not generated +```bash +docker logs codeflash-test | grep "API Key" +docker exec codeflash-test ls -la /app/API_KEY.txt +``` + +--- + +## Automated Test Script + +Save this as `test-deployment.sh`: + +```bash +#!/bin/bash +set -e + +echo "🚀 Starting Codeflash deployment test..." + +# Cleanup +echo "🧹 Cleaning up old test containers..." +docker stop codeflash-test 2>/dev/null || true +docker rm codeflash-test 2>/dev/null || true +docker volume rm codeflash-test-data 2>/dev/null || true + +# Build +echo "🔨 Building image..." +docker build -f deployment/onprem-simple/Dockerfile.unifiedall -t codeflash/unified:test . + +# Run +echo "🏃 Running container..." 
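+# ${AZURE_OPENAI_API_KEY:-test-key} falls back to a dummy key so the stack can boot for this smoke test even without a real provider key.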
+docker run -d --name codeflash-test \ + -e AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-test-key} \ + -p 5432:5432 -p 8000:8000 -p 3001:3001 -p 3000:3000 \ + -v codeflash-test-data:/var/lib/postgresql/data \ + codeflash/unified:test + +# Wait +echo "⏳ Waiting for services to start..." +sleep 20 + +# Test +echo "🔍 Testing services..." +docker exec codeflash-test supervisorctl status | grep RUNNING || (echo "❌ Services not running" && exit 1) +curl -f http://localhost:3001/cfapi/healthcheck || (echo "❌ cf-api not responding" && exit 1) +curl -f http://localhost:8000/health || (echo "❌ aiservice not responding" && exit 1) +docker exec codeflash-test pg_isready || (echo "❌ PostgreSQL not ready" && exit 1) + +# Get API key +echo "🔑 Retrieving API key..." +API_KEY=$(docker exec codeflash-test cat /app/API_KEY.txt) +echo "API Key: $API_KEY" + +echo "✅ All tests passed!" +echo "" +echo "To clean up: docker stop codeflash-test && docker rm codeflash-test && docker volume rm codeflash-test-data" +``` + +Make it executable and run: +```bash +chmod +x test-deployment.sh +./test-deployment.sh +``` + +--- + +## Performance Benchmarks + +Expected performance metrics: + +| Metric | Value | +|--------|-------| +| Build time | 5-10 minutes | +| Startup time | 15-20 seconds | +| Container size | ~5GB | +| Memory usage (idle) | ~500MB | +| Memory usage (active) | ~1-2GB | +| CPU usage (idle) | <5% | +| CPU usage (optimizing) | 50-100% | + +--- + +## Next Steps + +After successful testing: +1. Update to your actual AI provider keys +2. Configure custom domain/URLs if needed +3. Set up monitoring and backups +4. Deploy to production environment +5. Document your specific configuration + +For production deployment best practices, see the main README.md. diff --git a/deployment/onprem-simple/archive/old-compose/docker-compose-simple.yml b/deployment/onprem-simple/archive/old-compose/docker-compose-simple.yml new file mode 100644 index 000000000..0cbf8b16c --- /dev/null +++ b/deployment/onprem-simple/archive/old-compose/docker-compose-simple.yml @@ -0,0 +1,108 @@ +version: '3.8' + +services: + postgres: + image: postgres:15-alpine + container_name: codeflash-postgres + environment: + POSTGRES_DB: codeflash + POSTGRES_USER: codeflash + POSTGRES_PASSWORD: codeflash + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U codeflash"] + interval: 5s + timeout: 5s + retries: 5 + + db-init: + image: codeflash/cf-api:latest + container_name: codeflash-db-init + depends_on: + postgres: + condition: service_healthy + environment: + DATABASE_URL: postgresql://codeflash:codeflash@postgres:5432/codeflash + command: > + sh -c " + echo 'Running Prisma migrations...' && + cd /app/common && + npx prisma migrate deploy && + echo 'Checking for existing users...' && + USER_COUNT=$$(psql postgresql://codeflash:codeflash@postgres:5432/codeflash -t -c 'SELECT COUNT(*) FROM users;' 2>/dev/null | tr -d ' ' || echo '0') && + if [ \"$$USER_COUNT\" = \"0\" ]; then + echo 'Creating default user and API key...' 
&& + API_KEY=\"cf_$$(openssl rand -hex 32)\" && + SUFFIX=\"$${API_KEY: -4}\" && + psql postgresql://codeflash:codeflash@postgres:5432/codeflash <<-EOSQL && + INSERT INTO users (user_id, github_username, email, name, onboarding_completed, created_at) + VALUES ('local|default-user', 'codeflash-user', 'user@codeflash.local', 'Default User', true, NOW()) + ON CONFLICT (user_id) DO NOTHING; + INSERT INTO cf_api_keys (key, suffix, name, user_id, tier, created_at) + VALUES ('$$API_KEY', '$$SUFFIX', 'Default API Key', 'local|default-user', 'free', NOW()); +EOSQL + echo '' && + echo '======================================' && + echo ' CODEFLASH API KEY CREATED' && + echo '======================================' && + echo '' && + echo \"$$API_KEY\" && + echo '' && + echo 'Add to cli/codeflash/.env:' && + echo \"CODEFLASH_API_KEY=$$API_KEY\" && + echo 'CODEFLASH_AIS_SERVER=local' && + echo 'CODEFLASH_CFAPI_SERVER=local' && + echo '' && + echo '======================================'; + else + echo 'Users exist. To get API key run:' && + echo 'docker exec codeflash-postgres psql -U codeflash -d codeflash -c \"SELECT key FROM cf_api_keys LIMIT 1;\"'; + fi && + echo 'Database ready!' + " + restart: "no" + + aiservice: + image: codeflash/aiservice:latest + container_name: codeflash-aiservice + depends_on: + postgres: + condition: service_healthy + db-init: + condition: service_completed_successfully + environment: + DATABASE_URL: postgresql://codeflash:codeflash@postgres:5432/codeflash + SECRET_KEY: development-secret-key + OPENAI_API_TYPE: azure + AZURE_OPENAI_API_KEY: ${AZURE_OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + ENVIRONMENT: DEVELOPMENT + ports: + - "8000:8000" + + cf-api: + image: codeflash/cf-api:latest + container_name: codeflash-cf-api + depends_on: + postgres: + condition: service_healthy + db-init: + condition: service_completed_successfully + aiservice: + condition: service_started + environment: + DATABASE_URL: postgresql://codeflash:codeflash@postgres:5432/codeflash + AISERVICE_URL: http://aiservice:8000 + GH_APP_ID: 800528 + GH_APP_USER_ID: 148906541 + GH_APP_WEBHOOK_SECRET: dev-webhook-secret + SECRET_KEY: development-secret-key + NODE_ENV: local + ports: + - "3001:3001" + +volumes: + postgres_data: \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-compose/docker-compose.yml b/deployment/onprem-simple/archive/old-compose/docker-compose.yml new file mode 100644 index 000000000..972c46715 --- /dev/null +++ b/deployment/onprem-simple/archive/old-compose/docker-compose.yml @@ -0,0 +1,57 @@ +version: '3.8' + +services: + postgres: + image: postgres:15-alpine + environment: + POSTGRES_DB: ${POSTGRES_DB:-codeflash} + POSTGRES_USER: ${POSTGRES_USER:-codeflash} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + volumes: + - postgres_data:/var/lib/postgresql/data + restart: unless-stopped + + aiservice: + image: codeflash/aiservice:latest + environment: + DATABASE_URL: postgresql://${POSTGRES_USER:-codeflash}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-codeflash} + OPENAI_API_KEY: ${OPENAI_API_KEY} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} + SECRET_KEY: ${DJANGO_SECRET_KEY} + depends_on: + - postgres + restart: unless-stopped + + cf-api: + image: codeflash/cf-api:latest + environment: + DATABASE_URL: postgresql://${POSTGRES_USER:-codeflash}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-codeflash} + GH_APP_ID: ${GH_APP_ID} + GH_APP_PRIVATE_KEY: ${GH_APP_PRIVATE_KEY} + GH_APP_WEBHOOK_SECRET: ${GH_APP_WEBHOOK_SECRET} + AISERVICE_URL: 
http://aiservice:8000 + depends_on: + - postgres + - aiservice + ports: + - "3001:3001" + restart: unless-stopped + + cf-webapp: + image: codeflash/cf-webapp:latest + environment: + DATABASE_URL: postgresql://${POSTGRES_USER:-codeflash}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-codeflash} + CODEFLASH_CFAPI_URL: http://cf-api:3001 + AUTH0_CLIENT_ID: ${AUTH0_CLIENT_ID} + AUTH0_CLIENT_SECRET: ${AUTH0_CLIENT_SECRET} + AUTH0_ISSUER_BASE_URL: ${AUTH0_ISSUER_BASE_URL} + AUTH0_SECRET: ${AUTH0_SECRET} + NEXT_PUBLIC_APP_URL: ${NEXT_PUBLIC_APP_URL} + depends_on: + - cf-api + ports: + - "3000:3000" + restart: unless-stopped + +volumes: + postgres_data: \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.aiservice b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.aiservice new file mode 100644 index 000000000..affd8ba59 --- /dev/null +++ b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.aiservice @@ -0,0 +1,30 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install uv +RUN pip install uv + +# Copy entire aiservice directory +COPY aiservice ./ + +# Install dependencies +RUN uv sync + +# Set environment variables +ENV ENVIRONMENT=PRODUCTION +ENV PORT=8000 + +EXPOSE 8000 + +# Use the start script that references gunicorn.conf.py +CMD ["uv", "run", "gunicorn", "-c", "gunicorn.conf.py", "aiservice.asgi:application", \ + "--bind", "0.0.0.0:8000", \ + "--timeout", "600", \ + "--workers", "2"] diff --git a/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.cfapi b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.cfapi new file mode 100644 index 000000000..0613b1a75 --- /dev/null +++ b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.cfapi @@ -0,0 +1,51 @@ +FROM node:20-alpine + +WORKDIR /build + +# Copy both cf-api and common +COPY cf-api ./cf-api +COPY common ./common + +# Build common package first (since cf-api depends on it) +WORKDIR /build/common +RUN npm ci && npm run build + +# Install ALL dependencies for cf-api (including dev dependencies) +WORKDIR /build/cf-api +RUN npm ci + +# Replace the common package from registry with local build +RUN rm -rf node_modules/@codeflash-ai/common && \ + cp -r /build/common node_modules/@codeflash-ai/ + +# Clean dist folder before build to avoid nested dist directories +RUN rm -rf dist + +# Build TypeScript +RUN npm run build + +# Production stage +FROM node:20-alpine + +WORKDIR /app + +# Copy the built files +COPY --from=0 /build/cf-api/dist ./dist +COPY --from=0 /build/cf-api/package.json ./package.json +COPY --from=0 /build/cf-api/package-lock.json ./package-lock.json +COPY --from=0 /build/cf-api/resend ./resend +COPY --from=0 /build/cf-api/github ./github + +# Copy common package to parent directory (matching Azure structure) +COPY --from=0 /build/common ../common + +# Copy already installed node_modules from build stage +COPY --from=0 /build/cf-api/node_modules ./node_modules + +# Copy node_modules into dist to match Azure deployment structure +# Use -L to follow symlinks and avoid broken nested structures +RUN cp -rL node_modules dist/ 2>/dev/null || cp -r node_modules dist/ + +EXPOSE 3001 + +CMD ["npm", "start"] diff --git a/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified new file mode 100644 index 000000000..c553d57ec --- 
/dev/null +++ b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified @@ -0,0 +1,93 @@ +# Stage 1: Build cf-api and common +FROM node:20-alpine AS js-builder + +WORKDIR /build + +# Copy JavaScript projects +COPY js/cf-api ./cf-api +COPY js/common ./common + +# Build common package first +WORKDIR /build/common +RUN npm ci && npm run build + +# Build cf-api +WORKDIR /build/cf-api +RUN npm ci + +# Replace common package from registry with local build +RUN rm -rf node_modules/@codeflash-ai/common && \ + cp -r /build/common node_modules/@codeflash-ai/ + +# Clean and build cf-api +RUN rm -rf dist && npm run build + +# Stage 2: Build aiservice +FROM python:3.12-slim AS python-builder + +WORKDIR /build + +# Install system dependencies +RUN apt-get update && apt-get install -y build-essential libpq-dev && rm -rf /var/lib/apt/lists/* + +# Install uv +RUN pip install uv + +# Copy and build aiservice +COPY django/aiservice ./aiservice +WORKDIR /build/aiservice +RUN uv sync + +# Stage 3: Final unified image +FROM node:20-alpine + +# Install Python, PostgreSQL client, and supervisor +RUN apk add --no-cache \ + python3 \ + py3-pip \ + postgresql15 \ + postgresql15-client \ + supervisor \ + bash \ + && rm -rf /var/cache/apk/* + +# Install Python dependencies globally +RUN pip3 install --break-system-packages uv + +WORKDIR /app + +# Copy cf-api from js-builder +COPY --from=js-builder /build/cf-api/dist ./cf-api/dist +COPY --from=js-builder /build/cf-api/package.json ./cf-api/package.json +COPY --from=js-builder /build/cf-api/package-lock.json ./cf-api/package-lock.json +COPY --from=js-builder /build/cf-api/node_modules ./cf-api/node_modules +COPY --from=js-builder /build/cf-api/resend ./cf-api/resend +COPY --from=js-builder /build/cf-api/github ./cf-api/github +COPY --from=js-builder /build/common ./common + +# Copy node_modules into dist for ESM resolution +RUN cd cf-api && cp -rL node_modules dist/ 2>/dev/null || cp -r node_modules dist/ + +# Copy aiservice from python-builder +COPY --from=python-builder /build/aiservice ./aiservice + +# Create PostgreSQL data directory +RUN mkdir -p /var/lib/postgresql/data /var/run/postgresql && \ + chown -R postgres:postgres /var/lib/postgresql /var/run/postgresql + +# Copy configuration files +COPY deployment/onprem-simple/supervisord.conf /etc/supervisord.conf +COPY deployment/onprem-simple/init-db.sh /app/init-db.sh +COPY deployment/onprem-simple/startup.sh /app/startup.sh + +RUN chmod +x /app/init-db.sh /app/startup.sh + +# Expose ports +EXPOSE 5432 8000 3001 + +# Set environment variables +ENV PGDATA=/var/lib/postgresql/data +ENV PATH="/var/lib/postgresql/bin:${PATH}" + +# Start supervisor +CMD ["/app/startup.sh"] \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified-simple b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified-simple new file mode 100644 index 000000000..e44e7efa1 --- /dev/null +++ b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.unified-simple @@ -0,0 +1,46 @@ +# Unified Codeflash Container - Using Pre-built Images +FROM node:20-alpine + +# Install Python, PostgreSQL, and supervisor +RUN apk add --no-cache \ + python3 \ + py3-pip \ + postgresql15 \ + postgresql15-client \ + supervisor \ + bash \ + openssl \ + && rm -rf /var/cache/apk/* + +# Install Python uv +RUN pip3 install --break-system-packages uv + +WORKDIR /app + +# Copy cf-api from pre-built image +COPY --from=codeflash/cf-api:latest /app /app/cf-api +COPY --from=codeflash/cf-api:latest 
/common /common + +# Copy aiservice from pre-built image +COPY --from=codeflash/aiservice:latest /app /app/aiservice + +# Create PostgreSQL data directory with correct permissions +RUN mkdir -p /var/lib/postgresql/data /var/run/postgresql && \ + chown -R postgres:postgres /var/lib/postgresql /var/run/postgresql + +# Copy configuration files +COPY deployment/onprem-simple/supervisord.conf /etc/supervisord.conf +COPY deployment/onprem-simple/init-db.sh /app/init-db.sh +COPY deployment/onprem-simple/startup.sh /app/startup.sh + +RUN chmod +x /app/init-db.sh /app/startup.sh + +# Expose ports +EXPOSE 5432 8000 3001 + +# Set environment variables +ENV PGDATA=/var/lib/postgresql/data +ENV PATH="/var/lib/postgresql/bin:${PATH}" + +# Start supervisor +CMD ["/app/startup.sh"] \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.webapp b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.webapp new file mode 100644 index 000000000..55fac66b7 --- /dev/null +++ b/deployment/onprem-simple/archive/old-dockerfiles/Dockerfile.webapp @@ -0,0 +1,27 @@ +FROM node:20-alpine + +WORKDIR /build + +# Copy both cf-webapp and common +COPY cf-webapp ./cf-webapp +COPY common ./common + +# Install ALL dependencies for build +WORKDIR /build/cf-webapp +RUN npm ci + +# Build Next.js +RUN npm run build + +# Production stage +FROM node:20-alpine + +WORKDIR /app + +# Copy entire built application (simpler approach) +COPY --from=0 /build/cf-webapp ./ +COPY --from=0 /build/common ../common + +EXPOSE 3000 + +CMD ["npm", "start"] \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-scripts/REBUILD-AND-TEST.sh b/deployment/onprem-simple/archive/old-scripts/REBUILD-AND-TEST.sh new file mode 100755 index 000000000..2d845fecc --- /dev/null +++ b/deployment/onprem-simple/archive/old-scripts/REBUILD-AND-TEST.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -e + +echo "==========================================" +echo " Codeflash Complete Rebuild and Test" +echo "==========================================" +echo "" + +cd /Users/saga4/orgs/codeflash-internal + +echo "Step 1: Stopping and removing old container..." +docker stop codeflash-unified 2>/dev/null || true +docker rm codeflash-unified 2>/dev/null || true +echo "✓ Old container removed" +echo "" + +echo "Step 2: Rebuilding image..." +echo "This will take 5-10 minutes..." +docker build -f deployment/onprem-simple/Dockerfile.unified-selfcontained -t codeflash/unified:latest . +echo "✓ Image rebuilt" +echo "" + +echo "Step 3: Starting new container..." +docker run -d --name codeflash-unified \ + -e OPENAI_API_TYPE=azure \ + -e AZURE_OPENAI_API_KEY=dabd9790e9a54558b4ceafdd74425904 \ + -e ANTHROPIC_API_KEY=sk-ant-api03-E85T16Zy7bGRo1BxVdFUJG_JRMVdMaePuLUJMFO-EQHqI17z0lWMYRHaHKUU47XeNNwZNHl86h1p-Yoq5vVgzg \ + -e SECRET_KEY=bla \ + -e NODE_ENV=local \ + -e GH_APP_ID=800528 \ + -e GH_APP_USER_ID=148906541 \ + -e GH_APP_WEBHOOK_SECRET=dev-webhook-secret-2pjGGmaNy2gyEY4o3aU \ + -e STRIPE_SECRET_KEY=sk_test_51Pap5bRrNDfNWAM0DpQb8D8sCSYxG9aFzc9N5wXN8pVT0fXLQwrJgZEZq1aRoQ9VZgVK7pXKp5aWQZYW7vXKp00aZX5aWQ \ + -p 5432:5432 \ + -p 8000:8000 \ + -p 3001:3001 \ + -p 3000:3000 \ + -v codeflash-data:/var/lib/postgresql/data \ + codeflash/unified:latest + +echo "✓ Container started" +echo "" + +echo "Step 4: Waiting for services to start (30 seconds)..." +sleep 30 + +echo "Step 5: Checking service status..." +docker exec codeflash-unified supervisorctl status +echo "" + +echo "Step 6: Getting API key..." 
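+# Read the first key from the cf_api_keys table and prepend "cf-" so it matches the key format the CLI expects.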
+API_KEY=$(docker exec codeflash-unified psql postgresql://codeflash:codeflash@localhost:5432/codeflash -t -c "SELECT 'cf-' || key FROM cf_api_keys LIMIT 1;" | tr -d ' ') +echo "API Key: $API_KEY" + +echo "Step 7: Testing CLI..." +export CODEFLASH_API_KEY=$API_KEY +export CODEFLASH_AIS_SERVER=local +export CODEFLASH_CFAPI_SERVER=local + +cd /Users/saga4/orgs/optimize-me +codeflash --file src/math/computation.py --function gcd_recursive --no-pr -v + +echo "" +echo "==========================================" +echo " Test Complete!" +echo "==========================================" diff --git a/deployment/onprem-simple/archive/old-scripts/build-images.sh b/deployment/onprem-simple/archive/old-scripts/build-images.sh new file mode 100755 index 000000000..b868685c2 --- /dev/null +++ b/deployment/onprem-simple/archive/old-scripts/build-images.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Simple script to build all images + +set -e + +echo "Building Codeflash Docker images..." +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" + +# Check if Docker is running +if ! docker info > /dev/null 2>&1; then + echo "ERROR: Docker is not running. Please start Docker Desktop and try again." + exit 1 +fi + +cd "$ROOT_DIR" + +# Build cf-api +echo "" +echo "====================" +echo "Building cf-api..." +echo "====================" +cd js +docker build -t codeflash/cf-api:latest -f ../deployment/onprem-simple/Dockerfile.cfapi cf-api/ + +# Build cf-webapp +echo "" +echo "====================" +echo "Building cf-webapp..." +echo "====================" +docker build -t codeflash/cf-webapp:latest -f ../deployment/onprem-simple/Dockerfile.webapp cf-webapp/ + +# Build aiservice +echo "" +echo "====================" +echo "Building aiservice..." +echo "====================" +cd ../django +docker build -t codeflash/aiservice:latest -f ../deployment/onprem-simple/Dockerfile.aiservice aiservice/ + +echo "" +echo "✓ All images built successfully!" +echo "" +echo "Images created:" +docker images | grep codeflash + +echo "" +echo "Next steps:" +echo "1. Test locally: cd $SCRIPT_DIR && docker-compose up -d" +echo "2. Push to registry: docker push codeflash/cf-api:latest" \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-scripts/build.sh b/deployment/onprem-simple/archive/old-scripts/build.sh new file mode 100644 index 000000000..e7d779b1e --- /dev/null +++ b/deployment/onprem-simple/archive/old-scripts/build.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e + +echo "======================================" +echo "Codeflash Unified Container Builder" +echo "======================================" +echo "" + +# Get the repository root (2 levels up from this script) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +cd "$REPO_ROOT" + +echo "Repository root: $REPO_ROOT" +echo "" + +# Check if we should skip individual builds (for faster rebuilds) +SKIP_SERVICE_BUILDS=${SKIP_SERVICE_BUILDS:-false} + +if [ "$SKIP_SERVICE_BUILDS" = "false" ]; then + echo "Step 1/3: Building aiservice image..." + echo "--------------------------------------" + docker build -f deployment/onprem-simple/Dockerfile.aiservice -t codeflash/aiservice:latest . + echo "✓ aiservice image built successfully" + echo "" + + echo "Step 2/3: Building cf-api image..." + echo "--------------------------------------" + docker build -f deployment/onprem-simple/Dockerfile.cfapi -t codeflash/cf-api:latest . 
+ echo "✓ cf-api image built successfully" + echo "" +else + echo "Skipping individual service builds (SKIP_SERVICE_BUILDS=true)" + echo "Using existing codeflash/aiservice:latest and codeflash/cf-api:latest" + echo "" +fi + +echo "Step 3/3: Building unified container..." +echo "--------------------------------------" +docker build -f deployment/onprem-simple/Dockerfile.unified-simple -t codeflash/unified:latest . +echo "✓ unified container built successfully" +echo "" + +echo "======================================" +echo "✓ Build Complete!" +echo "======================================" +echo "" +echo "Image: codeflash/unified:latest" +echo "" +echo "To run the container, use:" +echo " docker run -d --name codeflash \\" +echo " --env-file .env \\" +echo " -p 5432:5432 \\" +echo " -p 8000:8000 \\" +echo " -p 3001:3001 \\" +echo " -v codeflash-data:/var/lib/postgresql/data \\" +echo " codeflash/unified:latest" +echo "" +echo "Or see deployment/onprem-simple/README.md for detailed instructions." +echo "" \ No newline at end of file diff --git a/deployment/onprem-simple/archive/old-scripts/run-fresh.sh b/deployment/onprem-simple/archive/old-scripts/run-fresh.sh new file mode 100644 index 000000000..ae9f7005c --- /dev/null +++ b/deployment/onprem-simple/archive/old-scripts/run-fresh.sh @@ -0,0 +1,128 @@ +#!/bin/bash +set -e + +echo "==========================================" +echo " Codeflash Fresh Deployment" +echo "==========================================" +echo "" + +# Get repository root +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +cd "$REPO_ROOT" + +echo "Repository: $REPO_ROOT" +echo "" + +# Step 1: Clean up +echo "Step 1: Cleaning up existing containers..." +echo "-------------------------------------------" +docker stop codeflash-unified 2>/dev/null && echo "✓ Stopped existing container" || echo "→ No existing container to stop" +docker rm codeflash-unified 2>/dev/null && echo "✓ Removed existing container" || echo "→ No existing container to remove" +docker volume rm codeflash-data 2>/dev/null && echo "✓ Removed data volume" || echo "→ No existing data volume to remove" +echo "" + +# Step 2: Build +echo "Step 2: Building unified container..." +echo "-------------------------------------------" +echo "This will take 5-10 minutes on first build..." +echo "" + +docker build -f deployment/onprem-simple/Dockerfile.unified-selfcontained \ + -t codeflash/unified:latest . + +echo "" +echo "✓ Build complete!" +echo "" + +# Step 3: Run +echo "Step 3: Starting container..." +echo "-------------------------------------------" + +docker run -d --name codeflash-unified \ + -e OPENAI_API_TYPE=azure \ + -e AZURE_OPENAI_API_KEY=dabd9790e9a54558b4ceafdd74425904 \ + -e ANTHROPIC_API_KEY=sk-ant-api03-E85T16Zy7bGRo1BxVdFUJG_JRMVdMaePuLUJMFO-EQHqI17z0lWMYRHaHKUU47XeNNwZNHl86h1p-Yoq5vVgzg \ + -e SECRET_KEY=bla \ + -e NODE_ENV=local \ + -e GH_APP_ID=800528 \ + -e GH_APP_USER_ID=148906541 \ + -e GH_APP_WEBHOOK_SECRET=dev-webhook-secret-2pjGGmaNy2gyEY4o3aU \ + -e STRIPE_SECRET_KEY=sk_test_51Pap5bRrNDfNWAM0DpQb8D8sCSYxG9aFzc9N5wXN8pVT0fXLQwrJgZEZq1aRoQ9VZgVK7pXKp5aWQZYW7vXKp00aZX5aWQ \ + -p 5432:5432 \ + -p 8000:8000 \ + -p 3001:3001 \ + -v codeflash-data:/var/lib/postgresql/data \ + codeflash/unified:latest + +echo "" +echo "✓ Container started!" +echo "" + +# Step 4: Wait for services to start +echo "Step 4: Waiting for services to start..." +echo "-------------------------------------------" +echo "This may take 15-30 seconds..." 
+echo "" + +sleep 10 + +# Wait for the API key file to be created (indicates initialization is complete) +for i in {1..30}; do + if docker exec codeflash-unified test -f /app/API_KEY.txt 2>/dev/null; then + break + fi + echo " Waiting for database initialization... ($i/30)" + sleep 2 +done + +echo "" + +# Step 5: Verify services +echo "Step 5: Verifying services..." +echo "-------------------------------------------" + +# Wait a bit more for services to fully start +sleep 5 + +docker exec codeflash-unified supervisorctl status + +echo "" + +# Step 6: Show API key +echo "==========================================" +echo " ✓ DEPLOYMENT COMPLETE!" +echo "==========================================" +echo "" + +API_KEY=$(docker exec codeflash-unified cat /app/API_KEY.txt 2>/dev/null || echo "API key not yet generated") + +if [ "$API_KEY" != "API key not yet generated" ]; then + echo "Your API Key: $API_KEY" + echo "" + echo "To use the CLI, run these commands:" + echo "" + echo " cd /Users/saga4/orgs/optimize-me" + echo " export CODEFLASH_API_KEY=$API_KEY" + echo " export CODEFLASH_AIS_SERVER=local" + echo " export CODEFLASH_CFAPI_SERVER=local" + echo " codeflash --file src/math/computation.py --function gcd_recursive --no-pr -v" + echo "" +else + echo "Services are still initializing. View logs with:" + echo " docker logs -f codeflash-unified" + echo "" + echo "Once you see 'CODEFLASH SETUP COMPLETE!', retrieve your API key with:" + echo " docker exec codeflash-unified cat /app/API_KEY.txt" + echo "" +fi + +echo "==========================================" +echo "" +echo "Useful commands:" +echo " View logs: docker logs -f codeflash-unified" +echo " Restart: docker restart codeflash-unified" +echo " Stop: docker stop codeflash-unified" +echo " Check services: docker exec codeflash-unified supervisorctl status" +echo "" \ No newline at end of file diff --git a/deployment/onprem-simple/init-db.sh b/deployment/onprem-simple/init-db.sh new file mode 100644 index 000000000..ca4c0a0f8 --- /dev/null +++ b/deployment/onprem-simple/init-db.sh @@ -0,0 +1,138 @@ +#!/bin/bash +set -e + +echo "===================================" +echo "Initializing Codeflash Database" +echo "===================================" + +# Wait for PostgreSQL to be ready +echo "Waiting for PostgreSQL to start..." +timeout=60 +counter=0 +until pg_isready -h localhost -p 5432 -U codeflash 2>/dev/null; do + counter=$((counter + 1)) + if [ $counter -gt $timeout ]; then + echo "ERROR: PostgreSQL did not start within ${timeout} seconds" + exit 1 + fi + sleep 1 +done + +echo "PostgreSQL is ready!" + +# Run Prisma migrations +echo "" +echo "Running Prisma migrations..." +cd /common +export DATABASE_URL="${DATABASE_URL:-postgresql://codeflash:codeflash@localhost:5432/codeflash}" +npx prisma migrate deploy + +echo "" +echo "Checking for existing users..." +USER_COUNT=$(psql "$DATABASE_URL" -t -c "SELECT COUNT(*) FROM users;" 2>/dev/null || echo "0") +USER_COUNT=$(echo $USER_COUNT | tr -d ' ') + +if [ "$USER_COUNT" = "0" ]; then + echo "" + echo "No users found. Creating default user and API key..." 
+ + # Hardcoded API key for on-premise deployment + # Unhashed key (what users will use): cf-LDKLmsqjcZeX6SvjFPTz66NWgTV25njdWNUxinokmJcfegwRWytqFoJBoCkAKQad + # This is the SHA-384 hash stored in the database: + API_KEY_UNHASHED="LDKLmsqjcZeX6SvjFPTz66NWgTV25njdWNUxinokmJcfegwRWytqFoJBoCkAKQad" + API_KEY_HASHED="uXkhQcmQVmZbOpMtPMUTfeLRZOD3-s6GzYV1IpLMtJeHX4I9P8Ej_Kx3RftkP9yw" + SUFFIX="KQad" + + # Use environment variables or defaults for user information + DEFAULT_USER_ID="${DEFAULT_USER_ID:-github|10488227}" + DEFAULT_USERNAME="${DEFAULT_USERNAME:-Saga4}" + DEFAULT_EMAIL="${DEFAULT_EMAIL:-sarthak.agarwal.cse12@iitbhu.ac.in}" + DEFAULT_NAME="${DEFAULT_NAME:-$DEFAULT_USERNAME}" + + echo "Creating user: $DEFAULT_USERNAME (ID: $DEFAULT_USER_ID)" + + # Insert user + psql "$DATABASE_URL" <<-EOSQL + INSERT INTO users (user_id, github_username, email, name, onboarding_completed, created_at) + VALUES ('$DEFAULT_USER_ID', '$DEFAULT_USERNAME', '$DEFAULT_EMAIL', '$DEFAULT_NAME', true, NOW()) + ON CONFLICT (user_id) DO NOTHING; +EOSQL + + # Insert API key (HASHED version in database) + psql "$DATABASE_URL" <<-EOSQL + INSERT INTO cf_api_keys (key, suffix, name, user_id, tier, created_at) + VALUES ('$API_KEY_HASHED', '$SUFFIX', 'Default API Key', '$DEFAULT_USER_ID', 'free', NOW()); +EOSQL + + # Insert subscription with unlimited usage for on-premise deployments + echo "Creating unlimited subscription for on-premise deployment..." + psql "$DATABASE_URL" <<-EOSQL + INSERT INTO subscriptions ( + id, + user_id, + stripe_customer_id, + stripe_subscription_id, + plan_type, + optimizations_used, + optimizations_limit, + subscription_status, + created_at, + updated_at + ) + VALUES ( + gen_random_uuid(), + '$DEFAULT_USER_ID', + NULL, + NULL, + 'enterprise', + 0, + 999999999, + 'active', + NOW(), + NOW() + ) + ON CONFLICT (user_id) DO NOTHING; +EOSQL + + # Format API key with cf- prefix for display (UNHASHED version for users) + DISPLAY_API_KEY="cf-${API_KEY_UNHASHED}" + + echo "" + echo "======================================" + echo " CODEFLASH SETUP COMPLETE!" + echo "======================================" + echo "" + echo "User: $DEFAULT_USERNAME" + echo "Email: $DEFAULT_EMAIL" + echo "" + echo "Your API Key: $DISPLAY_API_KEY" + echo "" + echo "Save this API key! You'll need it to configure the Codeflash CLI." + echo "" + echo "To use the CLI, set these environment variables:" + echo " export CODEFLASH_API_KEY=$DISPLAY_API_KEY" + echo " export CODEFLASH_AIS_SERVER=local" + echo " export CODEFLASH_CFAPI_SERVER=local" + echo "" + echo "Or create a .env file in your project:" + echo " CODEFLASH_API_KEY=$DISPLAY_API_KEY" + echo " CODEFLASH_AIS_SERVER=local" + echo " CODEFLASH_CFAPI_SERVER=local" + echo "" + echo "======================================" + echo "" + + # Save API key to file for easy retrieval + echo "$DISPLAY_API_KEY" > /app/API_KEY.txt + echo "API key also saved to: /app/API_KEY.txt" + echo "(Retrieve anytime with: docker exec cat /app/API_KEY.txt)" + echo "" +else + echo "Found $USER_COUNT existing user(s). Skipping user creation." + echo "" + echo "To retrieve an existing API key, run:" + echo " docker exec psql \$DATABASE_URL -c \"SELECT key FROM cf_api_keys LIMIT 1;\"" + echo "" +fi + +echo "Database initialization complete!" 
\ No newline at end of file
diff --git a/deployment/onprem-simple/startup.sh b/deployment/onprem-simple/startup.sh
new file mode 100644
index 000000000..8bf06ff6a
--- /dev/null
+++ b/deployment/onprem-simple/startup.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+set -e
+
+echo "==============================="
+echo "Starting Codeflash Services"
+echo "==============================="
+
+# Initialize PostgreSQL if needed
+if [ ! -f "/var/lib/postgresql/data/PG_VERSION" ]; then
+    echo "Initializing PostgreSQL database..."
+    su - postgres -c "initdb -D /var/lib/postgresql/data"
+
+    # Configure PostgreSQL
+    echo "Configuring PostgreSQL..."
+    cat >> /var/lib/postgresql/data/postgresql.conf <<EOF
+listen_addresses = '*'
+EOF
+
+    cat >> /var/lib/postgresql/data/pg_hba.conf <<EOF
+host    all    all    0.0.0.0/0    md5
+EOF
+fi
+
+# Start PostgreSQL temporarily for initial setup
+echo "Starting PostgreSQL..."
+su - postgres -c "pg_ctl -D /var/lib/postgresql/data start -w"
+
+# Wait for PostgreSQL to be ready
+echo "Waiting for PostgreSQL to be ready..."
+for i in {1..30}; do
+    if pg_isready -h localhost -p 5432 > /dev/null 2>&1; then
+        echo "PostgreSQL is ready!"
+        break
+    fi
+    sleep 1
+done
+
+# Check if codeflash user exists, create if not
+USER_EXISTS=$(su - postgres -c "psql -tAc \"SELECT 1 FROM pg_roles WHERE rolname='codeflash'\"" 2>/dev/null || echo "0")
+if [ "$USER_EXISTS" != "1" ]; then
+    echo "Creating database user and database..."
+    su - postgres -c "psql -c \"CREATE USER codeflash WITH PASSWORD 'codeflash';\""
+    su - postgres -c "psql -c \"CREATE DATABASE codeflash OWNER codeflash;\""
+    su - postgres -c "psql -c \"GRANT ALL PRIVILEGES ON DATABASE codeflash TO codeflash;\""
+    echo "User and database created successfully!"
+else
+    echo "Database user already exists, skipping creation."
+fi
+
+# Stop PostgreSQL so supervisord can start it properly
+echo "Stopping temporary PostgreSQL instance..."
+su - postgres -c "pg_ctl -D /var/lib/postgresql/data stop -w"
+sleep 2
+
+# Set default DATABASE_URL if not provided
+export DATABASE_URL="${DATABASE_URL:-postgresql://codeflash:codeflash@localhost:5432/codeflash}"
+
+# Auto-generate SECRET_KEY if not provided
+if [ -z "$SECRET_KEY" ]; then
+    echo "⚠️ SECRET_KEY not provided, generating random key..."
+    export SECRET_KEY=$(openssl rand -hex 32)
+    echo "✓ Generated SECRET_KEY"
+fi
+
+# Set default URLs if not provided
+export NEXT_PUBLIC_APP_URL="${NEXT_PUBLIC_APP_URL:-http://localhost:3000}"
+export WEBAPP_URL="${WEBAPP_URL:-http://localhost:3000}"
+export CODEFLASH_CFAPI_URL="${CODEFLASH_CFAPI_URL:-http://localhost:3001}"
+
+echo ""
+echo "Starting services with supervisord..."
+echo ""
+
+# Start supervisord in background
+/usr/bin/supervisord -c /etc/supervisord.conf &
+SUPERVISOR_PID=$!
+
+# Wait for PostgreSQL to be ready under supervisord
+echo "Waiting for PostgreSQL to start under supervisord..."
+sleep 10 + +# Run database initialization (migrations and API key creation) +/app/init-db.sh + +# Keep supervisord running +wait $SUPERVISOR_PID \ No newline at end of file diff --git a/deployment/onprem-simple/supervisord.conf b/deployment/onprem-simple/supervisord.conf new file mode 100644 index 000000000..356f8f70e --- /dev/null +++ b/deployment/onprem-simple/supervisord.conf @@ -0,0 +1,62 @@ +[supervisord] +nodaemon=true +user=root +logfile=/var/log/supervisord.log +pidfile=/var/run/supervisord.pid + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[program:postgres] +command=/usr/libexec/postgresql15/postgres -D /var/lib/postgresql/data +user=postgres +autostart=true +autorestart=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +priority=1 + +[program:aiservice] +command=/bin/sh -c "cd /app/aiservice && uv run gunicorn -c gunicorn.conf.py aiservice.asgi:application --bind 0.0.0.0:8000 --timeout 600 --workers 2" +directory=/app/aiservice +autostart=true +autorestart=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +priority=10 +environment=PYTHONUNBUFFERED="1",DATABASE_URL="postgresql://codeflash:codeflash@localhost:5432/codeflash" + +[program:cf-api] +command=/bin/sh -c "cd /app/cf-api && npm start" +directory=/app/cf-api +autostart=true +autorestart=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +priority=10 +environment=NODE_ENV="local",DATABASE_URL="postgresql://codeflash:codeflash@localhost:5432/codeflash" + +[program:cf-webapp] +command=/bin/sh -c "cd /app/cf-webapp && npm start" +directory=/app/cf-webapp +autostart=true +autorestart=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +priority=10 +environment=DATABASE_URL="postgresql://codeflash:codeflash@localhost:5432/codeflash",NODE_ENV="production" \ No newline at end of file diff --git a/django/.dockerignore b/django/.dockerignore new file mode 100644 index 000000000..5833a4234 --- /dev/null +++ b/django/.dockerignore @@ -0,0 +1,81 @@ +# Python virtual environments +.venv/ +venv/ +env/ +ENV/ +aiservice/.venv/ +aiservice/venv/ +aiservice/__pycache__/ + +# Python cache files +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Testing +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Git +.git/ +.gitignore + +# Logs +*.log +logs/ + +# Database +*.db +*.sqlite3 + +# Environment files +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# Temporary files +tmp/ +temp/ +.tmp/ diff --git a/django/aiservice/aiservice/env_specific.py b/django/aiservice/aiservice/env_specific.py index cf943e053..3cc30b963 100644 --- a/django/aiservice/aiservice/env_specific.py +++ b/django/aiservice/aiservice/env_specific.py @@ -3,15 +3,15 @@ from __future__ import annotations import logging import os import sys -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal from dotenv import load_dotenv from openai import AsyncOpenAI -from openai.lib.azure import AsyncAzureOpenAI if TYPE_CHECKING: from collections.abc import Callable + IS_PRODUCTION = os.environ.get("ENVIRONMENT", default="") == "PRODUCTION" LOGGING_FORMAT = "[%(levelname)s] %(message)s" @@ -41,31 +41,34 @@ def debug_log_sensitive_data_from_callable(message: Callable[[], str | None]) -> logging.debug(message()) -def create_openai_client_instance( - client_type: str = os.environ.get("OPENAI_API_TYPE", default="azure"), -) -> AsyncOpenAI | AsyncAzureOpenAI: - if client_type == "azure": - logging.info("OpenAIClient: Using Azure OpenAI service.") - openai_client: AsyncOpenAI | AsyncAzureOpenAI = AsyncAzureOpenAI( - # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning - api_version="2024-08-01-preview", - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://codeflash-openai-service-eastus2-0.openai.azure.com", - - ) - else: - logging.info("OpenAIClient: Using OpenAI API.") - openai_client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - return openai_client - - -def create_claude_client() -> AsyncOpenAI: - logging.info("Claude Client") - claude_client: AsyncOpenAI = AsyncOpenAI( - api_key=os.environ.get("ANTHROPIC_API_KEY"), - base_url="https://api.anthropic.com/v1/", +def create_llm_client(model_type: Literal["openai", "anthropic", "google"]) -> AsyncOpenAI | None: + # use azure or openai + openai_api_type = os.environ.get("OPENAI_API_TYPE") + openai_api_base_url = os.environ.get( + "OPENAI_API_BASE" + ) # for us it is https://codeflash-openai-service-eastus2-0.openai.azure.com/openai/v1/ + # we need both of the above to run on azure + azure_api_key, openai_key, anthropic_key, google_key = ( + os.environ.get("AZURE_OPENAI_API_KEY"), + os.environ.get("OPENAI_API_KEY"), + os.environ.get("ANTHROPIC_API_KEY"), + os.environ.get("GEMINI_API_KEY"), ) - return claude_client + if model_type == "openai" and azure_api_key and openai_api_type == "azure" and openai_api_base_url: + # check for azure first + return AsyncOpenAI(api_key=azure_api_key, base_url=openai_api_base_url) + if model_type == "openai" and openai_key: + return AsyncOpenAI(api_key=openai_key) # baseurl not needed for regular openai + if model_type == "anthropic" and anthropic_key: + return AsyncOpenAI(api_key=anthropic_key, base_url="https://api.anthropic.com/v1/") + # # for future use : gemini supported only via GEMINI_API_KEY at the moment, todo for vertex ai + if model_type == "google" and google_key: + return AsyncOpenAI(api_key=google_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/") + return None -openai_client = create_openai_client_instance() +llm_clients = { + "openai": 
create_llm_client("openai"), + "anthropic": create_llm_client("anthropic"), + # "google": create_llm_client("google"), # no need to instantiate right now as we're not using it +} diff --git a/django/aiservice/aiservice/models/aimodels.py b/django/aiservice/aiservice/models/aimodels.py index 31b60fd1f..c35fb37cf 100644 --- a/django/aiservice/aiservice/models/aimodels.py +++ b/django/aiservice/aiservice/models/aimodels.py @@ -1,5 +1,5 @@ import os -from typing import Any +from typing import Any, Literal from pydantic.dataclasses import dataclass @@ -14,7 +14,7 @@ from pydantic.dataclasses import dataclass class LLM: name: str # On Azure OpenAI Service, this is the deployment name max_tokens: int - api_version: str = "" + model_type: Literal["openai", "anthropic", "google"] # Add new pricing attributes in USD per 1M tokens input_cost: float | None = None output_cost: float | None = None @@ -24,6 +24,7 @@ class LLM: @dataclass class GPT_4_OMNI(LLM): name: str = "gpt-4o-2" if os.environ.get("OPENAI_API_TYPE") == "azure" else "gpt-4o" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 128000 input_cost: float = 2.50 output_cost: float = 10.00 @@ -32,6 +33,7 @@ class GPT_4_OMNI(LLM): @dataclass class GPT_4_128k(LLM): name: str = "gpt-4-1106-preview" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 128000 input_cost: float = 10.00 output_cost: float = 30.00 @@ -40,6 +42,7 @@ class GPT_4_128k(LLM): @dataclass class GPT_4_32k(LLM): name: str = "gpt4-32k" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 32768 input_cost: float = 60.00 output_cost: float = 120.00 @@ -48,6 +51,7 @@ class GPT_4_32k(LLM): @dataclass class GPT_4(LLM): name: str = "gpt-4-0613" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 8192 input_cost: float = 30.00 output_cost: float = 60.00 @@ -56,6 +60,7 @@ class GPT_4(LLM): @dataclass class GPT_3_5_Turbo_16k(LLM): name: str = "gpt-3.5-turbo-16k" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 16384 input_cost: float = 3.00 output_cost: float = 4.00 @@ -64,6 +69,7 @@ class GPT_3_5_Turbo_16k(LLM): @dataclass class GPT_3_5_Turbo(LLM): name: str = "gpt-3.5-turbo" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 4096 input_cost: float = 0.50 output_cost: float = 1.50 @@ -72,6 +78,7 @@ class GPT_3_5_Turbo(LLM): @dataclass class Antropic_Claude_3_7(LLM): name: str = "claude-3-7-sonnet-20250219" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 100000 input_cost: float = 3.00 output_cost: float = 15.00 @@ -80,8 +87,8 @@ class Antropic_Claude_3_7(LLM): @dataclass class Anthropic_Claude_4(LLM): name: str = "claude-sonnet-4-20250514" + model_type: Literal["openai", "anthropic", "google"] = "anthropic" max_tokens: int = 100000 - api_version: str = "" input_cost: float = 3.00 output_cost: float = 15.00 @@ -90,8 +97,8 @@ class Anthropic_Claude_4(LLM): class OpenAI_GPT_4_1(LLM): # name: str = "azure/gpt-4.1" name: str = "gpt-4.1" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 100000 - api_version: str = "2024-12-01-preview" input_cost: float = 2.00 output_cost: float = 8.00 @@ -99,14 +106,15 @@ class OpenAI_GPT_4_1(LLM): @dataclass class Gemini_2_5(LLM): name: str = "gemini/gemini-2.5-pro-preview-03-25" + model_type: Literal["openai", "anthropic", "google"] = "google" max_tokens: int = 100000 @dataclass class 
OpenAI_GPT_O_3(LLM): name: str = "azure/o3" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 100000 - api_version = "2025-01-01-preview" input_cost: float = 2.00 output_cost: float = 8.00 @@ -114,15 +122,16 @@ class OpenAI_GPT_O_3(LLM): @dataclass class OpenAI_GPT_O_4_MINI(LLM): name: str = "azure/o4-mini" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 100000 - api_version = "2024-12-01-preview" input_cost: float = 1.10 output_cost: float = 4.40 @dataclass -class GPT_5(LLM): - name: str = "gpt-5" +class GPT_5(LLM): # IT IS TOO SLOW AT THE MOMENT, just here for documentation + name: str = "gpt-5-codex" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 100000 input_cost: float = 1.25 output_cost: float = 10.00 @@ -131,6 +140,7 @@ class GPT_5(LLM): @dataclass class GPT_4_1_Nano(LLM): name: str = "gpt-4.1-nano" + model_type: Literal["openai", "anthropic", "google"] = "openai" max_tokens: int = 100000 input_cost: float = 0.10 output_cost: float = 0.40 @@ -148,8 +158,12 @@ def calculate_llm_cost(response: Any, llm: LLM) -> float | None: """ try: usage = response.usage - prompt_tokens = usage.prompt_tokens - completion_tokens = usage.completion_tokens + if hasattr(usage, "prompt_tokens"): # for openai + prompt_tokens = usage.prompt_tokens + completion_tokens = usage.completion_tokens + else: # for claude + prompt_tokens = usage.input_tokens + completion_tokens = usage.output_tokens prompt_cost = (prompt_tokens / 1_000_000) * llm.input_cost completion_cost = (completion_tokens / 1_000_000) * llm.output_cost @@ -163,11 +177,45 @@ def calculate_llm_cost(response: Any, llm: LLM) -> float | None: return None -EXPLAIN_MODEL: LLM = OpenAI_GPT_4_1() -PLAN_MODEL: LLM = OpenAI_GPT_4_1() -EXECUTE_MODEL: LLM = OpenAI_GPT_4_1() -OPTIMIZE_MODEL: LLM = OpenAI_GPT_4_1() -REFINEMENT_MODEL: LLM = Anthropic_Claude_4() -EXPLANATIONS_MODEL: LLM = Anthropic_Claude_4() -RANKING_MODEL: LLM = OpenAI_GPT_4_1() -OPTIMIZATION_REVIEW_MODEL: LLM = Anthropic_Claude_4() +def _get_openai_model() -> LLM: + """Return OpenAI GPT-4.1 if available, otherwise falls back to Anthropic Claude 4. + + Returns: + LLM: The appropriate model instance based on available API keys. + + """ + if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"): + return OpenAI_GPT_4_1() + # Fall back to Anthropic if OpenAI not available + if os.environ.get("ANTHROPIC_API_KEY"): + return Anthropic_Claude_4() + # Default to OpenAI (will fail gracefully with clear error from env_specific.py) + return OpenAI_GPT_4_1() + + +def _get_anthropic_model() -> LLM: + """Returns Anthropic Claude 4 if available, otherwise falls back to OpenAI GPT-4.1. + + Returns: + LLM: The appropriate model instance based on available API keys. 
+ + """ # noqa: D401 + if os.environ.get("ANTHROPIC_API_KEY"): + return Anthropic_Claude_4() + # Fall back to OpenAI if Anthropic not available + if os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"): + return OpenAI_GPT_4_1() + # Default to Claude (will fail gracefully with clear error from env_specific.py) + return Anthropic_Claude_4() + + +# Dynamically select models based on available API keys +EXPLAIN_MODEL: LLM = _get_openai_model() +PLAN_MODEL: LLM = _get_openai_model() +EXECUTE_MODEL: LLM = _get_openai_model() +OPTIMIZE_MODEL: LLM = _get_openai_model() +RANKING_MODEL: LLM = _get_openai_model() + +REFINEMENT_MODEL: LLM = _get_anthropic_model() +EXPLANATIONS_MODEL: LLM = _get_anthropic_model() +OPTIMIZATION_REVIEW_MODEL: LLM = _get_anthropic_model() diff --git a/django/aiservice/explanations/explanations.py b/django/aiservice/explanations/explanations.py index aa69d998b..94451b1e7 100644 --- a/django/aiservice/explanations/explanations.py +++ b/django/aiservice/explanations/explanations.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING import sentry_sdk from aiservice.analytics.posthog import ph from aiservice.common_utils import validate_trace_id -from aiservice.env_specific import create_claude_client, debug_log_sensitive_data +from aiservice.env_specific import create_llm_client, debug_log_sensitive_data, llm_clients from aiservice.models.aimodels import EXPLANATIONS_MODEL, LLM, calculate_llm_cost from log_features.log_event import update_optimization_cost from log_features.log_features import log_features @@ -213,16 +213,16 @@ async def explain_optimizations( # noqa: D417 | ChatCompletionToolMessageParam | ChatCompletionFunctionMessageParam ] = [system_message, user_message] - async with create_claude_client() as claude_client: - try: - output = await claude_client.with_options(max_retries=2).chat.completions.create( - model=explanations_model.name, messages=messages, n=1 - ) - await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, explanations_model)) - except OpenAIError as e: - sentry_sdk.capture_exception(e) - debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}") - return ExplanationsErrorResponseSchema(error=str(e)) + llm_client = llm_clients[explanations_model.model_type] + try: + output = await llm_client.with_options(max_retries=2).chat.completions.create( + model=explanations_model.name, messages=messages, n=1 + ) + await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, explanations_model)) + except OpenAIError as e: + sentry_sdk.capture_exception(e) + debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}") + return ExplanationsErrorResponseSchema(error=str(e)) debug_log_sensitive_data(f"AIClient optimization response:\n{output}") if output.usage is not None: ph( diff --git a/django/aiservice/optimization_review/optimization_review.py b/django/aiservice/optimization_review/optimization_review.py index 696893a32..9e8261d62 100644 --- a/django/aiservice/optimization_review/optimization_review.py +++ b/django/aiservice/optimization_review/optimization_review.py @@ -7,7 +7,7 @@ from enum import Enum from typing import TYPE_CHECKING, cast import sentry_sdk -from aiservice.env_specific import create_claude_client, debug_log_sensitive_data +from aiservice.env_specific import create_llm_client, debug_log_sensitive_data, llm_clients from aiservice.models.aimodels import OPTIMIZATION_REVIEW_MODEL, calculate_llm_cost from 
log_features.log_event import update_optimization_cost, update_optimization_features_review from ninja import NinjaAPI, Schema @@ -142,7 +142,9 @@ Output as a json markdown block with the key named as 'rating' and value being o async def get_optimization_review( - request, data: OptimizationReviewSchema, optimization_review_model: LLM = OPTIMIZATION_REVIEW_MODEL + request, + data: OptimizationReviewSchema, + optimization_review_model: LLM = OPTIMIZATION_REVIEW_MODEL, # noqa: ANN001 ) -> tuple[int, OptimizationReviewResponseSchema | OptimizationReviewErrorSchema]: """Compute optimization review via Claude.""" ph(request.user, "aiservice-optimization-review-called") @@ -151,11 +153,11 @@ async def get_optimization_review( debug_log_sensitive_data(f"{messages[0]}{messages[1]}") - async with create_claude_client() as claude_client: - # Call Claude API with retries - response = await claude_client.with_options(max_retries=2).chat.completions.create( - model=optimization_review_model.name, messages=messages - ) + llm_client = llm_clients[optimization_review_model.model_type] + # Call Claude API with retries + response = await llm_client.with_options(max_retries=2).chat.completions.create( + model=optimization_review_model.name, messages=messages + ) # Calculate and update cost cost = calculate_llm_cost(response, optimization_review_model) if cost: @@ -197,7 +199,8 @@ async def get_optimization_review( }, ) async def optimization_review( - request, data: OptimizationReviewSchema + request, + data: OptimizationReviewSchema, # noqa: ANN001 ) -> tuple[int, OptimizationReviewResponseSchema | OptimizationReviewErrorSchema]: response_code, output = await get_optimization_review(request, data) try: diff --git a/django/aiservice/optimizer/optimizer.py b/django/aiservice/optimizer/optimizer.py index ad9533272..599206af0 100644 --- a/django/aiservice/optimizer/optimizer.py +++ b/django/aiservice/optimizer/optimizer.py @@ -8,18 +8,18 @@ from typing import TYPE_CHECKING import libcst as cst import sentry_sdk +from aiservice.analytics.posthog import ph +from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id +from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients +from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost +from authapp.user import get_user_by_id +from log_features.log_event import log_optimization_event +from log_features.log_features import log_features from ninja import NinjaAPI from ninja.errors import HttpError from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam from pydantic import ValidationError -from aiservice.analytics.posthog import ph -from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id -from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, openai_client -from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost -from authapp.user import get_user_by_id -from log_features.log_event import log_optimization_event -from log_features.log_features import log_features from optimizer.context_utils.context_helpers import group_code from optimizer.context_utils.optimizer_context import ( BaseOptimizerContext, @@ -30,6 +30,7 @@ from optimizer.context_utils.optimizer_context import ( from optimizer.models import OptimizeSchema # noqa: TC001 if TYPE_CHECKING: + from aiservice.models.aimodels import LLM from django.http import 
HttpRequest from openai.types.chat import ( ChatCompletionAssistantMessageParam, @@ -37,8 +38,6 @@ if TYPE_CHECKING: ChatCompletionToolMessageParam, ) - from aiservice.models.aimodels import LLM - optimizations_json = [ { @@ -138,8 +137,9 @@ async def optimize_python_code( | ChatCompletionToolMessageParam | ChatCompletionFunctionMessageParam ] = [system_message, user_message] + llm_client = llm_clients[optimize_model.model_type] try: - output = await openai_client.with_options(max_retries=3).chat.completions.create( + output = await llm_client.with_options(max_retries=3).chat.completions.create( model=optimize_model.name, messages=messages, n=n ) except Exception as e: diff --git a/django/aiservice/optimizer/optimizer_line_profiler.py b/django/aiservice/optimizer/optimizer_line_profiler.py index a58bc2745..0df6b4994 100644 --- a/django/aiservice/optimizer/optimizer_line_profiler.py +++ b/django/aiservice/optimizer/optimizer_line_profiler.py @@ -5,34 +5,30 @@ from pathlib import Path from typing import TYPE_CHECKING import sentry_sdk -from ninja import NinjaAPI, Schema -from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam - from aiservice.analytics.posthog import ph from aiservice.common_utils import parse_python_version, validate_trace_id -from aiservice.env_specific import ( - debug_log_sensitive_data, - debug_log_sensitive_data_from_callable, - openai_client, -) +from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost from log_features.log_event import update_optimization_cost from log_features.log_features import log_features +from ninja import NinjaAPI, Schema +from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam + from optimizer.context_utils.optimizer_context import ( BaseOptimizerContext, OptimizeErrorResponseSchema, - OptimizeResponseItemSchema, OptimizeResponseSchema, ) if TYPE_CHECKING: + from aiservice.models.aimodels import LLM from openai.types.chat import ( ChatCompletionAssistantMessageParam, ChatCompletionFunctionMessageParam, ChatCompletionToolMessageParam, ) - from aiservice.models.aimodels import LLM + from optimizer.context_utils.optimizer_context import OptimizeResponseItemSchema optimize_line_profiler_api = NinjaAPI(urls_namespace="optimize-line-profiler") @@ -44,7 +40,7 @@ SYSTEM_PROMPT = (current_dir / "system_prompt.md").read_text() USER_PROMPT = (current_dir / "user_prompt.md").read_text() -async def optimize_python_code_line_profiler( +async def optimize_python_code_line_profiler( # noqa: D417 user_id: str, trace_id: str, line_profiler_results: str, @@ -52,7 +48,7 @@ async def optimize_python_code_line_profiler( dependency_code: str | None = None, n: int = 1, optimize_model: LLM = OPTIMIZE_MODEL, - lsp_mode: bool = False, + lsp_mode: bool = False, # noqa: FBT001, FBT002 python_version: tuple[int, int, int] = (3, 12, 9), ) -> list[OptimizeResponseItemSchema]: """Optimize the given python code for performance using OpenAI's GPT-4o model. 
@@ -92,8 +88,9 @@ async def optimize_python_code_line_profiler( ] = [system_message, user_message] debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n") # TODO: Verify if the context window length is within the model capability + llm_client = llm_clients[optimize_model.model_type] try: - output = await openai_client.with_options(max_retries=3).chat.completions.create( + output = await llm_client.with_options(max_retries=3).chat.completions.create( model=optimize_model.name, messages=messages, n=n ) await update_optimization_cost(trace_id=trace_id, cost=calculate_llm_cost(output, optimize_model)) @@ -140,12 +137,12 @@ class OptimizeSchemaLP(Schema): @optimize_line_profiler_api.post( "/", response={200: OptimizeResponseSchema, 400: OptimizeErrorResponseSchema, 500: OptimizeErrorResponseSchema} ) -async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]: +async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeResponseSchema | OptimizeErrorResponseSchema]: # noqa: ANN001 ph(request.user, "aiservice-optimize-called") ctx: BaseOptimizerContext = BaseOptimizerContext.get_dynamic_context(SYSTEM_PROMPT, USER_PROMPT, data.source_code) try: python_version: tuple[int, int, int] = parse_python_version(data.python_version) - except: + except: # noqa: E722 return 400, OptimizeErrorResponseSchema( error="Invalid Python version, it should look like 3.x.x. We only support Python 3.9 and above." ) @@ -197,7 +194,7 @@ async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeRespon response = OptimizeResponseSchema(optimizations=optimization_response_items) - def log_response(): + def log_response() -> None: debug_log_sensitive_data(f"Response:\n{response.json()}") for opt in response.optimizations: debug_log_sensitive_data(f"Optimized source:\n{opt.source_code}") diff --git a/django/aiservice/optimizer/refinement.py b/django/aiservice/optimizer/refinement.py index 6969cb479..769ad9814 100644 --- a/django/aiservice/optimizer/refinement.py +++ b/django/aiservice/optimizer/refinement.py @@ -9,7 +9,7 @@ import libcst as cst import sentry_sdk from aiservice.analytics.posthog import ph from aiservice.common_utils import validate_trace_id -from aiservice.env_specific import create_claude_client, debug_log_sensitive_data +from aiservice.env_specific import create_llm_client, debug_log_sensitive_data, llm_clients from aiservice.models.aimodels import REFINEMENT_MODEL, calculate_llm_cost from log_features.log_event import update_optimization_cost from log_features.log_features import log_features @@ -227,17 +227,17 @@ async def refinement( # noqa: D417 | ChatCompletionFunctionMessageParam ] = [system_message, user_message] debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n") - async with create_claude_client() as claude_client: - try: - output = await claude_client.with_options(max_retries=2).chat.completions.create( - model=optimize_model.name, messages=messages, n=1 - ) - llm_cost = calculate_llm_cost(output, optimize_model) - except Exception as e: - logging.exception("Claude Code Generation error in refinement") - sentry_sdk.capture_exception(e) - debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}") - return OptimizeErrorResponseSchema(error=str(e)) + llm_client = llm_clients[optimize_model.model_type] + try: + output = await llm_client.with_options(max_retries=2).chat.completions.create( + model=optimize_model.name, messages=messages, 
n=1 + ) + llm_cost = calculate_llm_cost(output, optimize_model) + except Exception as e: + logging.exception("Claude Code Generation error in refinement") + sentry_sdk.capture_exception(e) + debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}") + return OptimizeErrorResponseSchema(error=str(e)) debug_log_sensitive_data(f"ClaudeClient optimization response:\n{output.model_dump_json(indent=2)}") if output.usage is not None: ph(user_id, "refinement-usage", properties={"model": optimize_model.name, "usage": output.usage.json()}) @@ -311,7 +311,8 @@ class Refinementschema(Schema): @refinement_api.post("/", response={200: Refinementschema, 400: Refinementschema, 500: Refinementschema}) async def refine( - request, data: list[RefinementRequestSchema] + request, + data: list[RefinementRequestSchema], # noqa: ANN001 ) -> tuple[int, Refinementschema | OptimizeErrorResponseSchema]: ph(request.user, "aiservice-refinement-called") ctx_data_list = [ diff --git a/django/aiservice/ranker/ranker.py b/django/aiservice/ranker/ranker.py index 5cbdebc9e..dbf028f37 100644 --- a/django/aiservice/ranker/ranker.py +++ b/django/aiservice/ranker/ranker.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING import sentry_sdk from aiservice.analytics.posthog import ph from aiservice.common_utils import validate_trace_id -from aiservice.env_specific import debug_log_sensitive_data, openai_client +from aiservice.env_specific import create_llm_client, debug_log_sensitive_data, llm_clients from aiservice.models.aimodels import LLM, RANKING_MODEL, calculate_llm_cost from log_features.log_event import update_optimization_cost from log_features.log_features import log_features @@ -109,12 +109,13 @@ async def rank_optimizations( # noqa: D417 | ChatCompletionToolMessageParam | ChatCompletionFunctionMessageParam ] = [system_message, user_message] + llm_client = llm_clients[rank_model.model_type] try: - output = await openai_client.with_options(max_retries=2).chat.completions.create( + output = await llm_client.with_options(max_retries=2).chat.completions.create( model=rank_model.name, messages=messages, n=1 ) await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, rank_model)) - except Exception as e: + except Exception as e: # noqa: BLE001 debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}") sentry_sdk.capture_exception(e) return RankErrorResponseSchema(error=str(e)) @@ -129,7 +130,7 @@ async def rank_optimizations( # noqa: D417 try: explanation_match = re.search(explain_regex_pattern, output.choices[0].message.content) explanation = explanation_match.group(1) - except: + except: # noqa: E722 # TODO add logging instead of print("No explanation found") explanation = "" # still doing stuff instead of returning coz ranking is important @@ -141,10 +142,10 @@ async def rank_optimizations( # noqa: D417 ranking_match = re.search(rank_regex_pattern, output.choices[0].message.content) # TODO better parsing, could be only comma separated, need to handle all edge cases ranking = list(map(int, ranking_match.group(1).strip().split(","))) - except: + except: # noqa: E722 # TODO add logging instead of print("No ranking found") return RankErrorResponseSchema(error="No ranking found") - if not sorted(ranking) == list(range(1, len(data.diffs) + 1)): + if sorted(ranking) != list(range(1, len(data.diffs) + 1)): # TODO need to handle all edge cases # TODO add logging instead of print("Invalid ranking") return RankErrorResponseSchema(error="No 
ranking found") diff --git a/django/aiservice/testgen/testgen.py b/django/aiservice/testgen/testgen.py index b7d21b8fb..6d5238be8 100644 --- a/django/aiservice/testgen/testgen.py +++ b/django/aiservice/testgen/testgen.py @@ -6,21 +6,20 @@ import asyncio import logging import re from pathlib import Path -from typing import SupportsIndex +from typing import TYPE_CHECKING, SupportsIndex import sentry_sdk import stamina +from aiservice.analytics.posthog import ph +from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id +from aiservice.env_specific import IS_PRODUCTION, create_llm_client, debug_log_sensitive_data, llm_clients +from aiservice.models.aimodels import EXECUTE_MODEL, calculate_llm_cost +from log_features.log_event import update_optimization_cost +from log_features.log_features import log_features from ninja import NinjaAPI from ninja.errors import HttpError from openai import OpenAIError -from aiservice.analytics.posthog import ph -from aiservice.common_utils import parse_python_version, safe_isort, should_hack_for_demo, validate_trace_id -from aiservice.env_specific import IS_PRODUCTION, debug_log_sensitive_data, openai_client -from aiservice.models.aimodels import EXECUTE_MODEL, LLM, calculate_llm_cost -from authapp.auth import AuthBearer -from log_features.log_event import update_optimization_cost -from log_features.log_features import log_features from testgen.instrumentation.edit_generated_test import parse_module_to_cst, replace_definition_with_import from testgen.instrumentation.instrument_new_tests import instrument_test_source from testgen.models import ( @@ -34,6 +33,10 @@ from testgen.postprocessing.code_validator import has_test_functions, validate_t from testgen.postprocessing.postprocess_pipeline import postprocessing_testgen_pipeline from testgen.testgen_context import BaseTestGenContext, TestGenContextData +if TYPE_CHECKING: + from aiservice.models.aimodels import LLM + from authapp.auth import AuthBearer + testgen_api = NinjaAPI(urls_namespace="testgen") # Get the directory of the current file @@ -111,7 +114,10 @@ To help unit test the function above, list diverse scenarios that the function s package_comment = "" # if unit_test_package == "pytest": # package_comment = "# below, each test case is represented by a tuple passed to the @pytest.mark.parametrize decorator" - execute_system_message = {"role": "system", "content": execute_system_prompt.format(function_name=ctx.data.qualified_name)} + execute_system_message = { + "role": "system", + "content": execute_system_prompt.format(function_name=ctx.data.qualified_name), + } execute_messages = [execute_system_message, plan_user_message] @@ -196,7 +202,8 @@ async def generate_and_validate_test_code( user_id: str, posthog_event_suffix: str, ) -> str: - response = await openai_client.with_options(max_retries=2).chat.completions.create( + llm_client = llm_clients[execute_model.model_type] + response = await llm_client.with_options(max_retries=2).chat.completions.create( model=model.name, messages=messages, temperature=temperature ) cost = calculate_llm_cost(response, execute_model) or 0.0 diff --git a/js/cf-api/Dockerfile b/js/cf-api/Dockerfile new file mode 100644 index 000000000..9e3c0a7cb --- /dev/null +++ b/js/cf-api/Dockerfile @@ -0,0 +1,10 @@ +FROM node:20-alpine +WORKDIR /app +COPY /package*.json ./ +RUN npm ci --only=production +COPY cf-api . 
+RUN mkdir ../common +COPY common ../common +RUN npm run build +EXPOSE 3001 +CMD ["node", "dist/index.js"] diff --git a/js/cf-api/github/github-app.ts b/js/cf-api/github/github-app.ts index 5558b03fe..65b55c449 100644 --- a/js/cf-api/github/github-app.ts +++ b/js/cf-api/github/github-app.ts @@ -61,11 +61,18 @@ const initializeApp = async () => { // Export the actual App instance, initialized based on environment export const githubApp = await (async () => { - if (process.env.NODE_ENV === "test") { - // In test environment, return a minimal mock that won't fail + // Check if GitHub App is configured + const GH_APP_ID = process.env.GH_APP_ID + + if (!GH_APP_ID || GH_APP_ID === "" || process.env.NODE_ENV === "test") { + console.log("caution: GitHub App not configured (GH_APP_ID missing)") + console.log("caution: PR creation and GitHub webhook features are disabled") + console.log("caution: CLI and optimization features will continue to work") + + // Return a minimal mock that won't fail return { octokit: { - request: async () => ({ data: { name: "Test App" } }), + request: async () => ({ data: { name: "GitHub App Disabled" } }), log: { debug: () => {}, }, @@ -75,11 +82,14 @@ export const githubApp = await (async () => { onAny: () => {}, onError: () => {}, }, - getInstallationOctokit: async () => ({}), + getInstallationOctokit: async () => { + throw new Error("GitHub App not configured. Set GH_APP_ID to enable PR creation.") + }, } as any as App } // In other environments, initialize normally + console.log(`GitHub App ID ${GH_APP_ID} detected, initializing...`) const app = await initializeApp() console.log(`Github App Initialized`) diff --git a/js/cf-webapp/src/lib/stripe.ts b/js/cf-webapp/src/lib/stripe.ts index 6d44fca29..6ffd3790c 100644 --- a/js/cf-webapp/src/lib/stripe.ts +++ b/js/cf-webapp/src/lib/stripe.ts @@ -1,9 +1,11 @@ import Stripe from "stripe" if (!process.env.STRIPE_SECRET_KEY) { - throw new Error("STRIPE_SECRET_KEY is not set in environment variables") + console.warn("⚠️ STRIPE_SECRET_KEY not set, billing features will be disabled") } -export const stripe = new Stripe(process.env.STRIPE_SECRET_KEY, { - apiVersion: "2025-08-27.basil", -}) +export const stripe = process.env.STRIPE_SECRET_KEY + ? new Stripe(process.env.STRIPE_SECRET_KEY, { + apiVersion: "2025-08-27.basil", + }) + : null
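
With the stripe export now nullable whenever STRIPE_SECRET_KEY is unset, any webapp code that touches billing has to guard for the disabled case before calling the SDK. A minimal sketch of such a guard follows; the handler name and the "@/lib/stripe" import alias are illustrative assumptions, not part of this patch.

import { stripe } from "@/lib/stripe"

// Create a checkout session only when billing is configured; on-prem
// deployments without STRIPE_SECRET_KEY get a clear error instead of a crash.
export async function createCheckoutSession(customerId: string, priceId: string) {
  if (!stripe) {
    throw new Error("Billing is disabled: STRIPE_SECRET_KEY is not configured")
  }
  const appUrl = process.env.NEXT_PUBLIC_APP_URL ?? "http://localhost:3000"
  return stripe.checkout.sessions.create({
    customer: customerId,
    mode: "subscription",
    line_items: [{ price: priceId, quantity: 1 }],
    success_url: `${appUrl}/billing/success`,
    cancel_url: `${appUrl}/billing`,
  })
}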