diff --git a/experiments/ranking.ipynb b/experiments/ranking.ipynb
new file mode 100644
index 000000000..2770ce7ae
--- /dev/null
+++ b/experiments/ranking.ipynb
@@ -0,0 +1,359 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "e4d33681-e7f7-4508-b453-68c8c57c1f45",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-09-11T02:32:16.058137Z",
+     "start_time": "2025-09-11T02:32:16.052210Z"
+    }
+   },
+   "source": [
+    "import json\n",
+    "import os\n",
+    "from typing import Any, Dict, Optional\n",
+    "import re\n",
+    "import pandas as pd\n",
+    "from pandas import DataFrame\n",
+    "from scipy.stats import hmean\n",
+    "from sqlalchemy import create_engine\n",
+    "import difflib\n",
+    "from dotenv import load_dotenv\n",
+    "# Load environment variables from .env file\n",
+    "load_dotenv(dotenv_path='../django/aiservice/.env')\n",
+    "\n",
+    "# Access the environment variables\n",
+    "DATABASE_URI = os.getenv(\"DATABASE_URL\")\n",
+    "trace_id = 'your trace id'\n"
+   ],
+   "outputs": [],
+   "execution_count": 11
+  },
+  {
+   "cell_type": "code",
+   "id": "3c900e6c5dd188d9",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-09-11T02:32:28.565731Z",
+     "start_time": "2025-09-11T02:32:28.561708Z"
+    }
+   },
+   "source": [
+    "def load_data(\n",
+    "    trace_id: str, database_uri: str\n",
+    ") -> pd.DataFrame:\n",
+    "    \"\"\"Load rows from the optimization_features table into a DataFrame.\n",
+    "\n",
+    "    The created_at and user_id literals in the query are placeholders that\n",
+    "    must be edited by hand before the cell is run.\n",
+    "\n",
+    "    Args:\n",
+    "        trace_id: Accepted so existing calls keep working; the query has no\n",
+    "            bind placeholder for it, so it is not sent to the database.\n",
+    "        database_uri: SQLAlchemy connection URL.\n",
+    "\n",
+    "    Returns:\n",
+    "        The rows returned by the query.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: If database_uri is empty or None.\n",
+    "    \"\"\"\n",
+    "    if not database_uri:\n",
+    "        raise ValueError(\"database_uri is empty; is DATABASE_URL set in the .env file?\")\n",
+    "    query = \"\"\"\n",
+    "        SELECT * FROM optimization_features\n",
+    "        WHERE created_at > 'time you want to look at'\n",
+    "        AND user_id LIKE 'git hub user id'\n",
+    "    \"\"\"\n",
+    "    engine = create_engine(database_uri)\n",
+    "    try:\n",
+    "        with engine.connect() as connection:\n",
+    "            # No params argument: the statement contains no placeholders, and\n",
+    "            # handing the driver unused parameters can make format-style DBAPIs\n",
+    "            # fail (and forces every % in the LIKE pattern to be escaped).\n",
+    "            return pd.read_sql_query(query, connection)\n",
+    "    finally:\n",
+    "        # Release the pooled connections this throwaway engine opened.\n",
+    "        engine.dispose()\n"
+   ],
+   "outputs": [],
+   "execution_count": 14
+  },
+  {
+   "cell_type": "code",
+   "id": "3642113faff9515b",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-09-11T02:32:33.436079Z",
+     "start_time": "2025-09-11T02:32:29.373321Z"
+    }
+   },
+   "source": "data = load_data(trace_id, DATABASE_URI)\n",
+   "outputs": [],
+   "execution_count": 15
+  },
+  {
+   "cell_type": "code",
+   "id": "bf7a941e66d5ed97",
+   "metadata": {},
+   "source": [
+    "data"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "95820a54-c15c-48a9-9f3a-5858855681fa",
+   "metadata": {},
+   "source": "excel_out = {'diffs':[],'optimization_ids':[],'speedups':[], 'trace_id':[]}",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "48a7deca-e4be-4f57-bc60-af73fa73a65e",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-09-11T02:32:44.274590Z",
+     "start_time": "2025-09-11T02:32:44.270598Z"
+    }
+   },
+   "source": [
+    "def extract(text: str) -> str:\n",
+    "    \"\"\"Return the stripped body of the first triple-backtick fence in text.\n",
+    "\n",
+    "    An optional info string (language and/or path) may follow the opening\n",
+    "    fence. An empty string is returned when no fence is found.\n",
+    "    \"\"\"\n",
+    "    fenced = re.search(r\"```(?:[\\w+:.\\-]*)\\n(.*?)```\", text, re.DOTALL)\n",
+    "    return fenced.group(1).strip() if fenced else \"\"\n",
+    "def unified_diff_strings(code1: str, code2: str, fromfile=\"original\", tofile=\"modified\") -> str:\n",
+    "    \"\"\"\n",
+    "    Return the unified diff between two code strings as a single string.\n",
+    "\n",
+    "    :param code1: First code string (original).\n",
+    "    :param code2: Second code string (modified).\n",
+    "    :param fromfile: Label for the first code string.\n",
+    "    :param tofile: Label for the second code string.\n",
+    "    :return: Unified diff with one diff line per text line (no trailing\n",
+    "        newline); an empty string when both inputs have the same lines.\n",
+    "    \"\"\"\n",
+    "    # Split WITHOUT line endings and keep lineterm empty so that header, hunk\n",
+    "    # and content lines are all terminator-free, then add exactly one newline\n",
+    "    # between them. Mixing keepends=True with an empty lineterm left the\n",
+    "    # '---', '+++' and '@@' lines glued onto whatever line followed them.\n",
+    "    diff = difflib.unified_diff(\n",
+    "        code1.splitlines(),\n",
+    "        code2.splitlines(),\n",
+    "        fromfile=fromfile,\n",
+    "        tofile=tofile,\n",
+    "        lineterm=\"\",\n",
+    "    )\n",
+    "\n",
+    "    return \"\\n\".join(diff)\n",
+    "def diff_length(a: str, b: str) -> int:\n",
+    "    \"\"\"Compute the length (in characters) of the unified diff between two strings.\n",
+    "\n",
+    "    Args:\n",
+    "        a (str): Original string.\n",
+    "        b (str): Modified string.\n",
+    "\n",
+    "    Returns:\n",
+    "        int: Total number of characters in the diff, counting one newline\n",
+    "        between consecutive diff lines; 0 when both inputs have the same lines.\n",
+    "\n",
+    "    \"\"\"\n",
+    "    # Drop the line endings before diffing: the diff lines are joined with a\n",
+    "    # newline below, so keeping the originals would count every content\n",
+    "    # line's newline twice and inflate the reported length.\n",
+    "    a_lines = a.splitlines()\n",
+    "    b_lines = b.splitlines()\n",
+    "\n",
+    "    # lineterm is empty so the header and hunk lines carry no terminator either\n",
+    "    diff_lines = difflib.unified_diff(a_lines, b_lines, lineterm=\"\")\n",
+    "\n",
+    "    return len(\"\\n\".join(diff_lines))\n",
+    "\n",
+    "\n",
+    "def count_changed_lines(code1: str, code2: str) -> int:\n",
+    "    \"\"\"\n",
+    "    Tally the lines that differ between two code strings.\n",
+    "\n",
+    "    Args:\n",
+    "        code1 (str): The first code string.\n",
+    "        code2 (str): The second code string.\n",
+    "\n",
+    "    Returns:\n",
+    "        int: How many ndiff entries are marked as added or removed.\n",
+    "    \"\"\"\n",
+    "    delta = difflib.ndiff(code1.splitlines(), code2.splitlines())\n",
+    "\n",
+    "    # Only entries tagged '+' (added) or '-' (removed) are counted; every\n",
+    "    # other ndiff entry is ignored.\n",
+    "    return sum(entry.startswith(('+', '-')) for entry in delta)"
+   ],
+   "outputs": [],
+   "execution_count": 16
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "data['trace_id']",
+   "id": "cce7246ce5aa2b0f",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-09-11T02:36:38.194171Z",
+     "start_time": "2025-09-11T02:36:38.190926Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "print(data['original_code'][0])\n",
+    "#extract(data['original_code'][0])"
+   ],
+   "id": "aac5f37de751c812",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "```python:langgraph/pregel/debug.py\n",
+      "from __future__ import annotations\n",
+      "\n",
+      "def get_colored_text(text: str, color: str) -> str:\n",
+      "    \"\"\"Get colored text.\"\"\"\n",
+      "    return f\"\\033[1;3{COLOR_MAPPING[color]}m{text}\\033[0m\"\n",
+      "```\n"
+     ]
+    }
+   ],
+   "execution_count": 20
+  },
+  {
+   "cell_type": "code",
+   "id": "5dc620ce-2be6-4aca-829c-ea026792f5f0",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-09-11T02:39:02.539570Z",
+     "start_time": "2025-09-11T02:39:02.495813Z"
+    }
+   },
+   "source": [
+    "# Build one spreadsheet row per ranked optimization candidate.\n",
+    "excel_out = {'diffs':[],'diff_lens':[],'speedups':[], 'explanations':[]}\n",
+    "# Standalone per-candidate lists that later cells index directly.\n",
+    "# NOTE(review): assumed from the names to hold diff_length and\n",
+    "# count_changed_lines results - confirm.\n",
+    "difflens=[]\n",
+    "difflens_lines = []\n",
+    "for _, row in data.iterrows():\n",
+    "    ranking = row['ranking']\n",
+    "    if not ranking:  # rows without a ranking contribute nothing\n",
+    "        continue\n",
+    "    # Row-level values do not change per candidate, so read them once.\n",
+    "    raw_original = row['original_code']\n",
+    "    explanation = ranking['explanation']\n",
+    "    for opt_id in ranking['ranking']:\n",
+    "        raw_modified = row['optimizations_post'][opt_id]\n",
+    "        n_chars = diff_length(raw_original, raw_modified)\n",
+    "        excel_out['diffs'].append(unified_diff_strings(raw_original, raw_modified))\n",
+    "        excel_out['diff_lens'].append(n_chars)\n",
+    "        excel_out['speedups'].append(row['speedup_ratio'][opt_id])\n",
+    "        excel_out['explanations'].append(explanation)\n",
+    "        # Previously these lists stayed empty, so indexing them in later\n",
+    "        # cells raised IndexError.\n",
+    "        difflens.append(n_chars)\n",
+    "        difflens_lines.append(count_changed_lines(raw_original, raw_modified))\n",
+    "df = pd.DataFrame(excel_out)\n",
+    "df.to_excel(\"output_newsep10.xlsx\", index=False)"
+   ],
+   "outputs": [],
+   "execution_count": 21
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "!uv pip install openpyxl\n",
+   "id": "d2b1c5601a786c32",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "b612aa33-0f5d-4c8d-9cba-0426af654299",
+   "metadata": {},
+   "source": [
+    "import json\n",
+    "# Convert the dictionary to a JSON string\n",
+    "json_string = json.dumps(excel_out)\n",
+    "\n",
+    "# Print the resulting JSON string\n",
+    "print(json_string)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "226cedec-2f7e-4a45-a789-e0d233170678",
+   "metadata": {},
+   "source": [
+    "with open('excel_out.json', 'r') as f:\n",
+    "    excel_out_json = json.load(f)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Show the diffs recorded for entries 11 and 12 (excel_out has no '' key).\n",
+    "print(excel_out['diffs'][11], excel_out['diffs'][12])\n",
+    "\n"
+   ],
+   "id": "b26e7119057f215",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "print(difflens[11],difflens[12])",
+   "id": "c4633c670ce48202",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "ae19abe6-37f7-4a13-9659-e60c06e217e0",
+   "metadata": {},
+   "source": [
+    "df = pd.DataFrame(excel_out)\n",
+    "df.to_excel(\"output.xlsx\", index=False)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "5f9cf122-54db-4377-99fa-57a668729deb",
+   "metadata": {},
+   "source": "print(difflens_lines[11],difflens_lines[12])",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "print(excel_out['speedups'][11])\n",
+    "print(excel_out['speedups'][12])"
+   ],
+   "id": "cef80d598f330dca",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "f2001959c761c8e0",
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}