codeflash/experiments/bench_analytics.ipynb

{
 "cells": [
  {
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-10-14T00:25:56.438384Z",
     "start_time": "2024-10-14T00:25:50.078208Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from codeflash.verification.test_results import InvocationId\n",
    "from codeflash.models.models import OriginalCodeBaseline\n",
    "import dill as pickle\n",
    "\n",
    "# Machine-specific location of the pickled baseline runs; change it before re-running elsewhere.\n",
    "BASELINE_PATH = \"/Users/renaud/Desktop/baseline100.pkl\"\n",
    "\n",
    "# NOTE(review): unpickling can execute arbitrary code - only load baseline files from a trusted source.\n",
    "# The context manager closes the file handle as soon as the data has been read.\n",
    "with open(BASELINE_PATH, \"rb\") as baseline_file:\n",
    "    data: list[OriginalCodeBaseline] = pickle.load(baseline_file)\n",
    "\n",
    "# The first run represents the Oracle.\n",
    "invocation_ids = {\n",
    "    function_test_invocation.id for function_test_invocation in data[0].overall_test_results\n",
    "}\n",
    "\n",
    "# Timing results where the test passed, and the runtime is not None or 0.\n",
    "# One dict per run, mapping invocation id -> {loop index -> runtime}.\n",
    "usable_runtime_results: list[dict[InvocationId, dict[int, int]]] = [\n",
    "    {\n",
    "        invocation_id: {\n",
    "            function_test_invocation.loop_index: runtime\n",
    "            for function_test_invocation in result.overall_test_results\n",
    "            if (runtime := function_test_invocation.runtime)\n",
    "            and function_test_invocation.id == invocation_id\n",
    "            and function_test_invocation.did_pass\n",
    "        }\n",
    "        for invocation_id in invocation_ids\n",
    "    }\n",
    "    for result in data\n",
    "]\n",
    "# Invocations for which the first run has at least one usable timing.\n",
    "valid_invocation_ids = {\n",
    "    invocation_id for invocation_id in invocation_ids if usable_runtime_results[0][invocation_id]\n",
    "}\n",
    "# A run is invalid if one of its test invocations has no valid result.\n",
    "nonempty_runtime_results: list[dict[InvocationId, dict[int, int]]] = [\n",
    "    {invocation_id: run_runtimes[invocation_id] for invocation_id in valid_invocation_ids}\n",
    "    for run_runtimes in usable_runtime_results\n",
    "    if all(run_runtimes[invocation_id] for invocation_id in valid_invocation_ids)\n",
    "]\n",
    "\n",
    "# Minimum runtime of every invocation within a run, then the per-run sum of those minima.\n",
    "run_min_runtimes = [\n",
    "    {invocation_id: min(runtimes[invocation_id].values()) for invocation_id in runtimes}\n",
    "    for runtimes in nonempty_runtime_results\n",
    "]\n",
    "run_total_runtimes = [sum(test_invocation_runtimes.values()) for test_invocation_runtimes in run_min_runtimes]\n",
    "# Total runtime stored on each baseline object, used to cross-check the sums computed above.\n",
    "run_total_runtimes2 = [result.runtime for result in data]\n",
    "\n",
    "print(f\"Timing calculations are consistent: {run_total_runtimes == run_total_runtimes2}\")\n",
    "print(run_total_runtimes)"
   ],
   "id": "initial_id",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Timing calculations are consistent: True\n",
      "[3427917, 3397916, 3395124, 3390412, 3303873, 3468999, 3434249, 3445252, 3342791, 3444248, 3549496, 3285623, 3617039, 3444914, 3540250, 3410374, 3539542, 3292583, 3413747, 3453915, 3597335, 3391166, 3355912, 3691122, 3467460, 3682375, 3522458, 3463334, 3717790, 3595706, 3375916, 3307706, 3388250, 3403586, 3393580, 3393750, 3369835, 3489581, 3363870, 3478123, 3417915, 3427583, 3390582, 3588542, 3508168, 3461457, 3479496, 3316957, 3461040, 3429001, 3650874, 3484789, 3667246, 3483750, 3358830, 3448291, 3456958, 3415290, 3181582, 3443668, 3361624, 3640580, 3410539, 3475081, 3510458, 3516707, 3369163, 3379706, 3694418, 3376625, 3485831, 3372290, 3424334, 3461540, 3630829, 3665957, 3474542, 3289749, 3358750, 3204707, 3449957, 3335665, 3364667, 3466831, 3616958, 3614122, 3543041, 3316167, 3466373, 3423167, 3403418, 3409211, 3402127, 3360996, 3388913, 3662916, 3423126, 3655789, 3287874, 3470374]\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-10-14T00:26:01.268153Z",
     "start_time": "2024-10-14T00:26:01.253449Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from typing import Callable, SupportsFloat\n",
    "from codeflash.code_utils.time_utils import humanize_runtime\n",
    "import numpy as np\n",
    "from numpy.typing import ArrayLike\n",
    "\n",
    "# A single numeric value (any object implementing __float__), not a numeric class.\n",
    "NumberType = SupportsFloat\n",
    "\n",
    "def analyze_num_array(\n",
    "        num_array: ArrayLike,\n",
    "        formatter: Callable[[NumberType], str]\n",
    ")-> None:\n",
    "    \"\"\"Print summary statistics, percentiles and IQR-based outliers of a numeric array.\n",
    "\n",
    "    Outliers are the values below q1 - 1.5 * IQR or above q3 + 1.5 * IQR.\n",
    "    The report is written to stdout; nothing is returned.\n",
    "\n",
    "    Args:\n",
    "        num_array: Values to analyze; converted to a NumPy array.\n",
    "        formatter: Renders a single numeric value as text for the report.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If num_array contains no values.\n",
    "    \"\"\"\n",
    "    array = np.asarray(num_array)\n",
    "    if array.size == 0:\n",
    "        # Fail early with a clear message instead of an opaque NumPy reduction error.\n",
    "        raise ValueError(\"analyze_num_array requires at least one value\")\n",
    "\n",
    "    mean = np.mean(array)\n",
    "    max_value = np.max(array)\n",
    "    min_value = np.min(array)\n",
    "    median = np.median(array)\n",
    "    std_dev = np.std(array)\n",
    "\n",
    "    percentages = [0, 5, 25, 50, 75, 95, 100]\n",
    "    percentiles = np.percentile(array, percentages)\n",
    "    # Look the quartiles up by value so that editing `percentages` cannot silently shift them.\n",
    "    q1 = percentiles[percentages.index(25)]\n",
    "    q3 = percentiles[percentages.index(75)]\n",
    "    iqr = q3 - q1\n",
    "    outlier_min = q1 - 1.5 * iqr\n",
    "    outlier_max = q3 + 1.5 * iqr\n",
    "    # Boolean masks select the outliers without a Python-level loop; np.sort returns them ascending.\n",
    "    small_outliers = np.sort(array[array < outlier_min])\n",
    "    large_outliers = np.sort(array[array > outlier_max])\n",
    "\n",
    "    print(f\"Mean +- std dev: {formatter(mean)} +- {formatter(std_dev)}\")\n",
    "    print(f\"Max: {formatter(max_value)}\")\n",
    "    print(f\"Median: {formatter(median)}\")\n",
    "    print(f\"Min: {formatter(min_value)}\")\n",
    "    print()\n",
    "    for percentage, percentile in zip(percentages, percentiles):\n",
    "        # The bracketed figure is the signed deviation from the mean, relative to the mean.\n",
    "        print(f\"{percentage}th percentile: {formatter(percentile)} (\"\n",
    "              f\"{(percentile - mean) / mean:.0%} of the mean)\")\n",
    "    print()\n",
    "    # Outliers\n",
    "    print(f\"Small outliers (< {formatter(outlier_min)}): {[formatter(outlier) for outlier in small_outliers]}\")\n",
    "    print()\n",
    "    print(f\"Large outliers (> {formatter(outlier_max)}): {[formatter(outlier) for outlier in large_outliers]}\")\n",
    "    print()\n",
    "    print(f\"Total number of outliers: {len(small_outliers) + len(large_outliers)}\")\n",
    "    print(f\"Number of small outliers: {len(small_outliers)}\")\n",
    "    print(f\"Number of large outliers: {len(large_outliers)}\")\n",
    "\n",
    "analyze_num_array(run_total_runtimes, humanize_runtime)"
   ],
   "id": "5dcd4d4ae5288f1d",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean +- std dev: 3.45 milliseconds +- 110 microseconds\n",
      "Max: 3.72 milliseconds\n",
      "Median: 3.44 milliseconds\n",
      "Min: 3.18 milliseconds\n",
      "\n",
      "0th percentile: 3.18 milliseconds (-8% of the mean)\n",
      "5th percentile: 3.29 milliseconds (-5% of the mean)\n",
      "25th percentile: 3.39 milliseconds (-2% of the mean)\n",
      "50th percentile: 3.44 milliseconds (-0% of the mean)\n",
      "75th percentile: 3.49 milliseconds (1% of the mean)\n",
      "95th percentile: 3.67 milliseconds (6% of the mean)\n",
      "100th percentile: 3.72 milliseconds (8% of the mean)\n",
      "\n",
      "Small outliers (< 3.22 milliseconds): ['3.18 milliseconds', '3.20 milliseconds']\n",
      "\n",
      "Large outliers (> 3.66 milliseconds): ['3.66 milliseconds', '3.67 milliseconds', '3.67 milliseconds', '3.68 milliseconds', '3.69 milliseconds', '3.69 milliseconds', '3.72 milliseconds']\n",
      "\n",
      "Total number of outliers: 9\n",
      "Number of small outliers: 2\n",
      "Number of large outliers: 7\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "metadata": {},
   "cell_type": "markdown",
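   "source": "## Loop counts\n\nThe cells below look at the loop indices recorded in each run: the highest loop index with a usable timing, and the loop at which every test invocation had reached its minimum runtime.",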
   "id": "d58b61bf62ce2780"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-10-14T00:26:36.172976Z",
     "start_time": "2024-10-14T00:26:36.166724Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Highest loop index with a usable timing in each run, taken over all of its test invocations.\n",
    "run_loop_counts = [\n",
    "    max(max(loop_runtimes) for loop_runtimes in run_runtimes.values())\n",
    "    for run_runtimes in nonempty_runtime_results\n",
    "]\n",
    "\n",
    "print(f\"Loop counts: {run_loop_counts}\")\n",
    "print()\n",
    "analyze_num_array(run_loop_counts, str)"
   ],
   "id": "57c73a31483e06b5",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loop counts: [106, 127, 116, 127, 123, 136, 132, 131, 130, 130, 127, 131, 130, 133, 123, 126, 133, 129, 120, 135, 114, 121, 121, 125, 127, 123, 116, 134, 122, 118, 128, 118, 128, 124, 116, 126, 130, 124, 118, 131, 124, 130, 123, 93, 115, 131, 127, 128, 132, 133, 131, 128, 132, 124, 131, 132, 130, 135, 130, 129, 128, 124, 129, 121, 129, 131, 129, 126, 128, 115, 131, 120, 122, 115, 125, 119, 130, 126, 124, 87, 126, 123, 108, 124, 130, 119, 121, 125, 119, 128, 121, 108, 90, 121, 125, 109, 104, 128, 123, 104]\n",
      "\n",
      "Mean +- std dev: 123.62 +- 9.040774303122493\n",
      "Max: 136\n",
      "Median: 126.0\n",
      "Min: 87\n",
      "\n",
      "0th percentile: 87.0 (-30% of the mean)\n",
      "5th percentile: 105.9 (-14% of the mean)\n",
      "25th percentile: 121.0 (-2% of the mean)\n",
      "50th percentile: 126.0 (2% of the mean)\n",
      "75th percentile: 130.0 (5% of the mean)\n",
      "95th percentile: 133.0 (8% of the mean)\n",
      "100th percentile: 136.0 (10% of the mean)\n",
      "\n",
      "Small outliers (< 107.5): ['87', '90', '93', '104', '104', '106']\n",
      "\n",
      "Large outliers (> 143.5): []\n",
      "\n",
      "Total number of outliers: 6\n",
      "Number of small outliers: 6\n",
      "Number of large outliers: 0\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-10-14T00:26:46.184288Z",
     "start_time": "2024-10-14T00:26:46.081388Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Per run: the first loop index at which each invocation hit its minimum runtime,\n",
    "# then the latest of those loop indices across all valid invocations.\n",
    "run_reach_min_loop = [\n",
    "    max(\n",
    "        min(\n",
    "            loop_index\n",
    "            for loop_index, runtime in run_runtimes[invocation_id].items()\n",
    "            if runtime == min_runtimes[invocation_id]\n",
    "        )\n",
    "        for invocation_id in valid_invocation_ids\n",
    "    )\n",
    "    for run_runtimes, min_runtimes in zip(nonempty_runtime_results, run_min_runtimes)\n",
    "]\n",
    "\n",
    "print(f\"Loop count to reach min runtime: {run_reach_min_loop}\")\n",
    "print()\n",
    "analyze_num_array(run_reach_min_loop, str)"
   ],
   "id": "2ab96deed074385d",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loop count to reach min runtime: [106, 127, 116, 125, 106, 135, 116, 131, 122, 107, 126, 127, 127, 131, 123, 124, 129, 122, 115, 127, 113, 112, 114, 108, 120, 114, 111, 127, 111, 113, 110, 118, 108, 77, 104, 121, 130, 116, 105, 111, 119, 127, 121, 93, 113, 117, 121, 98, 118, 110, 121, 118, 127, 114, 126, 125, 119, 129, 126, 84, 118, 122, 119, 101, 110, 114, 103, 119, 124, 108, 114, 115, 117, 103, 110, 93, 128, 119, 121, 72, 123, 90, 101, 110, 122, 111, 121, 107, 118, 126, 114, 107, 80, 99, 116, 104, 71, 101, 112, 86]\n",
      "\n",
      "Mean +- std dev: 113.6 +- 12.804686642007294\n",
      "Max: 135\n",
      "Median: 116.0\n",
      "Min: 71\n",
      "\n",
      "0th percentile: 71.0 (-37% of the mean)\n",
      "5th percentile: 85.9 (-24% of the mean)\n",
      "25th percentile: 108.0 (-5% of the mean)\n",
      "50th percentile: 116.0 (2% of the mean)\n",
      "75th percentile: 122.0 (7% of the mean)\n",
      "95th percentile: 129.0 (14% of the mean)\n",
      "100th percentile: 135.0 (19% of the mean)\n",
      "\n",
      "Small outliers (< 87.0): ['71', '72', '77', '80', '84', '86']\n",
      "\n",
      "Large outliers (> 143.0): []\n",
      "\n",
      "Total number of outliers: 6\n",
      "Number of small outliers: 6\n",
      "Number of large outliers: 0\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-10-14T01:41:58.090916Z",
     "start_time": "2024-10-14T01:41:58.081916Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Allocated benchmarking time is 5 seconds (the literal below is that budget in nanoseconds).\n",
    "# Estimate per run: the share of its loops needed to reach the minimum, scaled by the time budget.\n",
    "time_reach_min_loop = [\n",
    "    loops_to_min / total_loops * 5_000_000_000\n",
    "    for loops_to_min, total_loops in zip(run_reach_min_loop, run_loop_counts)\n",
    "]\n",
    "print(f\"Times to reach min loop: {time_reach_min_loop}\")\n",
    "print()\n",
    "analyze_num_array(time_reach_min_loop, humanize_runtime)"
   ],
   "id": "c8bc504cfcdaca6",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Times to reach min loop: [5000000000.0, 5000000000.0, 5000000000.0, 4921259842.519685, 4308943089.430895, 4963235294.117647, 4393939393.939394, 5000000000.0, 4692307692.307693, 4115384615.3846154, 4960629921.259843, 4847328244.274809, 4884615384.615384, 4924812030.075188, 5000000000.0, 4920634920.63492, 4849624060.150376, 4728682170.542636, 4791666666.666667, 4703703703.703704, 4956140350.8771925, 4628099173.5537195, 4710743801.652892, 4320000000.0, 4724409448.818897, 4634146341.463415, 4784482758.620689, 4738805970.149254, 4549180327.868853, 4788135593.220339, 4296875000.0, 5000000000.0, 4218750000.0, 3104838709.677419, 4482758620.689655, 4801587301.587302, 5000000000.0, 4677419354.83871, 4449152542.372881, 4236641221.374046, 4798387096.774194, 4884615384.615384, 4918699186.99187, 5000000000.0, 4913043478.26087, 4465648854.961832, 4763779527.559055, 3828125000.0, 4469696969.69697, 4135338345.864661, 4618320610.687023, 4609375000.0, 4810606060.606061, 4596774193.548387, 4809160305.343512, 4734848484.848485, 4576923076.923077, 4777777777.777778, 4846153846.153846, 3255813953.4883723, 4609375000.0, 4919354838.709678, 4612403100.775193, 4173553719.0082645, 4263565891.472868, 4351145038.167939, 3992248062.015504, 4722222222.222222, 4843750000.0, 4695652173.913044, 4351145038.167939, 4791666666.666667, 4795081967.213115, 4478260869.565218, 4400000000.0, 3907563025.210084, 4923076923.076923, 4722222222.222222, 4879032258.064516, 4137931034.4827585, 4880952380.952381, 3658536585.365854, 4675925925.925926, 4435483870.967742, 4692307692.307693, 4663865546.218488, 5000000000.0, 4280000000.0, 4957983193.277311, 4921875000.0, 4710743801.652892, 4953703703.703704, 4444444444.444445, 4090909090.909091, 4640000000.0, 4770642201.834863, 3413461538.4615383, 3945312500.0, 4552845528.455284, 4134615384.6153846]\n",
      "\n",
      "Mean +- std dev: 4.59 seconds +- 383 milliseconds\n",
      "Max: 5.00 seconds\n",
      "Median: 4.71 seconds\n",
      "Min: 3.10 seconds\n",
      "\n",
      "0th percentile: 3.10 seconds (-32% of the mean)\n",
      "5th percentile: 3.90 seconds (-15% of the mean)\n",
      "25th percentile: 4.43 seconds (-4% of the mean)\n",
      "50th percentile: 4.71 seconds (2% of the mean)\n",
      "75th percentile: 4.88 seconds (6% of the mean)\n",
      "95th percentile: 5.00 seconds (9% of the mean)\n",
      "100th percentile: 5.00 seconds (9% of the mean)\n",
      "\n",
      "Small outliers (< 3.75 seconds): ['3.10 seconds', '3.26 seconds', '3.41 seconds', '3.66 seconds']\n",
      "\n",
      "Large outliers (> 5.56 seconds): []\n",
      "\n",
      "Total number of outliers: 4\n",
      "Number of small outliers: 4\n",
      "Number of large outliers: 0\n"
     ]
    }
   ],
   "execution_count": 8
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}