312 lines
15 KiB
Text
312 lines
15 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"ExecuteTime": {
|
|
"end_time": "2024-10-14T00:25:56.438384Z",
|
|
"start_time": "2024-10-14T00:25:50.078208Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"from codeflash.verification.test_results import InvocationId\n",
|
|
"from codeflash.models.models import OriginalCodeBaseline\n",
|
|
"import dill as pickle\n",
|
|
"\n",
|
"# Machine-specific location of the pickled baseline runs. TODO: make this configurable.\n",
"BASELINE_PICKLE_PATH = \"/Users/renaud/Desktop/baseline100.pkl\"\n",
"\n",
"# NOTE(review): unpickling can execute arbitrary code; only load baseline files from a trusted source.\n",
"# The context manager closes the file handle as soon as the data is loaded.\n",
"with open(BASELINE_PICKLE_PATH, \"rb\") as baseline_file:\n",
"    data: list[OriginalCodeBaseline] = pickle.load(baseline_file)\n",
"\n",
"# The first run represents the Oracle.\n",
"invocation_ids = {function_test_invocation.id for function_test_invocation in data[0].overall_test_results}\n",
"\n",
"# Timing results where the test passed, and the runtime is not None or 0.\n",
"# Built in one pass per run as {invocation id: {loop index: runtime}}; ids of the first run\n",
"# that have no usable result in a run keep an empty dict.\n",
"usable_runtime_results: list[dict[InvocationId, dict[int, int]]] = []\n",
"for result in data:\n",
"    runtimes_by_id: dict[InvocationId, dict[int, int]] = {invocation_id: {} for invocation_id in invocation_ids}\n",
"    for function_test_invocation in result.overall_test_results:\n",
"        runtime = function_test_invocation.runtime\n",
"        if runtime and function_test_invocation.did_pass and function_test_invocation.id in runtimes_by_id:\n",
"            runtimes_by_id[function_test_invocation.id][function_test_invocation.loop_index] = runtime\n",
"    usable_runtime_results.append(runtimes_by_id)\n",
"\n",
"# Only invocations with at least one usable result in the first run are analyzed.\n",
"valid_invocation_ids = {invocation_id for invocation_id in invocation_ids if usable_runtime_results[0][invocation_id]}\n",
"# A run is invalid if one of its test invocations has no valid result.\n",
"nonempty_runtime_results: list[dict[InvocationId, dict[int, int]]] = [\n",
"    {invocation_id: run_runtimes[invocation_id] for invocation_id in valid_invocation_ids}\n",
"    for run_runtimes in usable_runtime_results\n",
"    if all(run_runtimes[invocation_id] for invocation_id in valid_invocation_ids)\n",
"]\n",
"\n",
"# Best (minimum) runtime of every test invocation across its loops, per run.\n",
"run_min_runtimes = [\n",
"    {invocation_id: min(loop_runtimes.values()) for invocation_id, loop_runtimes in runtimes.items()}\n",
"    for runtimes in nonempty_runtime_results\n",
"]\n",
"# Total runtime of a run = sum of the per-invocation minimum runtimes.\n",
"run_total_runtimes = [sum(test_invocation_runtimes.values()) for test_invocation_runtimes in run_min_runtimes]\n",
"# Runtime stored on each baseline, used to cross-check the totals recomputed above.\n",
"run_total_runtimes2 = [result.runtime for result in data]\n",
"\n",
"print(f\"Timing calculations are consistent: {run_total_runtimes == run_total_runtimes2}\")\n",
"print(run_total_runtimes)"
|
|
],
|
|
"id": "initial_id",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Timing calculations are consistent: True\n",
|
|
"[3427917, 3397916, 3395124, 3390412, 3303873, 3468999, 3434249, 3445252, 3342791, 3444248, 3549496, 3285623, 3617039, 3444914, 3540250, 3410374, 3539542, 3292583, 3413747, 3453915, 3597335, 3391166, 3355912, 3691122, 3467460, 3682375, 3522458, 3463334, 3717790, 3595706, 3375916, 3307706, 3388250, 3403586, 3393580, 3393750, 3369835, 3489581, 3363870, 3478123, 3417915, 3427583, 3390582, 3588542, 3508168, 3461457, 3479496, 3316957, 3461040, 3429001, 3650874, 3484789, 3667246, 3483750, 3358830, 3448291, 3456958, 3415290, 3181582, 3443668, 3361624, 3640580, 3410539, 3475081, 3510458, 3516707, 3369163, 3379706, 3694418, 3376625, 3485831, 3372290, 3424334, 3461540, 3630829, 3665957, 3474542, 3289749, 3358750, 3204707, 3449957, 3335665, 3364667, 3466831, 3616958, 3614122, 3543041, 3316167, 3466373, 3423167, 3403418, 3409211, 3402127, 3360996, 3388913, 3662916, 3423126, 3655789, 3287874, 3470374]\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 1
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-10-14T00:26:01.268153Z",
|
|
"start_time": "2024-10-14T00:26:01.253449Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"from typing import Callable, SupportsFloat\n",
|
|
"from codeflash.code_utils.time_utils import humanize_runtime\n",
|
|
"import numpy as np\n",
|
|
"from numpy.typing import ArrayLike\n",
|
|
"\n",
|
"# A numeric value (an instance, not a class): anything that float() accepts.\n",
"NumberType = SupportsFloat\n",
"\n",
"\n",
"def analyze_num_array(\n",
"    num_array: ArrayLike,\n",
"    formatter: Callable[[NumberType], str]\n",
") -> None:\n",
"    \"\"\"Print summary statistics, percentiles and IQR-based outliers of a numeric sample.\n",
"\n",
"    Outliers are the values lying more than 1.5 * IQR below the 25th percentile\n",
"    or more than 1.5 * IQR above the 75th percentile.\n",
"\n",
"    Args:\n",
"        num_array: Non-empty sequence of numbers to analyze.\n",
"        formatter: Turns one numeric value into the text printed for it.\n",
"\n",
"    Raises:\n",
"        ValueError: If num_array contains no values.\n",
"    \"\"\"\n",
"    array = np.array(num_array)\n",
"    if array.size == 0:\n",
"        # Fail early with a clear message instead of an obscure NumPy reduction error.\n",
"        raise ValueError(\"analyze_num_array() needs at least one value\")\n",
"\n",
"    mean = np.mean(array)\n",
"    std_dev = np.std(array)\n",
"    median = np.median(array)\n",
"    min_value = np.min(array)\n",
"    max_value = np.max(array)\n",
"\n",
"    percentages = [0, 5, 25, 50, 75, 95, 100]\n",
"    percentiles = np.percentile(array, percentages)\n",
"\n",
"    # The quartiles are computed directly rather than read back from `percentiles`\n",
"    # by position, so editing `percentages` cannot silently shift them.\n",
"    q1, q3 = np.percentile(array, [25, 75])\n",
"    iqr = q3 - q1\n",
"    outlier_min = q1 - 1.5 * iqr\n",
"    outlier_max = q3 + 1.5 * iqr\n",
"    small_outliers = sorted(value for value in array if value < outlier_min)\n",
"    large_outliers = sorted(value for value in array if value > outlier_max)\n",
"\n",
"    print(f\"Mean +- std dev: {formatter(mean)} +- {formatter(std_dev)}\")\n",
"    print(f\"Max: {formatter(max_value)}\")\n",
"    print(f\"Median: {formatter(median)}\")\n",
"    print(f\"Min: {formatter(min_value)}\")\n",
"    print()\n",
"    for percentage, percentile in zip(percentages, percentiles):\n",
"        # The bracketed figure is the percentile's deviation from the mean, relative to the mean.\n",
"        print(f\"{percentage}th percentile: {formatter(percentile)} (\"\n",
"              f\"{(percentile - mean) / mean:.0%} of the mean)\")\n",
"    print()\n",
"    # Outliers\n",
"    print(f\"Small outliers (< {formatter(outlier_min)}): {[formatter(outlier) for outlier in small_outliers]}\")\n",
"    print()\n",
"    print(f\"Large outliers (> {formatter(outlier_max)}): {[formatter(outlier) for outlier in large_outliers]}\")\n",
"    print()\n",
"    print(f\"Total number of outliers: {len(small_outliers) + len(large_outliers)}\")\n",
"    print(f\"Number of small outliers: {len(small_outliers)}\")\n",
"    print(f\"Number of large outliers: {len(large_outliers)}\")\n",
|
|
" \n",
|
|
"analyze_num_array(run_total_runtimes, humanize_runtime)"
|
|
],
|
|
"id": "5dcd4d4ae5288f1d",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Mean +- std dev: 3.45 milliseconds +- 110 microseconds\n",
|
|
"Max: 3.72 milliseconds\n",
|
|
"Median: 3.44 milliseconds\n",
|
|
"Min: 3.18 milliseconds\n",
|
|
"\n",
|
|
"0th percentile: 3.18 milliseconds (-8% of the mean)\n",
|
|
"5th percentile: 3.29 milliseconds (-5% of the mean)\n",
|
|
"25th percentile: 3.39 milliseconds (-2% of the mean)\n",
|
|
"50th percentile: 3.44 milliseconds (-0% of the mean)\n",
|
|
"75th percentile: 3.49 milliseconds (1% of the mean)\n",
|
|
"95th percentile: 3.67 milliseconds (6% of the mean)\n",
|
|
"100th percentile: 3.72 milliseconds (8% of the mean)\n",
|
|
"\n",
|
|
"Small outliers (< 3.22 milliseconds): ['3.18 milliseconds', '3.20 milliseconds']\n",
|
|
"\n",
|
|
"Large outliers (> 3.66 milliseconds): ['3.66 milliseconds', '3.67 milliseconds', '3.67 milliseconds', '3.68 milliseconds', '3.69 milliseconds', '3.69 milliseconds', '3.72 milliseconds']\n",
|
|
"\n",
|
|
"Total number of outliers: 9\n",
|
|
"Number of small outliers: 2\n",
|
|
"Number of large outliers: 7\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 2
|
|
},
|
|
{
|
|
"metadata": {},
|
|
"cell_type": "markdown",
|
"source": "## Loop counts and time needed to reach the minimum runtime",
|
|
"id": "d58b61bf62ce2780"
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-10-14T00:26:36.172976Z",
|
|
"start_time": "2024-10-14T00:26:36.166724Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
"# For each run: the highest loop index recorded by any of its test invocations.\n",
"run_loop_counts = [\n",
"    max(max(loop_runtimes) for loop_runtimes in run_runtimes.values())\n",
"    for run_runtimes in nonempty_runtime_results\n",
"]\n",
"\n",
"print(f\"Loop counts: {run_loop_counts}\")\n",
"print()\n",
"analyze_num_array(run_loop_counts, str)"
|
|
],
|
|
"id": "57c73a31483e06b5",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Loop counts: [106, 127, 116, 127, 123, 136, 132, 131, 130, 130, 127, 131, 130, 133, 123, 126, 133, 129, 120, 135, 114, 121, 121, 125, 127, 123, 116, 134, 122, 118, 128, 118, 128, 124, 116, 126, 130, 124, 118, 131, 124, 130, 123, 93, 115, 131, 127, 128, 132, 133, 131, 128, 132, 124, 131, 132, 130, 135, 130, 129, 128, 124, 129, 121, 129, 131, 129, 126, 128, 115, 131, 120, 122, 115, 125, 119, 130, 126, 124, 87, 126, 123, 108, 124, 130, 119, 121, 125, 119, 128, 121, 108, 90, 121, 125, 109, 104, 128, 123, 104]\n",
|
|
"\n",
|
|
"Mean +- std dev: 123.62 +- 9.040774303122493\n",
|
|
"Max: 136\n",
|
|
"Median: 126.0\n",
|
|
"Min: 87\n",
|
|
"\n",
|
|
"0th percentile: 87.0 (-30% of the mean)\n",
|
|
"5th percentile: 105.9 (-14% of the mean)\n",
|
|
"25th percentile: 121.0 (-2% of the mean)\n",
|
|
"50th percentile: 126.0 (2% of the mean)\n",
|
|
"75th percentile: 130.0 (5% of the mean)\n",
|
|
"95th percentile: 133.0 (8% of the mean)\n",
|
|
"100th percentile: 136.0 (10% of the mean)\n",
|
|
"\n",
|
|
"Small outliers (< 107.5): ['87', '90', '93', '104', '104', '106']\n",
|
|
"\n",
|
|
"Large outliers (> 143.5): []\n",
|
|
"\n",
|
|
"Total number of outliers: 6\n",
|
|
"Number of small outliers: 6\n",
|
|
"Number of large outliers: 0\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 3
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-10-14T00:26:46.184288Z",
|
|
"start_time": "2024-10-14T00:26:46.081388Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
"# For each run: the loop index by which every test invocation has reached its minimum runtime,\n",
"# i.e. the latest, across invocations, of the earliest loop that hit that invocation's minimum.\n",
"run_reach_min_loop = [\n",
"    max(\n",
"        min(\n",
"            loop_index\n",
"            for loop_index, loop_runtime in run_runtimes[invocation_id].items()\n",
"            if loop_runtime == min_runtimes[invocation_id]\n",
"        )\n",
"        for invocation_id in valid_invocation_ids\n",
"    )\n",
"    for run_runtimes, min_runtimes in zip(nonempty_runtime_results, run_min_runtimes)\n",
"]\n",
"\n",
"print(f\"Loop count to reach min runtime: {run_reach_min_loop}\")\n",
"print()\n",
"analyze_num_array(run_reach_min_loop, str)"
|
|
],
|
|
"id": "2ab96deed074385d",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Loop count to reach min runtime: [106, 127, 116, 125, 106, 135, 116, 131, 122, 107, 126, 127, 127, 131, 123, 124, 129, 122, 115, 127, 113, 112, 114, 108, 120, 114, 111, 127, 111, 113, 110, 118, 108, 77, 104, 121, 130, 116, 105, 111, 119, 127, 121, 93, 113, 117, 121, 98, 118, 110, 121, 118, 127, 114, 126, 125, 119, 129, 126, 84, 118, 122, 119, 101, 110, 114, 103, 119, 124, 108, 114, 115, 117, 103, 110, 93, 128, 119, 121, 72, 123, 90, 101, 110, 122, 111, 121, 107, 118, 126, 114, 107, 80, 99, 116, 104, 71, 101, 112, 86]\n",
|
|
"\n",
|
|
"Mean +- std dev: 113.6 +- 12.804686642007294\n",
|
|
"Max: 135\n",
|
|
"Median: 116.0\n",
|
|
"Min: 71\n",
|
|
"\n",
|
|
"0th percentile: 71.0 (-37% of the mean)\n",
|
|
"5th percentile: 85.9 (-24% of the mean)\n",
|
|
"25th percentile: 108.0 (-5% of the mean)\n",
|
|
"50th percentile: 116.0 (2% of the mean)\n",
|
|
"75th percentile: 122.0 (7% of the mean)\n",
|
|
"95th percentile: 129.0 (14% of the mean)\n",
|
|
"100th percentile: 135.0 (19% of the mean)\n",
|
|
"\n",
|
|
"Small outliers (< 87.0): ['71', '72', '77', '80', '84', '86']\n",
|
|
"\n",
|
|
"Large outliers (> 143.0): []\n",
|
|
"\n",
|
|
"Total number of outliers: 6\n",
|
|
"Number of small outliers: 6\n",
|
|
"Number of large outliers: 0\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 4
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2024-10-14T01:41:58.090916Z",
|
|
"start_time": "2024-10-14T01:41:58.081916Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
"# Allocated benchmarking time is 5 seconds, expressed here in nanoseconds.\n",
"BENCHMARKING_TIME_NS = 5_000_000_000\n",
"\n",
"# Estimate: the fraction of loops needed to reach the minimum, scaled to the allocated time.\n",
"time_reach_min_loop = [\n",
"    min_loop / loop_count * BENCHMARKING_TIME_NS\n",
"    for min_loop, loop_count in zip(run_reach_min_loop, run_loop_counts)\n",
"]\n",
"print(f\"Times to reach min loop: {time_reach_min_loop}\")\n",
"print()\n",
"analyze_num_array(time_reach_min_loop, humanize_runtime)"
|
|
],
|
|
"id": "c8bc504cfcdaca6",
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Times to reach min loop: [5000000000.0, 5000000000.0, 5000000000.0, 4921259842.519685, 4308943089.430895, 4963235294.117647, 4393939393.939394, 5000000000.0, 4692307692.307693, 4115384615.3846154, 4960629921.259843, 4847328244.274809, 4884615384.615384, 4924812030.075188, 5000000000.0, 4920634920.63492, 4849624060.150376, 4728682170.542636, 4791666666.666667, 4703703703.703704, 4956140350.8771925, 4628099173.5537195, 4710743801.652892, 4320000000.0, 4724409448.818897, 4634146341.463415, 4784482758.620689, 4738805970.149254, 4549180327.868853, 4788135593.220339, 4296875000.0, 5000000000.0, 4218750000.0, 3104838709.677419, 4482758620.689655, 4801587301.587302, 5000000000.0, 4677419354.83871, 4449152542.372881, 4236641221.374046, 4798387096.774194, 4884615384.615384, 4918699186.99187, 5000000000.0, 4913043478.26087, 4465648854.961832, 4763779527.559055, 3828125000.0, 4469696969.69697, 4135338345.864661, 4618320610.687023, 4609375000.0, 4810606060.606061, 4596774193.548387, 4809160305.343512, 4734848484.848485, 4576923076.923077, 4777777777.777778, 4846153846.153846, 3255813953.4883723, 4609375000.0, 4919354838.709678, 4612403100.775193, 4173553719.0082645, 4263565891.472868, 4351145038.167939, 3992248062.015504, 4722222222.222222, 4843750000.0, 4695652173.913044, 4351145038.167939, 4791666666.666667, 4795081967.213115, 4478260869.565218, 4400000000.0, 3907563025.210084, 4923076923.076923, 4722222222.222222, 4879032258.064516, 4137931034.4827585, 4880952380.952381, 3658536585.365854, 4675925925.925926, 4435483870.967742, 4692307692.307693, 4663865546.218488, 5000000000.0, 4280000000.0, 4957983193.277311, 4921875000.0, 4710743801.652892, 4953703703.703704, 4444444444.444445, 4090909090.909091, 4640000000.0, 4770642201.834863, 3413461538.4615383, 3945312500.0, 4552845528.455284, 4134615384.6153846]\n",
|
|
"\n",
|
|
"Mean +- std dev: 4.59 seconds +- 383 milliseconds\n",
|
|
"Max: 5.00 seconds\n",
|
|
"Median: 4.71 seconds\n",
|
|
"Min: 3.10 seconds\n",
|
|
"\n",
|
|
"0th percentile: 3.10 seconds (-32% of the mean)\n",
|
|
"5th percentile: 3.90 seconds (-15% of the mean)\n",
|
|
"25th percentile: 4.43 seconds (-4% of the mean)\n",
|
|
"50th percentile: 4.71 seconds (2% of the mean)\n",
|
|
"75th percentile: 4.88 seconds (6% of the mean)\n",
|
|
"95th percentile: 5.00 seconds (9% of the mean)\n",
|
|
"100th percentile: 5.00 seconds (9% of the mean)\n",
|
|
"\n",
|
|
"Small outliers (< 3.75 seconds): ['3.10 seconds', '3.26 seconds', '3.41 seconds', '3.66 seconds']\n",
|
|
"\n",
|
|
"Large outliers (> 5.56 seconds): []\n",
|
|
"\n",
|
|
"Total number of outliers: 4\n",
|
|
"Number of small outliers: 4\n",
|
|
"Number of large outliers: 0\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 8
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 2
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython2",
|
|
"version": "2.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|