2025-09-03 23:26:09 +00:00
from __future__ import annotations
import re
from typing import TYPE_CHECKING
2025-10-23 04:20:20 +00:00
import sentry_sdk
2025-09-03 23:26:09 +00:00
from aiservice . analytics . posthog import ph
from aiservice . common_utils import validate_trace_id
2025-10-31 10:07:19 +00:00
from aiservice . env_specific import debug_log_sensitive_data , openai_client
2025-11-04 08:27:58 +00:00
from aiservice . models . aimodels import LLM , RANKING_MODEL , calculate_llm_cost
2025-09-03 23:26:09 +00:00
from log_features . log_event import update_optimization_cost
from log_features . log_features import log_features
2025-11-04 05:01:22 +00:00
from ninja import NinjaAPI , Schema
from openai . types . chat import ChatCompletionSystemMessageParam , ChatCompletionUserMessageParam
2025-09-03 23:26:09 +00:00
if TYPE_CHECKING :
2025-11-04 08:18:14 +00:00
from aiservice . models . aimodels import LLM
2025-09-03 23:26:09 +00:00
from openai . types . chat import (
ChatCompletionAssistantMessageParam ,
ChatCompletionFunctionMessageParam ,
ChatCompletionToolMessageParam ,
)
2025-09-04 23:10:35 +00:00
# from google import genai
# from pydantic import BaseModel
#
# class RankerResponseSchema(BaseModel):
#     ranking: list[int]
#     explanation: str
2025-09-11 19:46:26 +00:00
ranker_api = NinjaAPI(urls_namespace="ranker")

# Extract the <rank>...</rank> and <explain>...</explain> sections from the
# LLM reply. DOTALL lets the captured group span multiple lines; IGNORECASE
# tolerates the model varying the tag casing.
rank_regex_pattern = re.compile(r"<rank>(.*)<\/rank>", re.DOTALL | re.IGNORECASE)
explain_regex_pattern = re.compile(r"<explain>(.*)<\/explain>", re.DOTALL | re.IGNORECASE)
SYSTEM_PROMPT = """ You are an expert code reviewer who understands why programs run fast.
2025-09-10 00:05:47 +00:00
You are provided with a list of optimization candidates with their code diff with respect to the baseline code and speedup ratio information . Your task is to rank the candidates in decreasing order of their viability as a pull request . Your goal is to improve the probability of acceptance of the optimization by an expert engineer .
2025-09-03 23:26:09 +00:00
2025-11-04 05:21:00 +00:00
You are also provided with the following information .
- python_version - The version of python the code would be executed on .
- function_references - Python markdown blocks with filename and references of some functions which call the function being optimized . The filenames and / or references could indicate if the function being optimized is in a hot path . The reference could have the function being called from a place that is important , for example in a loop , which means the effect of optimization might be important .
2025-09-10 00:05:47 +00:00
Rules to follow while ranking optimization candidates -
- Prefer optimizations with higher speedup ratios . If the higher speedups happen due to strange hacks or micro - optimizations or something an expert won ' t write then prefer it less.
- Prefer optimizations which contain precise diffs unless the speedup provided is very high . Larger pull requests are typically harder to accept then more precise smaller pull requests .
2025-11-04 08:18:14 +00:00
- Introduction of the ` global ` and ` nonlocal ` keywords in optimizations is * * HIGHLY DISCOURAGED * * as it reduces code clarity and maintainability , introduces hidden dependencies , can cause subtle bugs and breaks modularity . * * DO NOT * * prefer such optimizations .
2025-11-04 08:27:58 +00:00
- If the only optimizations are micro - optimizations like inlining a function call , or localizing variables or methods ( not being used in a loop ) , especially with python_version older than 3.11 , do not prefer the optimizations .
2025-09-10 00:05:47 +00:00
- The optimization candidate should not impact the code readability unless the speedup provided is very high .
Sometimes , these criteria maybe in conflict with each other . In such cases you have to remember that the goal is acceptance of the pull request , so make a judgement on what optimization candidate would be most likely to be accepted .
Please provide your response in the following format :
2025-09-03 23:26:09 +00:00
< rank >
Comma separated list of candidate indices in decreasing order of their viability as a pull request candidate .
< / rank >
< explain >
A brief explanation of why the particular ranking was made .
< / explain >
"""
2025-09-10 00:05:47 +00:00
USER_PROMPT = """ Here is a numbered list of optimization candidates ' and their speedup ratios.
2025-09-03 23:26:09 +00:00
2025-09-05 00:30:48 +00:00
{ ranking_context }
2025-11-04 05:21:00 +00:00
Here ' s the python version
{ python_version }
Here are the function references
2025-11-04 06:30:44 +00:00
{ function_references }
2025-09-03 23:26:09 +00:00
"""
2025-11-04 05:21:00 +00:00
async def rank_optimizations(  # noqa: D417
    user_id: str, data: RankInputSchema, rank_model: LLM = RANKING_MODEL
) -> RankResponseSchema | RankErrorResponseSchema:
    """Rank optimization candidates by pull-request viability using an LLM.

    Builds a prompt from each candidate's diff and speedup ratio, asks the
    ranking model for a ``<rank>``/``<explain>`` formatted answer, and parses
    that answer.

    Parameters
    ----------
    - user_id: caller identity, used for analytics and sensitive-data logging.
    - data: candidate diffs/speedups plus optional python_version and
      function_references context.
    - rank_model: model used for ranking; defaults to RANKING_MODEL.

    Returns
    -------
    RankResponseSchema with a 1-indexed ranking (a permutation of
    1..len(data.diffs)) and a best-effort explanation, or
    RankErrorResponseSchema if the LLM call fails or no valid ranking can be
    parsed from the response.
    """
    debug_log_sensitive_data(f"Generating a ranking for {user_id}")
    # TODO add logging instead of print(optimization_ids)
    ranking_context = "".join(
        f"{i + 1}. Diff:\n```diff\n{diff}\n```\nSpeedup: {speedup:.3f}\n"
        for i, (diff, speedup) in enumerate(zip(data.diffs, data.speedups, strict=False))
    )

    user_prompt = USER_PROMPT.format(
        ranking_context=ranking_context,
        python_version=data.python_version or "Not available",
        function_references=data.function_references or "Not available",
    )

    system_message = ChatCompletionSystemMessageParam(role="system", content=SYSTEM_PROMPT)
    user_message = ChatCompletionUserMessageParam(role="user", content=user_prompt)
    debug_log_sensitive_data(f"{SYSTEM_PROMPT} {user_prompt}")
    messages: list[
        ChatCompletionSystemMessageParam
        | ChatCompletionUserMessageParam
        | ChatCompletionAssistantMessageParam
        | ChatCompletionToolMessageParam
        | ChatCompletionFunctionMessageParam
    ] = [system_message, user_message]

    try:
        output = await openai_client.with_options(max_retries=2).chat.completions.create(
            model=rank_model.name, messages=messages, n=1
        )
        await update_optimization_cost(trace_id=data.trace_id, cost=calculate_llm_cost(output, rank_model))
    except Exception as e:  # boundary: report to Sentry and surface as an error payload
        debug_log_sensitive_data(f"Failed to generate new explanation, Error message: {e}")
        sentry_sdk.capture_exception(e)
        return RankErrorResponseSchema(error=str(e))

    debug_log_sensitive_data(f"AIClient optimization response:\n{output}")
    if output.usage is not None:
        ph(
            user_id,
            "aiservice-optimize-openai-usage",
            properties={"model": rank_model.name, "n": 1, "usage": output.usage.model_dump_json()},
        )

    # content may be None (e.g. refusal / tool-only reply); normalize so the
    # regex searches below cannot raise TypeError.
    content = output.choices[0].message.content or ""

    # Parse the <explain> tag. The explanation is best-effort: the ranking is
    # the essential output, so a missing explanation degrades to "".
    explanation_match = explain_regex_pattern.search(content)
    explanation = explanation_match.group(1) if explanation_match else ""
    if not explanation:
        # TODO add logging instead of print("No explanation found")
        pass

    # Parse the <rank> tag into a list of 1-indexed candidate positions.
    ranking_match = rank_regex_pattern.search(content)
    if ranking_match is None:
        # TODO add logging instead of print("No ranking found")
        return RankErrorResponseSchema(error="No ranking found")
    try:
        # TODO better parsing, could be only comma separated, need to handle all edge cases
        ranking = [int(token) for token in ranking_match.group(1).strip().split(",")]
    except ValueError:
        # Non-numeric tokens in the rank list.
        return RankErrorResponseSchema(error="No ranking found")

    # A valid ranking is exactly a permutation of 1..N (one slot per candidate).
    if sorted(ranking) != list(range(1, len(data.diffs) + 1)):
        # TODO need to handle all edge cases
        # TODO add logging instead of print("Invalid ranking")
        return RankErrorResponseSchema(error="No ranking found")

    return RankResponseSchema(ranking=ranking, explanation=explanation)
2025-09-03 23:26:09 +00:00
2025-09-04 23:10:35 +00:00
class RankInputSchema(Schema):
    """Request payload for the ranker endpoint."""

    # Trace identifier; must be a valid UUIDv4 (checked by validate_trace_id).
    trace_id: str
    # Speedup ratio per candidate; parallel to `diffs`.
    speedups: list[float]
    # Code diff of each candidate against the baseline; parallel to `speedups`.
    diffs: list[str]
    optimization_ids: list[str]  # which diff corresponded to which opt candidate
    # Target Python version; rendered as "Not available" in the prompt if None.
    python_version: str | None = None
    # Markdown blocks with call sites of the optimized function, if known.
    function_references: str | None = None
2025-09-03 23:26:09 +00:00
2025-09-04 23:10:35 +00:00
class RankResponseSchema(Schema):
    """Successful ranking result."""

    # Candidate indices in decreasing viability. NOTE(review): 1-indexed when
    # returned by rank_optimizations(), 0-indexed in the HTTP response from
    # rank() — confirm callers expect this.
    ranking: list[int]
    # The model's explanation of the ranking (may be empty).
    explanation: str
2025-09-04 23:10:35 +00:00
class RankErrorResponseSchema(Schema):
    """Error payload returned when ranking fails."""

    # Human-readable description of the failure.
    error: str
2025-09-11 19:46:26 +00:00
@ranker_api.post("/", response={200: RankResponseSchema, 400: RankErrorResponseSchema, 500: RankErrorResponseSchema})
async def rank(request, data: RankInputSchema) -> tuple[int, RankResponseSchema | RankErrorResponseSchema]:
    """Rank the submitted optimization candidates for a trace.

    Validates the trace id, delegates the LLM ranking to rank_optimizations,
    optionally logs features, and converts the model's 1-indexed ranking to
    0-indexed candidate positions for the HTTP response.

    Returns (status_code, payload): 200 with a RankResponseSchema on success,
    400 for an invalid trace id, 500 when no valid ranking was generated.
    """
    ph(request.user, "aiservice-rank-called")
    if not validate_trace_id(data.trace_id):
        return 400, RankErrorResponseSchema(error="Invalid trace ID. Please provide a valid UUIDv4.")

    ranking_response = await rank_optimizations(request.user, data)
    if isinstance(ranking_response, RankErrorResponseSchema):
        ph(request.user, "Invalid Ranking, fallback to default")
        debug_log_sensitive_data("No valid ranking was generated")
        return 500, RankErrorResponseSchema(error="Error generating ranking. Internal server error.")

    ph(request.user, "ranking generated", properties={"ranking": ranking_response})

    # The LLM ranking is 1-indexed; downstream consumers expect 0-indexed
    # candidate positions.
    ranking_with_0_idx = [x - 1 for x in ranking_response.ranking]

    # should_log_features is an optional attribute set by middleware upstream
    # (not visible in this file); treat its absence as False.
    if getattr(request, "should_log_features", False):
        ranked_opt_ids = [data.optimization_ids[i] for i in ranking_with_0_idx]
        await log_features(
            trace_id=data.trace_id,
            user_id=request.user,
            ranking={"ranking": ranked_opt_ids, "explanation": ranking_response.explanation},
        )

    response = RankResponseSchema(explanation=ranking_response.explanation, ranking=ranking_with_0_idx)
    return 200, response