diff --git a/django/aiservice/aiservice/models/aimodels.py b/django/aiservice/aiservice/models/aimodels.py
index c35fb37cf..a3aae9326 100644
--- a/django/aiservice/aiservice/models/aimodels.py
+++ b/django/aiservice/aiservice/models/aimodels.py
@@ -219,3 +219,4 @@ RANKING_MODEL: LLM = _get_openai_model()
REFINEMENT_MODEL: LLM = _get_anthropic_model()
EXPLANATIONS_MODEL: LLM = _get_anthropic_model()
OPTIMIZATION_REVIEW_MODEL: LLM = _get_anthropic_model()
+CODE_REPAIR_MODEL: LLM = _get_anthropic_model()
diff --git a/django/aiservice/aiservice/urls.py b/django/aiservice/aiservice/urls.py
index ce5485f52..d1b7b468f 100644
--- a/django/aiservice/aiservice/urls.py
+++ b/django/aiservice/aiservice/urls.py
@@ -20,6 +20,7 @@ Including another URLconf
# from django.contrib import admin
from django.urls import path
+from code_repair.code_repair import code_repair_api
from explanations.explanations import explanations_api
from log_features.log_features import features_api
from optimization_review.optimization_review import optimization_review_api
@@ -39,5 +40,6 @@ urlpatterns = [
path("ai/explain", explanations_api.urls),
path("ai/rank", ranker_api.urls),
path("ai/optimization_review", optimization_review_api.urls),
+ path("ai/code_repair", code_repair_api.urls),
path("ai/workflow-gen", workflow_gen_api.urls),
]
diff --git a/django/aiservice/code_repair/CODE_REPAIR_SYSTEM_PROMPT.md b/django/aiservice/code_repair/CODE_REPAIR_SYSTEM_PROMPT.md
new file mode 100644
index 000000000..c1bf41af8
--- /dev/null
+++ b/django/aiservice/code_repair/CODE_REPAIR_SYSTEM_PROMPT.md
@@ -0,0 +1,61 @@
+You are a senior software engineer who is great at reviewing and repairing python code for performance and behavior.
+The goal of repairing code is to ensure that the optimized code is performant and has the same behavior as the original code based on the provided test results.
+You are provided the following information:
+
+- original_source_code
+- optimized_source_code - This is the optimized implementation of the original code.
+- test_details - This has the details of the behavioral differences between the original and optimized code.
+
+### Output format
+
+Replace sections of the optimized code using SEARCH/REPLACE blocks that define exact changes to specific parts of the file.
+Define your output in XML-style tags like here:
+
+
+src/main.py
+
+<<<<<<< SEARCH
+a = 2
+=======
+a = 3
+>>>>>>> REPLACE
+
+
+
+Always adhere to this format for tool use to ensure proper parsing and execution.
+
+## replace_in_file
+Description: Request to replace sections of content in an existing file using SEARCH/REPLACE blocks that define exact changes to specific parts of the file. This tool should be used when you need to make targeted changes to specific parts of a file.
+Parameters:
+- path: (required) The path of the file to modify
+- diff: (required) One or more SEARCH/REPLACE blocks following this exact format:
+ ```
+ <<<<<<< SEARCH
+ [exact content to find]
+ =======
+ [new content to replace with]
+ >>>>>>> REPLACE
+ ```
+ Critical rules:
+  1. SEARCH content must EXACTLY match the section of the file it targets:
+ * Match character-for-character including whitespace, indentation, line endings
+ * Include all comments, docstrings, etc.
+ 2. SEARCH/REPLACE blocks will ONLY replace the first match occurrence.
+    * Include multiple unique SEARCH/REPLACE blocks if you need to make multiple changes.
+ * Include *just* enough lines in each SEARCH section to uniquely match each set of lines that need to change.
+ * When using multiple SEARCH/REPLACE blocks, list them in the order they appear in the file.
+ 3. Keep SEARCH/REPLACE blocks concise:
+ * Break large SEARCH/REPLACE blocks into a series of smaller blocks that each change a small portion of the file.
+ * Include just the changing lines, and a few surrounding lines if needed for uniqueness.
+ * Do not include long runs of unchanging lines in SEARCH/REPLACE blocks.
+ * Each line must be complete. Never truncate lines mid-way through as this can cause matching failures.
+ 4. Special operations:
+ * To move code: Use two SEARCH/REPLACE blocks (one to delete from original + one to insert at new location)
+ * To delete code: Use empty REPLACE section
+Usage:
+
+File path here
+
+Search and replace blocks here
+
+
diff --git a/django/aiservice/code_repair/CODE_REPAIR_USER_PROMPT.md b/django/aiservice/code_repair/CODE_REPAIR_USER_PROMPT.md
new file mode 100644
index 000000000..6ce6cfd68
--- /dev/null
+++ b/django/aiservice/code_repair/CODE_REPAIR_USER_PROMPT.md
@@ -0,0 +1,13 @@
+Please fix the optimized code to match the behavior of the original code, while trying to keep the optimization logic intact.
+
+### The original source code:
+
+{original_source_code}
+
+### The optimized source code
+
+{modified_source_code}
+
+### The test result details
+
+{test_details}
diff --git a/django/aiservice/code_repair/__init__.py b/django/aiservice/code_repair/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/django/aiservice/code_repair/apps.py b/django/aiservice/code_repair/apps.py
new file mode 100644
index 000000000..2e2619b2c
--- /dev/null
+++ b/django/aiservice/code_repair/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class CodeRepairConfig(AppConfig):
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "code_repair"
diff --git a/django/aiservice/code_repair/code_repair.py b/django/aiservice/code_repair/code_repair.py
new file mode 100644
index 000000000..7709c294b
--- /dev/null
+++ b/django/aiservice/code_repair/code_repair.py
@@ -0,0 +1,204 @@
+from __future__ import annotations
+
+import logging
+import uuid
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import libcst as cst
+import sentry_sdk
+from ninja import NinjaAPI, Schema
+from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
+from pydantic import ValidationError
+
+from aiservice.analytics.posthog import ph
+from aiservice.common_utils import validate_trace_id
+from aiservice.env_specific import debug_log_sensitive_data, llm_clients
+from aiservice.models.aimodels import CODE_REPAIR_MODEL, calculate_llm_cost
+from log_features.log_event import update_optimization_cost
+from log_features.log_features import log_features
+from optimizer.models import OptimizedCandidateSource
+
+from .code_repair_context import ( # noqa: TC001 (don't move CodeRepairRequestSchema to type-checking because it's the schema definition)
+ CodeRepairContext,
+ CodeRepairContextData,
+ CodeRepairRequestSchema,
+)
+
+if TYPE_CHECKING:
+ from django.http import HttpRequest as Request
+ from openai.types.chat import (
+ ChatCompletionAssistantMessageParam,
+ ChatCompletionFunctionMessageParam,
+ ChatCompletionToolMessageParam,
+ )
+
+ from aiservice.models.aimodels import LLM
+
+code_repair_api = NinjaAPI(urls_namespace="code_repair")
+
+# Get the directory of the current file
+current_dir = Path(__file__).parent
+SYSTEM_PROMPT = (current_dir / "CODE_REPAIR_SYSTEM_PROMPT.md").read_text()
+
+USER_PROMPT = (current_dir / "CODE_REPAIR_USER_PROMPT.md").read_text()
+
+
+async def code_repair( # noqa: D417
+ user_id: str, optimization_id: str, ctx: CodeRepairContext, optimize_model: LLM = CODE_REPAIR_MODEL
+) -> CodeRepairIntermediateResponseItemschema | CodeRepairErrorResponseSchema:
+ """Repair the given candidate to match the behaviour of the original code.
+
+ Parameters
+ ----------
+ :param user_id:
+ :param optimization_id
+ :param optimize_model: LLM for getting the code_repairs
+ :param ctx: the repair context, has the data property which includes
+ - original code
+ - optimized code
+ - behaviour test diffs
+ Returns
+ -------
+ CodeRepairIntermediateResponseItemschema or CodeRepairErrorResponseSchema
+ """
+ system_prompt = ctx.get_system_prompt()
+ user_prompt = ctx.get_user_prompt()
+
+ new_op_id = str(uuid.uuid4())
+ system_message = ChatCompletionSystemMessageParam(role="system", content=system_prompt)
+ user_message = ChatCompletionUserMessageParam(role="user", content=user_prompt)
+ messages: list[
+ ChatCompletionSystemMessageParam
+ | ChatCompletionUserMessageParam
+ | ChatCompletionAssistantMessageParam
+ | ChatCompletionToolMessageParam
+ | ChatCompletionFunctionMessageParam
+ ] = [system_message, user_message]
+ debug_log_sensitive_data(f"This was the user prompt\n {user_prompt}\n")
+ llm_client = llm_clients[optimize_model.model_type]
+ try:
+ output = await llm_client.with_options(max_retries=2).chat.completions.create(
+ model=optimize_model.name, messages=messages, n=1
+ )
+ llm_cost = calculate_llm_cost(output, optimize_model)
+ except Exception as e:
+ logging.exception("Claude Code Generation error in code_repair")
+ sentry_sdk.capture_exception(e)
+ debug_log_sensitive_data(f"Failed to generate code for source:\n{ctx.data.original_source_code}")
+ return CodeRepairErrorResponseSchema(error=str(e))
+ debug_log_sensitive_data(f"ClaudeClient optimization response:\n{output.model_dump_json(indent=2)}")
+ if output.usage is not None:
+ ph(user_id, "code_repair-usage", properties={"model": optimize_model.name, "usage": output.usage.json()})
+ results = [content for op in output.choices if (content := op.message.content)] # will be of size 1
+
+ # Regex doesn't work yet in extracting everything else other than the search replace block
+ explanation = results[0]
+
+ repaired_optimization = ""
+ try:
+ diff_patches = ctx.extract_diff_patches_from_llm_res(results[0])
+ repaired_optimization = ctx.apply_patches_to_optimized_code(diff_patches)
+ except (ValueError, ValidationError) as exc:
+ sentry_sdk.capture_exception(exc)
+ debug_log_sensitive_data(f"{type(exc).__name__} for source:\n{ctx.data.modified_source_code}")
+ debug_log_sensitive_data(f"Traceback: {exc}")
+ repaired_optimization = ""
+
+ if not ctx.is_valid(repaired_optimization):
+ repaired_optimization = ""
+
+ return CodeRepairIntermediateResponseItemschema(
+ optimization_id=new_op_id,
+ parent_id=optimization_id,
+ source_code=repaired_optimization,
+ llm_cost=llm_cost,
+ explanation=explanation,
+ )
+
+
+class CodeRepairErrorResponseSchema(Schema):
+ error: str
+
+
+class CodeRepairIntermediateResponseItemschema(Schema):
+ # the key will be the optimization id and the value will be the actual refined code
+ optimization_id: str
+ parent_id: str
+ source_code: str
+ llm_cost: float
+ explanation: str
+
+
+class CodeRepairResponseItemschema(Schema):
+ # the key will be the optimization id and the value will be the actual refined code
+ optimization_id: str
+ parent_id: str
+ source_code: str
+ explanation: str
+
+
+@code_repair_api.post(
+ "/",
+ response={
+ 200: CodeRepairResponseItemschema,
+ 400: CodeRepairErrorResponseSchema,
+ 500: CodeRepairErrorResponseSchema,
+ },
+)
+async def repair(
+ request: Request, data: CodeRepairRequestSchema
+) -> tuple[int, CodeRepairResponseItemschema | CodeRepairErrorResponseSchema]:
+ ph(request.user, "aiservice-code_repair-called")
+ ctx_data = CodeRepairContextData(
+ original_source_code=data.original_source_code,
+ modified_source_code=data.modified_source_code,
+ test_diffs=data.test_diffs,
+ )
+ ctx = CodeRepairContext(ctx_data=ctx_data, base_system_prompt=SYSTEM_PROMPT, base_user_prompt=USER_PROMPT)
+ trace_id = data.trace_id
+ if not validate_trace_id(trace_id):
+ return 400, CodeRepairErrorResponseSchema(error="Invalid trace ID. Please provide a valid UUIDv4.")
+
+ code_repair_data = await code_repair(user_id=request.user, optimization_id=data.optimization_id, ctx=ctx)
+ total_llm_cost = 0.0
+ if isinstance(code_repair_data, CodeRepairErrorResponseSchema):
+ return 500, code_repair_data
+ total_llm_cost += code_repair_data.llm_cost
+ try:
+ ctx.validate_python_module()
+ except cst.ParserSyntaxError as e:
+ # log exception with sentry
+ sentry_sdk.capture_exception(e)
+ debug_log_sensitive_data(f"ParserSyntaxError for source:\n{code_repair_data.source_code}")
+ debug_log_sensitive_data(f"Traceback: {e}")
+ return 500, CodeRepairErrorResponseSchema(error=str(e))
+ except (ValueError, ValidationError) as exc:
+ # Another one bites the Pydantic validation dust
+ sentry_sdk.capture_exception(exc)
+ debug_log_sensitive_data(f"{type(exc).__name__} for source:\n{code_repair_data.source_code}")
+ debug_log_sensitive_data(f"Traceback: {exc}")
+ return 500, CodeRepairErrorResponseSchema(error=str(exc))
+
+ if hasattr(request, "should_log_features") and request.should_log_features:
+ await log_features(
+ trace_id=data.trace_id,
+ user_id=request.user,
+ optimizations_raw={code_repair_data.optimization_id: code_repair_data.source_code},
+ explanations_raw={code_repair_data.optimization_id: code_repair_data.explanation},
+ # explanations_post={cei.optimization_id: cei.explanation for cei in optimization_response_items},
+ # optimizations_post={cei.optimization_id: cei.source_code for cei in optimization_response_items},
+ optimizations_origin={
+ code_repair_data.optimization_id: {
+ "source": OptimizedCandidateSource.REPAIR,
+ "parent": code_repair_data.parent_id,
+ }
+ },
+ )
+ await update_optimization_cost(trace_id=trace_id, cost=total_llm_cost)
+ return 200, CodeRepairResponseItemschema(
+ source_code=code_repair_data.source_code,
+ optimization_id=code_repair_data.optimization_id,
+ parent_id=code_repair_data.parent_id,
+ explanation=code_repair_data.explanation,
+ )
diff --git a/django/aiservice/code_repair/code_repair_context.py b/django/aiservice/code_repair/code_repair_context.py
new file mode 100644
index 000000000..689e0b3a2
--- /dev/null
+++ b/django/aiservice/code_repair/code_repair_context.py
@@ -0,0 +1,153 @@
+import logging
+from collections import defaultdict
+from dataclasses import dataclass
+from enum import Enum
+
+import libcst as cst
+import sentry_sdk
+from aiservice.env_specific import debug_log_sensitive_data
+from ninja import Field, Schema
+from optimizer.context_utils.constants import REPLACE_IN_FILE_TAGS_REGEX
+from optimizer.context_utils.context_helpers import group_code, is_markdown_structure_changed, split_markdown_code
+from optimizer.diff_patches_utils.patches_v2 import apply_patches, group_diff_patches_by_path
+from optimizer.models import CodeAndExplanation
+from pydantic import ValidationError
+from testgen.instrumentation.edit_generated_test import parse_module_to_cst
+
+
+class TestDiffScope(str, Enum):
+ RETURN_VALUE = "return_value"
+ STDOUT = "stdout"
+ DID_PASS = "did_pass" # noqa: S105
+ TIMED_OUT = "timed_out"
+
+
+SCOPE_DESCRIPTIONS = {
+ TestDiffScope.RETURN_VALUE: (
+ "The function returned a different value in the optimized code compared to the original."
+ ),
+ TestDiffScope.STDOUT: ("The output printed to stdout is different in the optimized code compared to the original."),
+ TestDiffScope.DID_PASS: (
+ "The test passed in one version but failed in the other (a change in pass/fail behavior)."
+ ),
+}
+
+
+class TestDiff(Schema):
+ scope: TestDiffScope
+ original_value: bool | str | int | float | dict | list | None = None
+ candidate_value: bool | str | int | float | dict | list | None = None
+ original_pass: bool
+ candidate_pass: bool
+ test_src_code: str
+ candidate_pytest_error: str | None = None
+ original_pytest_error: str | None = None
+
+
+class CodeRepairRequestSchema(Schema):
+ trace_id: str
+ optimization_id: str
+ original_source_code: str
+ modified_source_code: str
+ test_diffs: list[TestDiff] = Field(..., alias="test_diffs")
+
+
+@dataclass()
+class CodeRepairContextData:
+ original_source_code: str
+ modified_source_code: str
+ test_diffs: list[TestDiff]
+
+
+class CodeRepairContext:
+ def __init__(self, ctx_data: CodeRepairContextData, base_system_prompt: str, base_user_prompt: str) -> None:
+ self.data = ctx_data
+ self.base_system_prompt = base_system_prompt
+ self.base_user_prompt = base_user_prompt
+
+ def get_system_prompt(self) -> str:
+ return self.base_system_prompt
+
+ def build_test_details(self, test_diffs: list[TestDiff]) -> str:
+ sections = defaultdict(str)
+ for diff in test_diffs:
+ try:
+ if sections[diff.test_src_code] == "":
+ # add error strings and test def only once per test function
+ sections[diff.test_src_code] += f"""Test Source:
+ ```python
+ {diff.test_src_code}
+ ```
+ Pytest error (original code): {diff.original_pytest_error if diff.original_pytest_error else ""}
+ Pytest error (optimized code): {diff.candidate_pytest_error if diff.candidate_pytest_error else ""}
+ """
+ sections[diff.test_src_code] += "\n".join(
+ [
+ f"{SCOPE_DESCRIPTIONS.get(diff.scope, diff.scope.value)}",
+ f"Expected: {diff.original_value!r}.\nGot: {diff.candidate_value!r}."
+ if diff.scope != TestDiffScope.DID_PASS
+ else "",
+ f"Original code test status: {'Passed' if diff.original_pass else 'Failed'}. Optimized code test status: {'Passed' if diff.candidate_pass else 'Failed'}",
+ "---",
+ ]
+ )
+ except Exception as e:
+ logging.exception("Some issue in parsing test diffs")
+ sentry_sdk.capture_exception(e)
+ return "\n".join(sections.values())
+
+ def get_user_prompt(self) -> str:
+ return self.base_user_prompt.format(
+ original_source_code=self.data.original_source_code,
+ modified_source_code=self.data.modified_source_code,
+ test_details=self.build_test_details(self.data.test_diffs),
+ )
+
+ def extract_diff_patches_from_llm_res(self, llm_res: str) -> str:
+ matches = REPLACE_IN_FILE_TAGS_REGEX.findall(llm_res)
+ replace_tags = ""
+ if matches and len(matches) != 0:
+ replace_tags = f"{matches[0]}"
+
+ return replace_tags
+
+ def apply_patches_to_optimized_code(self, replace_tags: str) -> str:
+ if replace_tags == "":
+ return ""
+
+ file_to_code = split_markdown_code(self.data.modified_source_code)
+ # sometimes the llm can write multiple replace tags for the same file, so we group them by path to avoid parsing & applying multiple times
+ file_to_diffs = group_diff_patches_by_path(replace_tags)
+
+ for path, diff in file_to_diffs.items():
+ scoped_code = file_to_code.get(path, None)
+ if scoped_code is None:
+ debug_log_sensitive_data(f"no scoped code for {path}, existing: {file_to_code.keys()}")
+ continue
+ new_code = apply_patches(diff, scoped_code)
+ file_to_code[path] = new_code
+ return group_code(file_to_code)
+
+ def is_valid(self, new_refined_code: str) -> bool:
+ if is_markdown_structure_changed(new_refined_code, self.data.modified_source_code):
+ return False
+ valid = True
+ for code in split_markdown_code(new_refined_code).values():
+ stripped_code = code.strip()
+ if not stripped_code:
+ valid = False
+ break
+ try:
+ parse_module_to_cst(code)
+ except cst.ParserSyntaxError:
+ valid = False
+ break
+ return valid
+
+ def validate_python_module(self) -> None:
+ for _code in split_markdown_code(self.data.modified_source_code).values():
+ try:
+ cst_module = parse_module_to_cst(_code)
+ CodeAndExplanation(cst_module, "")
+ except (ValueError, ValidationError, cst.ParserSyntaxError): # noqa: TRY203
+ raise
diff --git a/django/aiservice/log_features/log_features.py b/django/aiservice/log_features/log_features.py
index bfef9f92b..727e36c8a 100644
--- a/django/aiservice/log_features/log_features.py
+++ b/django/aiservice/log_features/log_features.py
@@ -44,6 +44,7 @@ def log_features(
experiment_metadata: dict[str, str] | None = None,
final_explanation: str | None = None,
ranking: dict[str, Any] | None = None,
+ optimizations_origin: dict[str, dict[str, str]] | None = None,
) -> None:
"""Log features of a code optimization run to the database.
@@ -95,6 +96,7 @@ def log_features(
"experiment_metadata": experiment_metadata,
"final_explanation": final_explanation,
"ranking": ranking,
+ "optimizations_origin": optimizations_origin,
},
)
@@ -148,6 +150,14 @@ def log_features(
f.ranking = f.ranking | ranking if ranking is not None else f.ranking
else:
f.ranking = ranking if ranking is not None else f.ranking
+
+ if f.optimizations_origin is not None:
+ # merge the optimizations_origin with the existing ones
+ f.optimizations_origin = merge_dicts(f.optimizations_origin, optimizations_origin or {})
+ else:
+ f.optimizations_origin = (
+ optimizations_origin if optimizations_origin is not None else f.optimizations_origin
+ )
f.speedup_ratio = speedup_ratio if speedup_ratio is not None else f.speedup_ratio
f.original_runtime = original_runtime if original_runtime is not None else f.original_runtime
f.optimized_runtime = optimized_runtime if optimized_runtime is not None else f.optimized_runtime
@@ -165,6 +175,22 @@ def log_features(
f.save()
+def merge_dicts(a: dict[str, dict[str, str]], b: dict[str, dict[str, str]]) -> dict[str, dict[str, str]]:
+ result: dict[str, dict[str, str]] = {}
+
+ for key, inner in a.items():
+ result[key] = inner.copy()
+
+ for key, inner in b.items():
+ if key not in result:
+ result[key] = inner.copy()
+ else:
+ # b overrides a
+ result[key].update(inner)
+
+ return result
+
+
@features_api.post("/", response={200: None, 500: LoggingErrorResponseSchema})
async def log_features_cli(request: HttpRequest, data: LoggingSchema) -> int | tuple[int, LoggingErrorResponseSchema]:
try:
diff --git a/django/aiservice/log_features/models.py b/django/aiservice/log_features/models.py
index 12ea6b832..b66f3ddc5 100644
--- a/django/aiservice/log_features/models.py
+++ b/django/aiservice/log_features/models.py
@@ -31,6 +31,7 @@ class OptimizationFeatures(models.Model):
experiment_metadata = models.JSONField(null=True, blank=True)
final_explanation = models.TextField(null=True, blank=True)
ranking = models.JSONField(null=True, blank=True)
+ optimizations_origin = models.JSONField(null=True, blank=True)
# PR suggestions or create Approval fields
approval_required = models.BooleanField(default=False)
diff --git a/django/aiservice/optimizer/diff_patches_utils/patches_v2.py b/django/aiservice/optimizer/diff_patches_utils/patches_v2.py
index 7988d705d..949b9ea7d 100644
--- a/django/aiservice/optimizer/diff_patches_utils/patches_v2.py
+++ b/django/aiservice/optimizer/diff_patches_utils/patches_v2.py
@@ -1,5 +1,7 @@
from pydantic import ValidationError
+from optimizer.context_utils.constants import MULTI_REPLACE_IN_FILE_TAGS_REGEX
+
class SearchReplaceBlock:
def __init__(self, search, replace):
@@ -19,36 +21,44 @@ def parse_diff(diff: str) -> list[SearchReplaceBlock]:
blocks: list[SearchReplaceBlock] = []
lines = diff.splitlines(keepends=True)
+ n = len(lines)
idx = 0
- while idx < len(lines):
- line = lines[idx].strip()
- if line == "<<<<<<< SEARCH":
- search_lines = []
+ # Precompute the "marker" strings for efficiency
+ search_marker = "<<<<<<< SEARCH"
+ delimiter_marker = "======="
+ replace_marker = ">>>>>>> REPLACE"
+
+ while idx < n:
+ line_stripped = lines[idx].strip()
+ if line_stripped.startswith(search_marker):
idx += 1
-
- while idx < len(lines) and lines[idx].strip() != "=======":
- search_lines.append(lines[idx])
+ search_start = idx
+ # Find delimiter_marker line
+ while idx < n and lines[idx].strip() != delimiter_marker:
idx += 1
+ search_end = idx
- if idx >= len(lines):
+ if idx >= n:
raise ValueError("Invalid diff format: Missing '=======' marker")
-
- replace_lines = []
idx += 1
-
- while idx < len(lines) and lines[idx].strip() != ">>>>>>> REPLACE":
- replace_lines.append(lines[idx])
+ replace_start = idx
+ while idx < n and not lines[idx].strip().startswith(replace_marker):
idx += 1
+ replace_end = idx
- if idx >= len(lines):
- raise ValueError("Invalid diff format: Missing '>>>>>>> REPLACE' marker")
+ if idx >= n:
+ raise ValueError(
+ "Invalid diff format: Missing '>>>>>>> REPLACE' marker"
+ )
- search_content = "".join(search_lines).rstrip()
- replace_content = "".join(replace_lines).rstrip()
+ search_content = "".join(lines[search_start:search_end]).rstrip()
+ replace_content = "".join(lines[replace_start:replace_end]).rstrip()
try:
- block = SearchReplaceBlock.from_block(search=search_content, replace=replace_content)
+ block = SearchReplaceBlock.from_block(
+ search=search_content, replace=replace_content
+ )
blocks.append(block)
except ValidationError as ve:
raise ValueError(f"Invalid block format: {ve}")
@@ -61,13 +71,35 @@ def parse_diff(diff: str) -> list[SearchReplaceBlock]:
return blocks
+def group_diff_patches_by_path(replace_tags_str: str) -> dict[str, str]:
+ matches = MULTI_REPLACE_IN_FILE_TAGS_REGEX.findall(replace_tags_str)
+ file_to_diffs = {}
+
+ current_file = None
+ current_diff = ""
+
+ for path, diff in matches:
+ if path != current_file:
+ if current_file:
+ file_to_diffs[current_file] = current_diff
+ current_file = path
+ current_diff = diff
+ else:
+ current_diff += diff
+
+ if current_file:
+ file_to_diffs[current_file] = current_diff
+
+ return file_to_diffs
+
+
def apply_patches(diff_str: str, content: str) -> str:
try:
patch_blocks = parse_diff(diff_str)
except ValueError:
return content
- for idx, block in enumerate(patch_blocks, 1):
+ for block in patch_blocks:
if not block.search:
if block.replace:
# a replacement block without a search, then just add the replace block
@@ -79,5 +111,7 @@ def apply_patches(diff_str: str, content: str) -> str:
start_char_idx = content.find(block.search)
if start_char_idx != -1:
end_char_idx = start_char_idx + len(block.search)
- content = f"{content[:start_char_idx]}{block.replace}{content[end_char_idx:]}"
+ content = (
+ f"{content[:start_char_idx]}{block.replace}{content[end_char_idx:]}"
+ )
return content
diff --git a/django/aiservice/optimizer/models.py b/django/aiservice/optimizer/models.py
index b9d7d724c..03cff0f79 100644
--- a/django/aiservice/optimizer/models.py
+++ b/django/aiservice/optimizer/models.py
@@ -1,9 +1,18 @@
+import enum
+
import libcst
from ninja import Schema
from pydantic import field_validator
from pydantic.dataclasses import dataclass
+class OptimizedCandidateSource(str, enum.Enum):
+ OPTIMIZE = "OPTIMIZE"
+ OPTIMIZE_LP = "OPTIMIZE_LP"
+ REFINE = "REFINE"
+ REPAIR = "REPAIR"
+
+
@dataclass(frozen=True)
class CodeAndExplanation:
cst_module: libcst.Module | None
diff --git a/django/aiservice/optimizer/optimizer.py b/django/aiservice/optimizer/optimizer.py
index c4fad9cf5..c70795b1f 100644
--- a/django/aiservice/optimizer/optimizer.py
+++ b/django/aiservice/optimizer/optimizer.py
@@ -8,6 +8,11 @@ from typing import TYPE_CHECKING
import libcst as cst
import sentry_sdk
+from ninja import NinjaAPI
+from ninja.errors import HttpError
+from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
+from pydantic import ValidationError
+
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, should_hack_for_demo, validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
@@ -15,11 +20,6 @@ from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
from authapp.user import get_user_by_id
from log_features.log_event import log_optimization_event
from log_features.log_features import log_features
-from ninja import NinjaAPI
-from ninja.errors import HttpError
-from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
-from pydantic import ValidationError
-
from optimizer.context_utils.context_helpers import group_code
from optimizer.context_utils.optimizer_context import (
BaseOptimizerContext,
@@ -27,10 +27,9 @@ from optimizer.context_utils.optimizer_context import (
OptimizeResponseItemSchema,
OptimizeResponseSchema,
)
-from optimizer.models import OptimizeSchema # noqa: TC001
+from optimizer.models import OptimizedCandidateSource, OptimizeSchema # noqa: TC001
if TYPE_CHECKING:
- from aiservice.models.aimodels import LLM
from django.http import HttpRequest
from openai.types.chat import (
ChatCompletionAssistantMessageParam,
@@ -38,6 +37,8 @@ if TYPE_CHECKING:
ChatCompletionToolMessageParam,
)
+ from aiservice.models.aimodels import LLM
+
optimizations_json = [
{
@@ -345,7 +346,7 @@ async def optimize(
},
explanations_post={cei.optimization_id: cei.explanation for cei in optimization_response_items},
experiment_metadata=data.experiment_metadata if data.experiment_metadata else None,
- # request=request,
+ optimizations_origin={cei.optimization_id: {"source": OptimizedCandidateSource.OPTIMIZE, "parent": None} for cei in optimization_response_items},
)
)
diff --git a/django/aiservice/optimizer/optimizer_line_profiler.py b/django/aiservice/optimizer/optimizer_line_profiler.py
index 0df6b4994..a24aafbad 100644
--- a/django/aiservice/optimizer/optimizer_line_profiler.py
+++ b/django/aiservice/optimizer/optimizer_line_profiler.py
@@ -5,29 +5,30 @@ from pathlib import Path
from typing import TYPE_CHECKING
import sentry_sdk
+from ninja import NinjaAPI, Schema
+from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
+
from aiservice.analytics.posthog import ph
from aiservice.common_utils import parse_python_version, validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, debug_log_sensitive_data_from_callable, llm_clients
from aiservice.models.aimodels import OPTIMIZE_MODEL, calculate_llm_cost
from log_features.log_event import update_optimization_cost
from log_features.log_features import log_features
-from ninja import NinjaAPI, Schema
-from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
-
from optimizer.context_utils.optimizer_context import (
BaseOptimizerContext,
OptimizeErrorResponseSchema,
OptimizeResponseSchema,
)
+from optimizer.models import OptimizedCandidateSource
if TYPE_CHECKING:
- from aiservice.models.aimodels import LLM
from openai.types.chat import (
ChatCompletionAssistantMessageParam,
ChatCompletionFunctionMessageParam,
ChatCompletionToolMessageParam,
)
+ from aiservice.models.aimodels import LLM
from optimizer.context_utils.optimizer_context import OptimizeResponseItemSchema
@@ -190,6 +191,8 @@ async def optimize(request, data: OptimizeSchemaLP) -> tuple[int, OptimizeRespon
},
explanations_post={cei.optimization_id: cei.explanation for cei in optimization_response_items},
experiment_metadata=data.experiment_metadata if data.experiment_metadata else None,
+ optimizations_origin={cei.optimization_id: {"source": OptimizedCandidateSource.OPTIMIZE_LP, "parent": None} for cei in optimization_response_items},
+
)
response = OptimizeResponseSchema(optimizations=optimization_response_items)
diff --git a/django/aiservice/optimizer/refinement.py b/django/aiservice/optimizer/refinement.py
index eaf643426..b46606432 100644
--- a/django/aiservice/optimizer/refinement.py
+++ b/django/aiservice/optimizer/refinement.py
@@ -2,31 +2,34 @@ from __future__ import annotations
import asyncio
import logging
+import uuid
from pathlib import Path
from typing import TYPE_CHECKING
import libcst as cst
import sentry_sdk
+from ninja import NinjaAPI, Schema
+from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
+from pydantic import ValidationError
+
from aiservice.analytics.posthog import ph
from aiservice.common_utils import validate_trace_id
from aiservice.env_specific import debug_log_sensitive_data, llm_clients
from aiservice.models.aimodels import REFINEMENT_MODEL, calculate_llm_cost
from log_features.log_event import update_optimization_cost
from log_features.log_features import log_features
-from ninja import NinjaAPI, Schema
-from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
-from pydantic import ValidationError
-
from optimizer.context_utils.refiner_context import BaseRefinerContext, RefinementContextData
+from optimizer.models import OptimizedCandidateSource
if TYPE_CHECKING:
- from aiservice.models.aimodels import LLM
from openai.types.chat import (
ChatCompletionAssistantMessageParam,
ChatCompletionFunctionMessageParam,
ChatCompletionToolMessageParam,
)
+ from aiservice.models.aimodels import LLM
+
refinement_api = NinjaAPI(urls_namespace="refinement")
@@ -260,7 +263,8 @@ async def refinement( # noqa: D417
refined_optimization = ""
return RefinementIntermediateResponseItemschema(
- optimization_id=optimization_id,
+ parent_id=optimization_id,
+ optimization_id=str(uuid.uuid4()),
source_code=refined_optimization,
explanation=refined_explanation,
original_explanation=ctx.data.optimized_explanation,
@@ -291,6 +295,7 @@ class OptimizeErrorResponseSchema(Schema):
class RefinementIntermediateResponseItemschema(Schema):
# the key will be the optimization id and the value will be the actual refined code
explanation: str
+ parent_id: str
optimization_id: str
source_code: str
original_explanation: str
@@ -301,6 +306,7 @@ class RefinementResponseItemschema(Schema):
# the key will be the optimization id and the value will be the actual refined code
explanation: str
optimization_id: str
+ parent_id: str
source_code: str
@@ -374,27 +380,30 @@ async def refine(
trace_id=trace_id,
user_id=request.user,
optimizations_raw={
- cei.optimization_id[:-4] + "refi": cei.source_code
+ cei.optimization_id: cei.source_code
for cei in refinement_data
if not isinstance(cei, OptimizeErrorResponseSchema)
},
- optimizations_post={
- cei.optimization_id[:-4] + "refi": cei.source_code for cei in filtered_refined_optimizations
- },
+ optimizations_post={cei.optimization_id: cei.source_code for cei in filtered_refined_optimizations},
explanations_raw={
- cei.optimization_id[:-4] + "refi": cei.explanation
+ cei.optimization_id: cei.explanation
for cei in refinement_data
if not isinstance(cei, OptimizeErrorResponseSchema)
},
- explanations_post={
- cei.optimization_id[:-4] + "refi": cei.explanation for cei in filtered_refined_optimizations
+ explanations_post={cei.optimization_id: cei.explanation for cei in filtered_refined_optimizations},
+ optimizations_origin={
+ cei.optimization_id: {"source": OptimizedCandidateSource.REFINE, "parent": cei.parent_id}
+ for cei in filtered_refined_optimizations
},
)
await update_optimization_cost(trace_id=trace_id, cost=total_llm_cost)
return 200, Refinementschema(
refinements=[
RefinementResponseItemschema(
- source_code=x.source_code, explanation=x.original_explanation, optimization_id=x.optimization_id
+ source_code=x.source_code,
+ explanation=x.original_explanation,
+ optimization_id=x.optimization_id,
+ parent_id=x.parent_id,
)
for x in filtered_refined_optimizations
]
diff --git a/django/aiservice/testgen/instrumentation/edit_generated_test.py b/django/aiservice/testgen/instrumentation/edit_generated_test.py
index acdb61549..0f3246be9 100644
--- a/django/aiservice/testgen/instrumentation/edit_generated_test.py
+++ b/django/aiservice/testgen/instrumentation/edit_generated_test.py
@@ -3,6 +3,7 @@ from __future__ import annotations
from typing import TYPE_CHECKING, cast
from libcst import CSTTransformer, ImportAlias, ImportFrom, MetadataWrapper, Name, parse_expression, parse_module
+from functools import lru_cache
if TYPE_CHECKING:
from libcst import (
@@ -19,6 +20,7 @@ if TYPE_CHECKING:
from aiservice.models.functions_to_optimize import FunctionToOptimize
+@lru_cache(maxsize=128)
def parse_module_to_cst(module_str: str) -> Module:
"""Parse a module string into its libCST representation.
diff --git a/django/aiservice/tests/optimizer/test_code_repair.py b/django/aiservice/tests/optimizer/test_code_repair.py
new file mode 100644
index 000000000..889ce7170
--- /dev/null
+++ b/django/aiservice/tests/optimizer/test_code_repair.py
@@ -0,0 +1,389 @@
+
+
+from code_repair.code_repair_context import CodeRepairContext, CodeRepairContextData
+from optimizer.diff_patches_utils.patches_v2 import apply_patches
+
+
+def test_code_repair_single_file():
+
+ original_code = """```python:demo.py
+import math
+from typing import List, Tuple, Optional
+
+def calculate_portfolio_metrics(
+ investments: List[Tuple[str, float, float]],
+ risk_free_rate: float = 0.02
+) -> dict:
+ if not investments:
+ raise ValueError("Investments list cannot be empty")
+
+ if abs(sum(weight for _, weight, _ in investments) - 1.0) > 1e-10:
+ raise ValueError("Portfolio weights must sum to 1.0")
+
+ # Calculate weighted return
+ weighted_return = sum(weight * ret for _, weight, ret in investments)
+
+ # Calculate portfolio volatility (simplified)
+ volatility = math.sqrt(sum((weight * ret) ** 2 for _, weight, ret in investments))
+
+ # Calculate Sharpe ratio
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ # Find best and worst performing assets
+ best_asset = max(investments, key=lambda x: x[2])
+ worst_asset = min(investments, key=lambda x: x[2])
+
+ return {
+ 'weighted_return': round(weighted_return, 6),
+ 'volatility': round(volatility, 6),
+ 'sharpe_ratio': round(sharpe_ratio, 6),
+ 'best_performing': (best_asset[0], round(best_asset[2], 6)),
+ 'worst_performing': (worst_asset[0], round(worst_asset[2], 6)),
+ 'total_assets': len(investments)
+ }
+```
+"""
+ optimized_code = """```python:demo.py
+import math
+from typing import List, Tuple, Optional
+
+def calculate_portfolio_metrics(
+ investments: List[Tuple[str, float, float]],
+ risk_free_rate: float = 0.02
+) -> dict:
+ if not investments:
+ raise ValueError("Investments list cannot be empty")
+
+ total_weight = sum(w for _, w, _ in investments)
+ if total_weight != 1.0: # Should use tolerance check
+ raise ValueError("Portfolio weights must sum to 1.0")
+
+ weighted_return = 1.0
+ for _, weight, ret in investments:
+ weighted_return *= (1 + ret) ** weight
+ weighted_return = weighted_return - 1.0 # Convert back from geometric
+
+ returns = [r for _, _, r in investments]
+ mean_return = sum(returns) / len(returns)
+ volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
+
+ # BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ def risk_adjusted_return(return_val, weight):
+ return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
+
+ best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+ worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+
+ return {
+ "weighted_return": round(weighted_return, 6),
+ "volatility": 2,
+ "sharpe_ratio": round(sharpe_ratio, 6),
+ "best_performing": (best_asset[0], round(best_asset[2], 6)),
+ "worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
+ "total_assets": len(investments),
+ }
+```
+"""
+
+
+ llm_response = """Looking at the test failure and comparing the original and modified code, I can see several issues in the modified implementation:
+
+1. **Hardcoded volatility**: The volatility is hardcoded to `2` instead of being calculated
+2. **Incorrect weight validation**: Missing tolerance check for weight sum validation
+3. **Wrong volatility calculation**: Using standard deviation of returns instead of weighted portfolio volatility
+4. **Wrong weighted return calculation**: Using geometric mean instead of arithmetic weighted return
+5. **Wrong asset selection criteria**: Using risk-adjusted return instead of simple return
+
+Let me fix these issues:
+
+
+demo.py
+
+<<<<<<< SEARCH
+ total_weight = sum(w for _, w, _ in investments)
+ if total_weight != 1.0: # Should use tolerance check
+ raise ValueError("Portfolio weights must sum to 1.0")
+=======
+ if abs(sum(weight for _, weight, _ in investments) - 1.0) > 1e-10:
+ raise ValueError("Portfolio weights must sum to 1.0")
+>>>>>>> REPLACE
+
+
+
+
+demo.py
+
+<<<<<<< SEARCH
+ weighted_return = 1.0
+ for _, weight, ret in investments:
+ weighted_return *= (1 + ret) ** weight
+ weighted_return = weighted_return - 1.0 # Convert back from geometric
+=======
+ # Calculate weighted return
+ weighted_return = sum(weight * ret for _, weight, ret in investments)
+>>>>>>> REPLACE
+
+
+
+
+demo.py
+
+<<<<<<< SEARCH
+ returns = [r for _, _, r in investments]
+ mean_return = sum(returns) / len(returns)
+ volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
+=======
+ # Calculate portfolio volatility (simplified)
+ volatility = math.sqrt(sum((weight * ret) ** 2 for _, weight, ret in investments))
+>>>>>>> REPLACE
+
+
+
+
+demo.py
+
+<<<<<<< SEARCH
+ # BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ def risk_adjusted_return(return_val, weight):
+ return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
+
+ best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+ worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+=======
+ # Calculate Sharpe ratio
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ # Find best and worst performing assets
+ best_asset = max(investments, key=lambda x: x[2])
+ worst_asset = min(investments, key=lambda x: x[2])
+>>>>>>> REPLACE
+
+
+
+
+demo.py
+
+<<<<<<< SEARCH
+ return {
+ "weighted_return": round(weighted_return, 6),
+ "volatility": 2,
+ "sharpe_ratio": round(sharpe_ratio, 6),
+ "best_performing": (best_asset[0], round(best_asset[2], 6)),
+ "worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
+ "total_assets": len(investments),
+ }
+=======
+ return {
+ 'weighted_return': round(weighted_return, 6),
+ 'volatility': round(volatility, 6),
+ 'sharpe_ratio': round(sharpe_ratio, 6),
+ 'best_performing': (best_asset[0], round(best_asset[2], 6)),
+ 'worst_performing': (worst_asset[0], round(worst_asset[2], 6)),
+ 'total_assets': len(investments)
+ }
+>>>>>>> REPLACE
+
+
+
+The key changes I made:
+
+1. **Fixed weight validation**: Restored the tolerance-based check using `abs(sum(...) - 1.0) > 1e-10` instead of exact equality
+2. **Fixed weighted return calculation**: Changed from geometric mean back to simple weighted arithmetic mean
+3. **Fixed volatility calculation**: Restored the original simplified portfolio volatility formula using weighted returns
+4. **Fixed volatility return value**: Removed the hardcoded `2` and properly calculated and rounded the volatility
+5. **Fixed asset selection**: Restored the original logic to find best/worst assets based on simple returns rather than risk-adjusted returns
+6. **Fixed dictionary formatting**: Changed from double quotes to single quotes to match original formatting
+
+These changes align the modified code with the original implementation's behavior, ensuring that the test for zero volatility passes (when cash investment has 0% return, the volatility should indeed be 0.0)."""
+    ctx = CodeRepairContext(CodeRepairContextData(original_code, optimized_code, ""), "", "")
+ diff_patches = ctx.extract_diff_patches_from_llm_res(llm_response)
+ refined_optimization = ctx.apply_patches_to_optimized_code(diff_patches)
+
+ print(refined_optimization)
+ assert ctx.is_valid(refined_optimization)
+
+
+"""
+```python:demo.py
+import math
+from typing import List, Tuple, Optional
+
+def calculate_portfolio_metrics(
+ investments: List[Tuple[str, float, float]],
+ risk_free_rate: float = 0.02
+) -> dict:
+ if not investments:
+ raise ValueError("Investments list cannot be empty")
+
+ total_weight = sum(w for _, w, _ in investments)
+ if total_weight != 1.0: # Should use tolerance check
+ raise ValueError("Portfolio weights must sum to 1.0")
+
+ weighted_return = 1.0
+ for _, weight, ret in investments:
+ weighted_return *= (1 + ret) ** weight
+ weighted_return = weighted_return - 1.0 # Convert back from geometric
+
+ returns = [r for _, _, r in investments]
+ mean_return = sum(returns) / len(returns)
+ volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
+
+ # BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ def risk_adjusted_return(return_val, weight):
+ return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
+
+ best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+ worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+
+ return {
+ "weighted_return": round(weighted_return, 6),
+ "volatility": 2,
+ "sharpe_ratio": round(sharpe_ratio, 6),
+ "best_performing": (best_asset[0], round(best_asset[2], 6)),
+ "worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
+ "total_assets": len(investments),
+ }
+```
+"""
+
+
+def test_patch_apply():
+ patch = """<<<<<<< SEARCH
+ total_weight = sum(w for _, w, _ in investments)
+ if total_weight != 1.0: # Should use tolerance check
+ raise ValueError("Portfolio weights must sum to 1.0")
+=======
+ if abs(sum(weight for _, weight, _ in investments) - 1.0) > 1e-10:
+ raise ValueError("Portfolio weights must sum to 1.0")
+>>>>>>> REPLACE
+
+<<<<<<< SEARCH
+ weighted_return = 1.0
+ for _, weight, ret in investments:
+ weighted_return *= (1 + ret) ** weight
+ weighted_return = weighted_return - 1.0 # Convert back from geometric
+=======
+ # Calculate weighted return
+ weighted_return = sum(weight * ret for _, weight, ret in investments)
+>>>>>>> REPLACE
+
+<<<<<<< SEARCH
+ returns = [r for _, _, r in investments]
+ mean_return = sum(returns) / len(returns)
+ volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
+=======
+ # Calculate portfolio volatility (simplified)
+ volatility = math.sqrt(sum((weight * ret) ** 2 for _, weight, ret in investments))
+>>>>>>> REPLACE
+
+<<<<<<< SEARCH
+ # BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ def risk_adjusted_return(return_val, weight):
+ return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
+
+ best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+ worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+=======
+ # Calculate Sharpe ratio
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ # Find best and worst performing assets
+ best_asset = max(investments, key=lambda x: x[2])
+ worst_asset = min(investments, key=lambda x: x[2])
+>>>>>>> REPLACE
+
+<<<<<<< SEARCH
+ return {
+ "weighted_return": round(weighted_return, 6),
+ "volatility": 2,
+ "sharpe_ratio": round(sharpe_ratio, 6),
+ "best_performing": (best_asset[0], round(best_asset[2], 6)),
+ "worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
+ "total_assets": len(investments),
+ }
+=======
+ return {
+ 'weighted_return': round(weighted_return, 6),
+ 'volatility': round(volatility, 6),
+ 'sharpe_ratio': round(sharpe_ratio, 6),
+ 'best_performing': (best_asset[0], round(best_asset[2], 6)),
+ 'worst_performing': (worst_asset[0], round(worst_asset[2], 6)),
+ 'total_assets': len(investments)
+ }
+>>>>>>> REPLACE
+"""
+ code = """import math
+from typing import List, Tuple, Optional
+
+def calculate_portfolio_metrics(
+ investments: List[Tuple[str, float, float]],
+ risk_free_rate: float = 0.02
+) -> dict:
+ if not investments:
+ raise ValueError("Investments list cannot be empty")
+
+ total_weight = sum(w for _, w, _ in investments)
+ if total_weight != 1.0: # Should use tolerance check
+ raise ValueError("Portfolio weights must sum to 1.0")
+
+ weighted_return = 1.0
+ for _, weight, ret in investments:
+ weighted_return *= (1 + ret) ** weight
+ weighted_return = weighted_return - 1.0 # Convert back from geometric
+
+ returns = [r for _, _, r in investments]
+ mean_return = sum(returns) / len(returns)
+ volatility = math.sqrt(sum((r - mean_return) ** 2 for r in returns) / len(returns))
+
+ # BUG 4: Sharpe ratio calculation is correct but uses wrong inputs
+ if volatility == 0:
+ sharpe_ratio = 0.0
+ else:
+ sharpe_ratio = (weighted_return - risk_free_rate) / volatility
+
+ def risk_adjusted_return(return_val, weight):
+ return (return_val - risk_free_rate) / (weight * return_val) if weight * return_val != 0 else return_val
+
+ best_asset = max(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+ worst_asset = min(investments, key=lambda x: risk_adjusted_return(x[2], x[1]))
+
+ return {
+ "weighted_return": round(weighted_return, 6),
+ "volatility": 2,
+ "sharpe_ratio": round(sharpe_ratio, 6),
+ "best_performing": (best_asset[0], round(best_asset[2], 6)),
+ "worst_performing": (worst_asset[0], round(worst_asset[2], 6)),
+ "total_assets": len(investments),
+ }
+"""
+ new_code = apply_patches(patch, code)
+    assert "'volatility': round(volatility, 6)" in new_code and '"volatility": 2' not in new_code
\ No newline at end of file
diff --git a/experiments/code_repair_dashboard.html b/experiments/code_repair_dashboard.html
new file mode 100644
index 000000000..e6112b3df
--- /dev/null
+++ b/experiments/code_repair_dashboard.html
@@ -0,0 +1,475 @@
+
+
+
+
+
+ Code Repair Logs Dashboard
+
+
+
+ Code Repair Logs Dashboard
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/experiments/generate_dashboard.py b/experiments/generate_dashboard.py
new file mode 100644
index 000000000..e7d9119a3
--- /dev/null
+++ b/experiments/generate_dashboard.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""Generate an HTML dashboard from code_repair_logs SQLite database."""
+
+import json
+import sqlite3
+import webbrowser
+from pathlib import Path
+
+
+def main():
+ db_path = Path(__file__).parent / "code_repair_log.db"
+ cf_db_path = Path(__file__).parent / "code_repair_logs_cf.db"
+ template_path = Path(__file__).parent / "code_repair_dashboard.html"
+ output_path = Path(__file__).parent / "code_repair_dashboard_live.html"
+
+ # Connect to main database and fetch all logs
+ conn = sqlite3.connect(db_path)
+ conn.row_factory = sqlite3.Row
+ cursor = conn.cursor()
+
+ cursor.execute("""
+ SELECT optimization_id, trace_id, user_prompt, explanation,
+ refined_optimization, created_at, updated_at
+ FROM code_repair_logs
+ ORDER BY created_at DESC
+ """)
+
+ rows = cursor.fetchall()
+ conn.close()
+
+ # Connect to cf database and fetch passed/faster status
+ cf_conn = sqlite3.connect(cf_db_path)
+ cf_conn.row_factory = sqlite3.Row
+ cf_cursor = cf_conn.cursor()
+
+ cf_cursor.execute("""
+ SELECT optimization_id, passed, faster
+ FROM code_repair_logs_cf
+ """)
+
+ cf_rows = cf_cursor.fetchall()
+ cf_conn.close()
+
+ # Create lookup dict for cf data
+ cf_data = {row["optimization_id"]: {"passed": row["passed"], "faster": row["faster"]} for row in cf_rows}
+
+ # Convert to list of dicts and merge cf data
+ data = []
+ for row in rows:
+ d = dict(row)
+ opt_id = d["optimization_id"][:-4] + "cdrp"
+ if opt_id in cf_data:
+ d["passed"] = cf_data[opt_id]["passed"]
+ d["faster"] = cf_data[opt_id]["faster"]
+ else:
+ d["passed"] = None
+ d["faster"] = None
+ data.append(d)
+
+ # Read template
+ template = template_path.read_text()
+
+ # Replace placeholder with actual data
+ json_data = json.dumps(data, default=str, indent=2)
+ html_content = template.replace("DATA_PLACEHOLDER", json_data)
+
+ # Write output
+ output_path.write_text(html_content)
+
+ print(f"Dashboard generated: {output_path}")
+ print(f"Total logs: {len(data)}")
+ print(f"Unique traces: {len(set(d['trace_id'] for d in data))}")
+
+ # Open in browser
+ webbrowser.open(f"file://{output_path.absolute()}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/js/VSC-Extension/packages/sidebar-webview/src/components/codeBlock/collapsibleCodeBlock.tsx b/js/VSC-Extension/packages/sidebar-webview/src/components/codeBlock/collapsibleCodeBlock.tsx
index bad91af04..a8a3ac424 100644
--- a/js/VSC-Extension/packages/sidebar-webview/src/components/codeBlock/collapsibleCodeBlock.tsx
+++ b/js/VSC-Extension/packages/sidebar-webview/src/components/codeBlock/collapsibleCodeBlock.tsx
@@ -39,7 +39,7 @@ const CollapsibleCodeBlock = memo(
setIsExpanded(false);
}
},
- [isExpanded]
+ [isExpanded],
);
if (!hasMoreLines) {
diff --git a/js/VSC-Extension/packages/sidebar-webview/src/components/tabs.tsx b/js/VSC-Extension/packages/sidebar-webview/src/components/tabs.tsx
index ed024a4f2..ae19a8afb 100644
--- a/js/VSC-Extension/packages/sidebar-webview/src/components/tabs.tsx
+++ b/js/VSC-Extension/packages/sidebar-webview/src/components/tabs.tsx
@@ -23,7 +23,9 @@ const Tabs = () => {
>
Tasks
- {tasksCount > 0 && {tasksCount}}
+ {tasksCount > 0 && (
+ {tasksCount}
+ )}
diff --git a/js/VSC-Extension/packages/sidebar-webview/src/initView/script.js b/js/VSC-Extension/packages/sidebar-webview/src/initView/script.js
index 15f9e245b..23ed75cf0 100644
--- a/js/VSC-Extension/packages/sidebar-webview/src/initView/script.js
+++ b/js/VSC-Extension/packages/sidebar-webview/src/initView/script.js
@@ -180,7 +180,12 @@ function renderStepsUI(steps) {
// Show "select different interpreter" option after install button
if (vscodeActionCommand) {
- const { title, btnText: vscodeBtnText, command: vscodeCmd, args = [] } = vscodeActionCommand;
+ const {
+ title,
+ btnText: vscodeBtnText,
+ command: vscodeCmd,
+ args = [],
+ } = vscodeActionCommand;
if (title) {
const detailsElem = document.createElement("p");
detailsElem.className = "step-action";
@@ -192,7 +197,11 @@ function renderStepsUI(steps) {
actionBtn.textContent = vscodeBtnText;
actionBtn.className = "step-action-btn secondary-btn";
actionBtn.addEventListener("click", () => {
- vscode.postMessage({ command: "vscodeCommand", cmd: vscodeCmd, args });
+ vscode.postMessage({
+ command: "vscodeCommand",
+ cmd: vscodeCmd,
+ args,
+ });
});
actionsContainer.appendChild(actionBtn);
}
diff --git a/js/cf-api/github/secret-utils.ts b/js/cf-api/github/secret-utils.ts
index f6a15b10b..0463e428a 100644
--- a/js/cf-api/github/secret-utils.ts
+++ b/js/cf-api/github/secret-utils.ts
@@ -56,9 +56,7 @@ export async function getRepositoryPublicKey(
repo: string,
): Promise<{ public_key: string; key_id: string }> {
try {
- console.log(
- `[secret-utils.ts:getRepositoryPublicKey] Getting public key for ${owner}/${repo}`,
- )
+ console.log(`[secret-utils.ts:getRepositoryPublicKey] Getting public key for ${owner}/${repo}`)
const response = await octokit.rest.actions.getRepoPublicKey({
owner,
@@ -166,4 +164,3 @@ export async function encryptAndStoreSecret(
`[secret-utils.ts:encryptAndStoreSecret] Successfully encrypted and stored secret ${secretName} for ${owner}/${repo}`,
)
}
-
diff --git a/js/common/prisma/migrations/20251210150302_add_optimizations_origin_column/migration.sql b/js/common/prisma/migrations/20251210150302_add_optimizations_origin_column/migration.sql
new file mode 100644
index 000000000..355f697bb
--- /dev/null
+++ b/js/common/prisma/migrations/20251210150302_add_optimizations_origin_column/migration.sql
@@ -0,0 +1,2 @@
+-- AlterTable
+ALTER TABLE "public"."optimization_features" ADD COLUMN "optimizations_origin" JSONB;
diff --git a/js/common/prisma/schema.prisma b/js/common/prisma/schema.prisma
index 724c019e0..a7019efa6 100644
--- a/js/common/prisma/schema.prisma
+++ b/js/common/prisma/schema.prisma
@@ -73,6 +73,7 @@ model optimization_features {
organization String?
repository String?
ranking Json?
+ optimizations_origin Json?
review_quality String? // Hight, Med, low
review_explanation String?
calling_fn_details String?