undo structured output

This commit is contained in:
Aseem Saxena 2025-04-16 20:05:19 -07:00
parent 01062bbbb8
commit dacab4fef1
3 changed files with 24 additions and 20 deletions

View file

@@ -2,7 +2,6 @@ from __future__ import annotations
import ast
import asyncio
import json
import re
import uuid
from pathlib import Path
@@ -128,23 +127,28 @@ async def optimize_python_code(
results = [content for op in outputs if (content := op.choices[0].message.content)]
optimized_code_and_explanations: list[CodeAndExplanation] = []
for result in results:
json_blocks = re.findall(r"\{[^}]*\}", result)
if len(json_blocks)>1:
debug_log_sensitive_data("Ideally should just have one json block")
if len(json_blocks)==0:
debug_log_sensitive_data(f"No json block found in output:\n{result}")
if len(json_blocks)>0:
try:
json_dict = json.loads(json_blocks[0])
except json.JSONDecodeError as e:
debug_log_sensitive_data(f"Failed to parse json:\n{json_blocks[0]}")
debug_log_sensitive_data(f"Traceback: {e}")
continue
if "optimized_code" not in json_dict or "explanation" not in json_dict:
debug_log_sensitive_data(f"invalid json output from llm:\n{json_dict}")
continue
code = json_dict["optimized_code"]
explanation = json_dict["explanation"]
# json_blocks = re.findall(r"```markdown\s*([\s\S]*?)\s*```", result) + re.findall(r"```json\s*([\s\S]*?)\s*```", result)
# if len(json_blocks)>1:
# debug_log_sensitive_data("Ideally should just have one json block")
# if len(json_blocks)==0:
# debug_log_sensitive_data(f"No json block found in output:\n{result}")
# json_blocks = [result]
# if len(json_blocks)>0:
# try:
# json_dict = json.loads(json_blocks[0])
# except json.JSONDecodeError as e:
# debug_log_sensitive_data(f"Failed to parse json:\n{json_blocks[0]}\n{result}")
# debug_log_sensitive_data(f"Traceback: {e}")
# continue
# if "optimized_code" not in json_dict or "explanation" not in json_dict:
# debug_log_sensitive_data(f"invalid json output from llm:\n{json_dict}")
# continue
# code = json_dict["optimized_code"]
# explanation = json_dict["explanation"]
match = re.match(r"(.*)```python(?:\n|\\n)(.*?)```(.*)", result, re.DOTALL | re.MULTILINE)
if match:
code = match.group(2)
explanation = match.group(1) + match.group(3)
try:
cst_module = parse_module_to_cst(code)
except cst.ParserSyntaxError as e:

View file

@@ -1,4 +1,4 @@
Rewrite this python program to run faster. Think step by step and explain your reasoning. Output in json format with the following keys, "optimized_code" and "explanation".
Rewrite this python program to run faster. Explain your reasoning.
```python
{source_code}
```

View file

@@ -273,7 +273,7 @@ def augment_with_best_correct_speedup_ratio(df: DataFrame) -> DataFrame:
def main() -> None:
df = load_data("test_claude_aseem_sanity1")
df = load_data("test_gemini_aseem_apr16")
non_orphan_ids = remove_orphans(df)
df = df.iloc[non_orphan_ids]
#df = process_column_pairs(df, "metadata")