undo structured output
This commit is contained in:
parent
01062bbbb8
commit
dacab4fef1
3 changed files with 24 additions and 20 deletions
|
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|||
|
||||
import ast
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
|
@@ -128,23 +127,28 @@ async def optimize_python_code(
|
|||
results = [content for op in outputs if (content := op.choices[0].message.content)]
|
||||
optimized_code_and_explanations: list[CodeAndExplanation] = []
|
||||
for result in results:
|
||||
json_blocks = re.findall(r"\{[^}]*\}", result)
|
||||
if len(json_blocks)>1:
|
||||
debug_log_sensitive_data("Ideally should just have one json block")
|
||||
if len(json_blocks)==0:
|
||||
debug_log_sensitive_data(f"No json block found in output:\n{result}")
|
||||
if len(json_blocks)>0:
|
||||
try:
|
||||
json_dict = json.loads(json_blocks[0])
|
||||
except json.JSONDecodeError as e:
|
||||
debug_log_sensitive_data(f"Failed to parse json:\n{json_blocks[0]}")
|
||||
debug_log_sensitive_data(f"Traceback: {e}")
|
||||
continue
|
||||
if "optimized_code" not in json_dict or "explanation" not in json_dict:
|
||||
debug_log_sensitive_data(f"invalid json output from llm:\n{json_dict}")
|
||||
continue
|
||||
code = json_dict["optimized_code"]
|
||||
explanation = json_dict["explanation"]
|
||||
# json_blocks = re.findall(r"```markdown\s*([\s\S]*?)\s*```", result) + re.findall(r"```json\s*([\s\S]*?)\s*```", result)
|
||||
# if len(json_blocks)>1:
|
||||
# debug_log_sensitive_data("Ideally should just have one json block")
|
||||
# if len(json_blocks)==0:
|
||||
# debug_log_sensitive_data(f"No json block found in output:\n{result}")
|
||||
# json_blocks = [result]
|
||||
# if len(json_blocks)>0:
|
||||
# try:
|
||||
# json_dict = json.loads(json_blocks[0])
|
||||
# except json.JSONDecodeError as e:
|
||||
# debug_log_sensitive_data(f"Failed to parse json:\n{json_blocks[0]}\n{result}")
|
||||
# debug_log_sensitive_data(f"Traceback: {e}")
|
||||
# continue
|
||||
# if "optimized_code" not in json_dict or "explanation" not in json_dict:
|
||||
# debug_log_sensitive_data(f"invalid json output from llm:\n{json_dict}")
|
||||
# continue
|
||||
# code = json_dict["optimized_code"]
|
||||
# explanation = json_dict["explanation"]
|
||||
match = re.match(r"(.*)```python(?:\n|\\n)(.*?)```(.*)", result, re.DOTALL | re.MULTILINE)
|
||||
if match:
|
||||
code = match.group(2)
|
||||
explanation = match.group(1) + match.group(3)
|
||||
try:
|
||||
cst_module = parse_module_to_cst(code)
|
||||
except cst.ParserSyntaxError as e:
|
||||
|
|
|
|||
|
|
@@ -1,4 +1,4 @@
|
|||
Rewrite this python program to run faster. Think step by step and explain your reasoning. Output in json format with the following keys, "optimized_code" and "explanation".
|
||||
Rewrite this python program to run faster. Explain your reasoning.
|
||||
```python
|
||||
{source_code}
|
||||
```
|
||||
|
|
|
|||
|
|
@@ -273,7 +273,7 @@ def augment_with_best_correct_speedup_ratio(df: DataFrame) -> DataFrame:
|
|||
|
||||
|
||||
def main() -> None:
|
||||
df = load_data("test_claude_aseem_sanity1")
|
||||
df = load_data("test_gemini_aseem_apr16")
|
||||
non_orphan_ids = remove_orphans(df)
|
||||
df = df.iloc[non_orphan_ids]
|
||||
#df = process_column_pairs(df, "metadata")
|
||||
|
|
|
|||
Loading…
Reference in a new issue