Add explanations in the candidate files, and documentation on how to use the script

This commit is contained in:
ihitamandal 2024-06-21 14:25:54 -07:00
parent 1e2ed78a08
commit 242f0c1e7f

View file

@ -21,12 +21,16 @@ def extract_json_values(json_column: Dict[str, Any]) -> Dict[str, Any]:
def write_to_file(filename: str, content: str) -> None:
with open(filename, "w") as file:
if isinstance(content, list):
content = '\n'.join(content)
content = "\n".join(content)
file.write(str(content))
def main(trace_id: str) -> None:
query = "SELECT original_code, optimizations_post, speedup_ratio, generated_test FROM optimization_features WHERE trace_id = %s"
def main(trace_id: str) -> None:
query = "SELECT original_code, optimizations_post, speedup_ratio, generated_test, explanations_post FROM optimization_features WHERE trace_id = %s"
result = execute_query(query, trace_id)
if not result:
@ -38,32 +42,44 @@ def main(trace_id: str) -> None:
optimizations_post = result["optimizations_post"]
speedup_ratio = result["speedup_ratio"]
generated_test = result["generated_test"]
explanations_post = result["explanations_post"]
# Write original code to file
write_to_file("original_code.py", original_code)
# Write each optimization candidate to its own file
for idx, optimization in enumerate(
extract_json_values(optimizations_post).values(), start=1
for idx, (opt_id, optimization) in enumerate(
extract_json_values(optimizations_post).items(), start=1
):
filename = f"optimization_candidate_{idx}.py"
write_to_file(filename, optimization)
explanation = explanations_post.get(opt_id, "")
content_with_comment = f'"""{explanation}"""\n\n{optimization}'
write_to_file(filename, content_with_comment)
# Find and write the best optimization candidate to its own file
if speedup_ratio is not None:
valid_speedup_values = [v for v in speedup_ratio.values() if v is not None]
best_speedup = max(valid_speedup_values, default=None) if valid_speedup_values else None
best_speedup = (
max(valid_speedup_values, default=None) if valid_speedup_values else None
)
if best_speedup is not None:
best_optimization = next(
best_optimization_id = next(
(
optimization
for id, optimization in optimizations_post.items()
if speedup_ratio[id] == best_speedup
id
for id, speedup in speedup_ratio.items()
if speedup == best_speedup
),
None,
)
if best_optimization is not None:
write_to_file("best_optimization_candidate.py", best_optimization)
best_optimization = optimizations_post.get(best_optimization_id)
best_explanation = explanations_post.get(best_optimization_id, "")
best_content_with_comment = (
f'"""{best_explanation}"""\n\n{best_optimization}'
)
if best_optimization and best_explanation is not None:
write_to_file(
"best_optimization_candidate.py", best_content_with_comment
)
else:
print("No speedup ratio found")
@ -71,6 +87,14 @@ def main(trace_id: str) -> None:
write_to_file("generated_tests.py", generated_test)
"""
Run the script using the .env file that contains the DATABASE_URL for the optimization_features table
Pass in the trace_id as a command line argument and the script will generate the following files for that trace id:
- original_code.py
- optimization_candidate_1.py, optimization_candidate_2.py, ...
- best_optimization_candidate.py
- generated_tests.py
"""
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python process_optimization_features.py <trace_id>")