63 lines
2 KiB
Python
63 lines
2 KiB
Python
import os
|
|
from typing import Any, Dict, Optional
|
|
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
from scipy.stats import hmean
|
|
from sqlalchemy import create_engine
|
|
import openpyxl
|
|
from openpyxl.styles import Alignment
|
|
|
|
def load_data(
    database_uri: Optional[str] = None,
) -> DataFrame:
    """Load the single-prompt experiment rows from ``optimization_features``.

    Args:
        database_uri: SQLAlchemy database URL. When omitted (or ``None``), the
            ``DATABASE_URL`` environment variable is read at call time rather
            than once at import time.

    Returns:
        All columns of the rows that belong to the hard-coded user id and
        whose ``generated_test`` array has at least one element matching the
        single-prompt marker pattern in the query.

    Raises:
        ValueError: If no URI is given and ``DATABASE_URL`` is unset or empty.
    """
    if database_uri is None:
        database_uri = os.environ.get("DATABASE_URL")
    if not database_uri:
        raise ValueError(
            "No database URI provided and the DATABASE_URL environment "
            "variable is not set."
        )

    # NOTE(review): the doubled percent signs are presumably an escape for a
    # pyformat-style DB-API driver -- confirm against the driver in use before
    # changing them.
    query = """
        SELECT * FROM optimization_features WHERE
        user_id = 'github|4725571' AND EXISTS (SELECT 1 FROM unnest(generated_test) AS elem WHERE elem LIKE '%%# TRY41 GENERATED WITH SINGLE PROMPT%%')
    """

    engine = create_engine(database_uri)
    try:
        with engine.connect() as connection:
            return pd.read_sql_query(query, connection)
    finally:
        # Release the engine's connection pool; the engine is local to this call.
        engine.dispose()
|
|
|
|
def split_data(df: DataFrame) -> DataFrame:
    """Add two text columns that partition each row's ``generated_test`` items.

    Every element of a row's ``generated_test`` value is routed by whether it
    contains the marker ``GENERATED WITH SINGLE PROMPT``:

    * ``generated_test_a`` -- elements that do NOT contain the marker
    * ``generated_test_b`` -- elements that DO contain the marker

    Within each column the elements keep their original order and each one is
    followed by a newline.

    Args:
        df: Frame with a ``generated_test`` column whose values are iterables
            of strings.

    Returns:
        A copy of ``df`` with the two columns added. ``df`` itself is not
        modified.
    """
    marker = "GENERATED WITH SINGLE PROMPT"

    def _collect(tests, with_marker: bool) -> str:
        # Join the elements whose marker-membership matches ``with_marker``.
        return "".join(
            elem + "\n" for elem in tests if (marker in elem) == with_marker
        )

    df_ret = df.copy()
    tests_column = df["generated_test"]

    # Build plain lists instead of using DataFrame.apply(axis=1): on an empty
    # frame apply() returns a DataFrame, not a Series, which cannot be
    # assigned to a single column reliably.
    df_ret["generated_test_a"] = [_collect(tests, False) for tests in tests_column]
    df_ret["generated_test_b"] = [_collect(tests, True) for tests in tests_column]
    return df_ret
|
|
|
|
def main() -> None:
    """Export the loaded rows to ``output.xlsx`` with wrapped multi-line cells.

    Loads the data, adds the split test columns, drops ``created_at`` and
    writes the result to an Excel file. The file is then reopened with
    openpyxl so that string cells containing a newline get text wrapping
    enabled, and saved back to the same path.
    """
    excel_path = "output.xlsx"

    features = load_data()
    exportable = split_data(features).drop(columns=["created_at"])
    exportable.to_excel(excel_path, index=False)

    # Reopen the written workbook to adjust cell formatting.
    workbook = openpyxl.load_workbook(excel_path)
    sheet = workbook.active

    # Only string cells that contain a line break need wrapping.
    for cells in sheet.iter_rows():
        for cell in cells:
            text = cell.value
            if not isinstance(text, str) or "\n" not in text:
                continue
            cell.alignment = Alignment(wrapText=True)

    workbook.save(excel_path)
|
|
|
|
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|