# codeflash-internal/experiments/testgen_analysis.py

import os
from typing import Any, Dict, Optional

import pandas as pd
from pandas import DataFrame
from scipy.stats import hmean
from sqlalchemy import create_engine
import openpyxl
from openpyxl.styles import Alignment

def load_data(
    database_uri: Optional[str] = None
) -> DataFrame:
    """Load the ``optimization_features`` rows tagged with the TRY41 marker.

    The query keeps rows whose ``user_id`` is ``'github|4725571'`` and whose
    ``generated_test`` array has at least one element containing
    ``# TRY41 GENERATED WITH SINGLE PROMPT``.

    Args:
        database_uri: SQLAlchemy connection URL. When omitted, the
            ``DATABASE_URL`` environment variable is read at call time
            (not at import time), so changes to the environment made after
            this module is imported are honoured.

    Returns:
        A DataFrame with every column of the matching rows.

    Raises:
        ValueError: If no URI is given and ``DATABASE_URL`` is unset or empty.
    """
    if database_uri is None:
        database_uri = os.environ.get("DATABASE_URL")
    if not database_uri:
        # Fail early with an actionable message instead of letting
        # create_engine() reject a None/empty URL.
        raise ValueError(
            "No database URI provided: pass database_uri or set the "
            "DATABASE_URL environment variable."
        )

    # NOTE(review): the doubled '%%' presumably escapes a literal '%' for the
    # DB driver's percent-style parameter handling -- confirm before changing.
    query = """
            SELECT * FROM optimization_features WHERE
            user_id = 'github|4725571' AND EXISTS (SELECT 1 FROM unnest(generated_test) AS elem WHERE elem LIKE '%%# TRY41 GENERATED WITH SINGLE PROMPT%%')
        """
    engine = create_engine(database_uri)
    try:
        with engine.connect() as connection:
            return pd.read_sql_query(
                query, connection
            )
    finally:
        # Release pooled connections; the result is fully materialised by now.
        engine.dispose()

def split_data(df: DataFrame) -> DataFrame:
    """Return a copy of *df* with the generated tests split into two text columns.

    Each cell of the ``generated_test`` column is iterated and its elements
    are partitioned by whether they contain the text
    ``GENERATED WITH SINGLE PROMPT``:

    * ``generated_test_a`` -- elements WITHOUT the marker,
    * ``generated_test_b`` -- elements WITH the marker.

    Within each column the selected elements are concatenated in their
    original order, each followed by a newline. A row with no matching
    elements gets an empty string.

    Args:
        df: Frame with a ``generated_test`` column. Assumes every cell is an
            iterable of strings -- TODO confirm against the table schema.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    marker = "GENERATED WITH SINGLE PROMPT"

    def _join_tests(tests, *, with_marker: bool) -> str:
        # Keep only the elements on the requested side of the partition.
        return "".join(
            test + "\n" for test in tests if (marker in test) == with_marker
        )

    df_ret = df.copy()
    generated = df["generated_test"]
    # Build the columns from the single source column rather than a row-wise
    # DataFrame.apply, which returns a DataFrame (not a Series) on empty input.
    df_ret["generated_test_a"] = [
        _join_tests(tests, with_marker=False) for tests in generated
    ]
    df_ret["generated_test_b"] = [
        _join_tests(tests, with_marker=True) for tests in generated
    ]
    return df_ret

def main() -> None:
    """Export the split test data to ``output.xlsx`` and wrap multi-line cells.

    Loads the rows, adds the split columns, writes everything except
    ``created_at`` to the workbook, then reopens the file and enables text
    wrapping on every string cell of the active sheet that contains a newline.
    """
    excel_path = "output.xlsx"

    annotated = split_data(load_data())
    annotated.drop(columns=["created_at"]).to_excel(excel_path, index=False)

    # Reopen the written file so cell formatting can be adjusted.
    workbook = openpyxl.load_workbook(excel_path)
    sheet = workbook.active

    # Only cells holding multi-line text need wrapping turned on.
    for cell in (c for cells in sheet.iter_rows() for c in cells):
        value = cell.value
        if not isinstance(value, str) or "\n" not in value:
            continue
        cell.alignment = Alignment(wrapText=True)

    workbook.save(excel_path)

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()