2023-10-23 05:14:06 +00:00
# Derived from https://github.com/openai/openai-cookbook/blob/main/examples/Unit_test_writing_using_a_multi-step_prompt.ipynb
# imports needed to run the code in this notebook
2023-12-22 05:58:38 +00:00
# TODO: This is only here as a temporary reference implementation of how an early version of LLM inspired tests was written.
# It didn't work very well. This should be improved significantly.
2023-10-23 05:14:06 +00:00
import ast # used for detecting whether generated Python code is valid
2024-06-23 00:40:47 +00:00
import platform
2024-01-19 19:42:27 +00:00
2023-10-23 05:14:06 +00:00
import openai # used for calling the OpenAI API
2024-10-29 23:39:47 +00:00
from codeflash . code_utils . code_extractor import get_code
from codeflash . code_utils . code_utils import ellipsis_in_ast , get_imports_from_file
from codeflash . models . models import TestsInFile
2025-08-07 21:27:08 +00:00
from codeflash . verification . gen_regression_tests import print_message_delta , print_messages
2025-12-23 04:51:05 +00:00
from aiservice . llm import EXECUTE_MODEL , EXPLAIN_MODEL , LLM , PLAN_MODEL
2023-10-23 05:14:06 +00:00
2023-10-23 05:20:43 +00:00
def regression_tests_from_function_with_inspiration(
    function_to_test: str,  # Python function to test, as a string
    function_name: str,  # name of the function to test
    existing_unit_test_path_and_function: list[
        tuple[str, str]
    ],  # path to existing unit test file and function name that is testing the function
    unit_test_package: str = "pytest",  # unit testing package; use the name as it appears in the import statement
    approx_min_cases_to_cover: int = 7,  # minimum number of test case categories to cover (approximate)
    print_text: bool = False,  # optionally prints text; helpful for understanding the function & debugging
    explain_model: LLM = EXPLAIN_MODEL,  # model used to generate text plans in step 1
    plan_model: LLM = PLAN_MODEL,  # model used to generate text plans in steps 2 and 2b
    execute_model: LLM = EXECUTE_MODEL,  # model used to generate code in step 3
    temperature: float = 0.4,  # temperature = 0 can sometimes get stuck in repetitive loops, so we use 0.4
    reruns_if_fail: int = 1,  # if the output code cannot be parsed, this will re-run the function up to N times
    python_version: tuple[int, int, int] = tuple(int(ver) for ver in platform.python_version_tuple()),
) -> str:
    """Return a unit test suite for a given Python function, using a multi-step GPT prompt.

    Step 1 asks ``explain_model`` to explain the function; step 2 asks
    ``plan_model`` to write a new test suite, taking inspiration from up to
    three existing unit tests.  The generated code is validated with
    ``ast.parse`` and regenerated (up to ``reruns_if_fail`` more times) when it
    is syntactically invalid or contains ellipses, which signal that the model
    punted on producing concrete test cases.

    Raises:
        Whatever exception the OpenAI API call raised, once retries are
        exhausted (previously this path crashed with a ``NameError``).

    NOTE(review): ``approx_min_cases_to_cover`` and ``execute_model`` are
    accepted but not used in the current body; they are kept for interface
    compatibility.
    """

    def _rerun() -> str:
        # Retry with one fewer remaining attempt.  Bug fix: forward the
        # caller-supplied models and python_version instead of resetting them
        # to the module defaults as the previous implementation did.
        return regression_tests_from_function_with_inspiration(
            function_to_test=function_to_test,
            function_name=function_name,
            existing_unit_test_path_and_function=existing_unit_test_path_and_function,
            unit_test_package=unit_test_package,
            approx_min_cases_to_cover=approx_min_cases_to_cover,
            print_text=print_text,
            explain_model=explain_model,
            plan_model=plan_model,
            execute_model=execute_model,
            temperature=temperature,
            reruns_if_fail=reruns_if_fail - 1,  # decrement rerun counter when calling again
            python_version=python_version,
        )

    # TODO: This step is exactly the same as the non-inspired test generator. Merge them into one to save on API calls
    # Step 1: Generate an explanation of the function.
    # Markdown-formatted message asking GPT to explain the function as a bullet list.
    explain_system_message = {
        "role": "system",
        "content": "You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You carefully explain code with great detail and accuracy. You organize your explanations in markdown-formatted, bulleted lists.",
    }
    explain_user_message = {
        "role": "user",
        "content": f"""Please explain the following Python function '{function_name}'. Review what each element of the function is doing precisely and what the author's intentions may have been. Organize your explanation as a markdown-formatted, bulleted list.
```python
{function_to_test}
```""",
    }
    explain_messages = [explain_system_message, explain_user_message]
    if print_text:
        print_messages(explain_messages)
    try:
        explanation_response = openai.ChatCompletion.create(
            model=explain_model.name, messages=explain_messages, temperature=temperature, stream=True
        )
    except Exception as e:
        print(str(e))
        if reruns_if_fail > 0:
            print("Rerunning...")
            return _rerun()
        # Out of retries: re-raise instead of falling through to iterate an
        # undefined response variable (the old code died with a NameError).
        raise
    explanation = ""
    for chunk in explanation_response:
        delta = chunk["choices"][0]["delta"]
        if print_text:
            print_message_delta(delta)
        if "content" in delta:
            explanation += delta["content"]
    explain_assistant_message = {"role": "assistant", "content": explanation}

    # Step 2: Collect up to `max_inspiration_tests` existing tests as inspiration.
    existing_unit_tests = []
    max_inspiration_tests = 3
    tests_in_file: TestsInFile
    inspired_test_imports = []
    test_files = set()
    for i, tests_in_file in enumerate(existing_unit_test_path_and_function):
        # Bug fix: break BEFORE recording the file, so imports are not
        # collected from a file whose test code is never shown to the model.
        if i >= max_inspiration_tests:
            break
        path = tests_in_file.test_file
        test_files.add(path)
        function = tests_in_file.test_function
        tester_code = get_code(path, function)
        existing_unit_tests.append(f"""```python
{tester_code}
```""")
    for path in test_files:
        # Imports from the inspiration files are prepended to the generated
        # code later, so the borrowed test snippets can resolve their names.
        imports_ast = get_imports_from_file(file_path=path)
        inspired_test_imports.append(imports_ast)
    package_comment = ""
    execute_user_message = {
        "role": "user",
        "content": f"""A good unit test suite should aim to:
- Test the function's behavior for a wide range of possible inputs
- Test edge cases that the author may not have foreseen
- Take advantage of the features of `{unit_test_package}` to make the tests easy to write and maintain
- Be easy to read and understand, with clean code and descriptive names
- Be deterministic, so that the tests always pass or fail in the same way
To test the function above, here {"are a few unit tests" if len(existing_unit_tests) > 1 else "is a unit test"} that already exist:
{f"{chr(10)}".join(existing_unit_tests)}
Using Python and the `{unit_test_package}` package, and taking inspiration from the existing test cases above, write a new suite of unit tests for the function '{function_name}'. Include helpful comments to explain each line. Generate concrete runnable test without using ellipsis. Reply only with code, formatted as follows:
```python
# imports
import {unit_test_package}  # used for our unit tests
{{insert other imports as needed}}
# function to test
{function_to_test}
# unit tests
{package_comment}
{{insert unit test code here}}
```
""",
    }
    plan_messages = [explain_system_message, explain_user_message, explain_assistant_message, execute_user_message]
    if print_text:
        print_messages([execute_user_message])
    try:
        execute_response = openai.ChatCompletion.create(
            model=plan_model.name, messages=plan_messages, temperature=temperature, stream=True
        )
    except Exception as e:
        print(str(e))
        if reruns_if_fail > 0:
            print("Rerunning...")
            return _rerun()
        # Out of retries: re-raise (see the step-1 handler for rationale).
        raise
    execution = ""
    for chunk in execute_response:
        delta = chunk["choices"][0]["delta"]
        if print_text:
            print_message_delta(delta)
        if "content" in delta:
            execution += delta["content"]

    # Extract the fenced code block from the model reply.
    code = execution.split("```python")[1].split("```")[0].strip()
    # TODO: This adds a bunch of redundant imports, clean them up
    tests_list = [imp for sublist in inspired_test_imports for imp in sublist]
    code = ast.unparse(tests_list) + "\n" + code
    try:
        module = ast.parse(code, feature_version=python_version[:2])
        if ellipsis_in_ast(module):
            # If the test generator is generating ellipsis, it is punting on generating
            # the concrete test cases and we should re-generate
            raise SyntaxError("Ellipsis in generated test code, regenerating...")
    except SyntaxError:
        print("Syntax error in generated code.")
        if reruns_if_fail > 0:
            print("Rerunning...")
            return _rerun()
        # No retries left: fall through and return the (possibly invalid)
        # code, preserving the original best-effort behavior.
    # return the unit test as a string
    return code
2023-10-23 05:20:43 +00:00
class ModifyInspiredTests(ast.NodeTransformer):
    """AST transformer that hoists import statements out of a test module.

    Each ``import`` / ``from ... import`` node encountered is appended to
    *import_list* and removed from the tree — the visitor returns ``None``,
    which ``NodeTransformer`` treats as a deletion.  Class definitions are
    passed through untouched.
    """

    def __init__(self, import_list, test_framework):
        # Collected import nodes land here so the caller can re-emit them
        # ahead of the generated test code.
        self.import_list = import_list
        # Retained for interface compatibility; only pytest is supported.
        self.test_framework = test_framework

    def visit_Import(self, node: ast.Import):
        # Record the import, then drop it from the tree (implicit None return).
        self.import_list.append(node)

    def visit_ImportFrom(self, node: ast.ImportFrom):
        # Same treatment as plain `import` statements.
        self.import_list.append(node)

    def visit_ClassDef(self, node: ast.ClassDef):
        # No unittest-specific transformations needed since we only support pytest
        return node