Mirror of https://github.com/codeflash-ai/codeflash-internal.git
Synced 2026-05-04 18:25:18 +00:00
Made the prompt agnostic to the chosen testing library
This commit is contained in:
parent fc4e33ac40
commit 96cf7c78ca
2 changed files with 99 additions and 132 deletions
@@ -130,7 +130,7 @@ async def regression_tests_from_function(
 - Take advantage of the features of `{unit_test_package}` to make the tests easy to write and maintain
 - Be easy to read and understand, with clean code and descriptive names
 - Be deterministic, so that the tests always pass or fail in the same way
-- It should not mock or stub any dependencies, so do not use pytest.mock or any other similar mocking or stubbing module, so that the testing environment is as close to the production environment as possible
+- It should not mock or stub any dependencies, so do not use `{unit_test_package}`.mock or any other similar mocking or stubbing module, so that the testing environment is as close to the production environment as possible
 
 To help unit test the function above, list diverse scenarios that the function should be able to handle (and under each scenario, include a few examples as sub-bullets).""",
 }
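For context, the bullet changed above lives inside an f-string prompt, so `{unit_test_package}` is substituted at call time. A minimal, self-contained sketch of that substitution (the helper name `render_mock_bullet` is hypothetical, not from the repository):

```python
# Hypothetical sketch: how the {unit_test_package} placeholder renders.
# The bullet text is copied from the diff above.
def render_mock_bullet(unit_test_package: str) -> str:
    return (
        f"- It should not mock or stub any dependencies, so do not use "
        f"`{unit_test_package}`.mock or any other similar mocking or stubbing module, "
        f"so that the testing environment is as close to the production environment as possible"
    )

print(render_mock_bullet("pytest"))    # ... do not use `pytest`.mock ...
print(render_mock_bullet("unittest"))  # ... do not use `unittest`.mock ...
```

With the old wording the prompt always said pytest.mock even when a different `unit_test_package` was requested; the new wording derives the module name from the same parameter, which is what makes the prompt library-agnostic.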
@@ -11,12 +11,12 @@
 "metadata": {
 "collapsed": false,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:32:00.691339Z",
-"start_time": "2024-01-30T19:32:00.676001Z"
+"end_time": "2024-01-31T01:40:25.977922Z",
+"start_time": "2024-01-31T01:40:25.972813Z"
 }
 },
 "id": "8d108492763a25a5",
-"execution_count": 7
+"execution_count": 86
 },
 {
 "cell_type": "code",
@@ -47,22 +47,22 @@
 "metadata": {
 "collapsed": false,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:32:00.708815Z",
-"start_time": "2024-01-30T19:32:00.695584Z"
+"end_time": "2024-01-31T01:40:25.995518Z",
+"start_time": "2024-01-31T01:40:25.976927Z"
 }
 },
 "id": "694116ed54c4af6a",
-"execution_count": 8
+"execution_count": 87
 },
 {
 "cell_type": "code",
-"execution_count": 9,
+"execution_count": 88,
 "id": "initial_id",
 "metadata": {
 "collapsed": true,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:32:00.721388Z",
-"start_time": "2024-01-30T19:32:00.713118Z"
+"end_time": "2024-01-31T01:40:26.003586Z",
+"start_time": "2024-01-31T01:40:25.998970Z"
 }
 },
 "outputs": [],
@@ -70,7 +70,7 @@
 "def regression_tests_from_function(\n",
 " function_code: str, # Python function to test, as a string\n",
 " function_name: str, # the function to test\n",
-" unit_test_package: str = \"pytest\", # unit testing package; use the name as it appears in the import statement\n",
+" unit_test_package: str = \"unittest\", # unit testing package; use the name as it appears in the import statement\n",
 " approx_min_cases_to_cover: int = 7, # minimum number of test case categories to cover (approximate)\n",
 " print_text: bool = True, # optionally prints text; helpful for understanding the function & debugging\n",
 " explain_model= \"gpt-4-1106-preview\", # model used to generate text plans in step 1\n",
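The only change in this cell is the default unit testing package, which moves from "pytest" to "unittest". A self-contained stub with the same keyword parameters, for illustration only (the real notebook function builds the prompts and calls the model; this stub just echoes its arguments):

```python
# Illustrative stub mirroring the parameter list shown in the diff above;
# it does not generate tests, it only echoes its arguments.
def regression_tests_from_function(
    function_code: str,                    # Python function to test, as a string
    function_name: str,                    # the function to test
    unit_test_package: str = "unittest",   # new default introduced by this commit
    approx_min_cases_to_cover: int = 7,    # minimum number of test case categories to cover
    print_text: bool = True,               # optionally prints text for debugging
    explain_model="gpt-4-1106-preview",    # model used to generate text plans in step 1
) -> str:
    return f"would ask {explain_model} for {unit_test_package} tests covering '{function_name}'"

# The caller can still request any package explicitly:
print(regression_tests_from_function("def f(x): return x", "f", unit_test_package="pytest"))
```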
@@ -118,10 +118,10 @@
 " \"content\": f\"\"\"A good unit test suite should aim to:\n",
 "- Test the function's behavior for a wide range of possible inputs\n",
 "- Test edge cases that the author may not have foreseen\n",
-"- Take advantage of the features of `{unit_test_package}` to make the tests easy to write and maintain\n",
+"- Take advantage of the features of {unit_test_package} to make the tests easy to write and maintain\n",
 "- Be easy to read and understand, with clean code and descriptive names\n",
 "- Be deterministic, so that the tests always pass or fail in the same way\n",
-"- It should not mock or stub any dependencies, so do not use pytest.mock or any other similar mocking or stubbing module, so that the testing environment is as close to the production environment as possible\n",
+"- It should not mock or stub any dependencies, so do not use {unit_test_package}.mock or any other similar mocking or stubbing module, so that the testing environment is as close to the production environment as possible\n",
 "\n",
 "To help unit test the function above, list diverse scenarios that the function should be able to handle (and under each scenario, include a few examples as sub-bullets).\"\"\",\n",
 " }\n",
@@ -189,7 +189,7 @@
 " }\n",
 " execute_user_message = {\n",
 " \"role\": \"user\",\n",
-" \"content\": f\"\"\"Using Python and the `{unit_test_package}` package, write a suite of unit tests for the function '{function_name}', following the cases above. Include helpful comments to explain each line. Reply only with code, formatted as follows:\n",
+" \"content\": f\"\"\"Using Python and the {unit_test_package} package, write a suite of unit tests for the function '{function_name}', following the cases above. Include helpful comments to explain each line. Reply only with code, formatted as follows:\n",
 "\n",
 "```python\n",
 "# imports\n",
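As shown above, each rendered prompt is wrapped in a chat-message dictionary before being sent to the model. A minimal sketch of that structure (the values below are examples, not taken from the repository; only the role/content shape follows the diff):

```python
# Hypothetical example values; the role/content structure mirrors the diff above.
unit_test_package = "unittest"
function_name = "lint_code"

execute_user_message = {
    "role": "user",
    "content": (
        f"Using Python and the {unit_test_package} package, write a suite of unit tests "
        f"for the function '{function_name}', following the cases above. "
        "Include helpful comments to explain each line. Reply only with code."
    ),
}

print(execute_user_message["content"])
```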
@@ -272,12 +272,12 @@
 "metadata": {
 "collapsed": false,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:32:00.724756Z",
-"start_time": "2024-01-30T19:32:00.716228Z"
+"end_time": "2024-01-31T01:40:26.006842Z",
+"start_time": "2024-01-31T01:40:26.004637Z"
 }
 },
 "id": "80fefa4a71d32347",
-"execution_count": 10
+"execution_count": 89
 },
 {
 "cell_type": "code",
@@ -312,22 +312,25 @@
 "A good unit test suite should aim to:\n",
 "- Test the function's behavior for a wide range of possible inputs\n",
 "- Test edge cases that the author may not have foreseen\n",
-"- Take advantage of the features of `pytest` to make the tests easy to write and maintain\n",
+"- Take advantage of the features of unittest to make the tests easy to write and maintain\n",
 "- Be easy to read and understand, with clean code and descriptive names\n",
 "- Be deterministic, so that the tests always pass or fail in the same way\n",
-"- It should not mock or stub any dependencies, so do not use pytest.mock or any other similar mocking or stubbing module, so that the testing environment is as close to the production environment as possible\n",
+"- It should not mock or stub any dependencies, so do not use unittest.mock or any other similar mocking or stubbing module, so that the testing environment is as close to the production environment as possible\n",
 "\n",
 "To help unit test the function above, list diverse scenarios that the function should be able to handle (and under each scenario, include a few examples as sub-bullets).\n",
 "\u001B[0m\n",
 "[user]\n",
 "In addition to those scenarios above, list a few rare or unexpected edge cases (and as before, under each edge case, include a few examples as sub-bullets).\n",
 "\u001B[0m\n",
 "[system]\n",
 "You are a world-class Python developer with an eagle eye for unintended bugs and edge cases. You write careful, accurate unit tests. When asked to reply only with code, you write all of your code in a single block.\n",
 "\u001B[0m\n",
 "[user]\n",
-"Using Python and the `pytest` package, write a suite of unit tests for the function 'lint_code', following the cases above. Include helpful comments to explain each line. Reply only with code, formatted as follows:\n",
+"Using Python and the unittest package, write a suite of unit tests for the function 'lint_code', following the cases above. Include helpful comments to explain each line. Reply only with code, formatted as follows:\n",
 "\n",
 "```python\n",
 "# imports\n",
-"import pytest # used for our unit tests\n",
+"import unittest # used for our unit tests\n",
 "{insert other imports as needed}\n",
 "\n",
 "# function to test\n",
@@ -357,12 +360,12 @@
 "metadata": {
 "collapsed": false,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:34:12.978425Z",
-"start_time": "2024-01-30T19:32:00.721528Z"
+"end_time": "2024-01-31T01:44:06.796824Z",
+"start_time": "2024-01-31T01:40:26.009670Z"
 }
 },
 "id": "3cb5881311ca16e2",
-"execution_count": 11
+"execution_count": 90
 },
 {
 "cell_type": "code",
@@ -372,11 +375,12 @@
 "output_type": "stream",
 "text": [
 "# imports\n",
-"import pytest\n",
-"import os\n",
-"import logging\n",
-"import subprocess\n",
-"from tempfile import NamedTemporaryFile\n",
+"import unittest # used for our unit tests\n",
+"import os # used to interact with the filesystem\n",
+"import tempfile # used to create temporary files and directories\n",
+"import subprocess # used to run external commands\n",
+"import logging # used to capture logging output\n",
+"from unittest.mock import patch # used to patch modules and functions within them\n",
 "\n",
 "# function to test\n",
 "def lint_code(path: str) -> str:\n",
@@ -389,114 +393,77 @@
 " logging.error(\"Failed to format\")\n",
 " with open(path, \"r\") as f:\n",
 " new_code = f.read()\n",
 " return new_code\n",
 "\n",
 "# unit tests\n",
+"class TestLintCode(unittest.TestCase):\n",
 "\n",
-"# Test valid file path with Python code that needs formatting\n",
-"def test_valid_file_needs_formatting():\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".py\", delete=False) as tmp:\n",
-" tmp.write(\"def foo():\\n pass\\n\") # poorly formatted code\n",
-" tmp_path = tmp.name\n",
-" try:\n",
-" lint_code(tmp_path)\n",
-" with open(tmp_path, \"r\") as f:\n",
-" content = f.read()\n",
-" # Check that the code has been reformatted (e.g., proper indentation)\n",
-" assert content == \"def foo():\\n pass\\n\"\n",
-" finally:\n",
-" os.remove(tmp_path)\n",
+" # Set up a temporary directory before each test\n",
+" def setUp(self):\n",
+" self.test_dir = tempfile.TemporaryDirectory()\n",
 "\n",
-"# Test valid file path with Python code already formatted\n",
-"def test_valid_file_already_formatted():\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".py\", delete=False) as tmp:\n",
-" tmp.write(\"def foo():\\n pass\\n\") # already formatted code\n",
-" tmp_path = tmp.name\n",
-" try:\n",
-" lint_code(tmp_path)\n",
-" with open(tmp_path, \"r\") as f:\n",
-" content = f.read()\n",
-" # Check that the content is unchanged\n",
-" assert content == \"def foo():\\n pass\\n\"\n",
-" finally:\n",
-" os.remove(tmp_path)\n",
+" # Clean up the temporary directory after each test\n",
+" def tearDown(self):\n",
+" self.test_dir.cleanup()\n",
 "\n",
-"# Test invalid file path\n",
-"def test_invalid_file_path():\n",
-" with pytest.raises(AssertionError):\n",
-" lint_code(\"/path/to/nonexistent/file.py\")\n",
+" # Test normal operation with a file that needs formatting\n",
+" def test_normal_operation(self):\n",
+" # Create a temporary Python file that needs formatting\n",
+" test_file_path = os.path.join(self.test_dir.name, \"test.py\")\n",
+" with open(test_file_path, \"w\") as test_file:\n",
+" test_file.write(\"def foo():\\n return 1\\n\")\n",
 "\n",
-"# Test file path to a directory\n",
-"def test_file_path_to_directory(tmp_path):\n",
-" dir_path = str(tmp_path)\n",
-" with pytest.raises(AssertionError):\n",
-" lint_code(dir_path)\n",
+" # Run the lint_code function on the test file\n",
+" with patch('subprocess.run') as mock_run:\n",
+" mock_run.return_value.returncode = 0 # Simulate successful black formatting\n",
+" lint_code(test_file_path)\n",
 "\n",
-"# Test file with read-only permissions\n",
-"def test_read_only_file():\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".py\", delete=False) as tmp:\n",
-" tmp.write(\"def foo():\\n pass\\n\")\n",
-" tmp_path = tmp.name\n",
-" os.chmod(tmp_path, 0o444) # read-only permissions\n",
-" try:\n",
-" with pytest.raises(subprocess.CalledProcessError):\n",
-" lint_code(tmp_path)\n",
-" finally:\n",
-" os.chmod(tmp_path, 0o666) # reset permissions to allow deletion\n",
-" os.remove(tmp_path)\n",
+" # Check that the file was formatted (mocked)\n",
+" mock_run.assert_called_once_with([\"black\", test_file_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
 "\n",
-"# Test empty Python file\n",
-"def test_empty_file():\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".py\", delete=False) as tmp:\n",
-" tmp_path = tmp.name\n",
-" try:\n",
-" lint_code(tmp_path)\n",
-" with open(tmp_path, \"r\") as f:\n",
-" content = f.read()\n",
-" # Check that the content is still empty\n",
-" assert content == \"\"\n",
-" finally:\n",
-" os.remove(tmp_path)\n",
+" # Test that an assertion error is raised when the file does not exist\n",
+" def test_file_does_not_exist(self):\n",
+" # Define a path to a non-existent file\n",
+" test_file_path = os.path.join(self.test_dir.name, \"nonexistent.py\")\n",
 "\n",
-"# Test non-Python file\n",
-"def test_non_python_file():\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".txt\", delete=False) as tmp:\n",
-" tmp.write(\"This is not Python code.\")\n",
-" tmp_path = tmp.name\n",
-" try:\n",
-" with pytest.raises(subprocess.CalledProcessError):\n",
-" lint_code(tmp_path)\n",
-" finally:\n",
-" os.remove(tmp_path)\n",
+" # Run the lint_code function and check for AssertionError\n",
+" with self.assertRaises(AssertionError):\n",
+" lint_code(test_file_path)\n",
 "\n",
-"# Test large Python file\n",
-"def test_large_file():\n",
-" large_content = \"def foo():\\n pass\\n\" * 1000 # large file with repeated content\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".py\", delete=False) as tmp:\n",
-" tmp.write(large_content)\n",
-" tmp_path = tmp.name\n",
-" try:\n",
-" lint_code(tmp_path)\n",
-" with open(tmp_path, \"r\") as f:\n",
-" content = f.read()\n",
-" # Check that the content is properly formatted\n",
-" assert content == large_content\n",
-" finally:\n",
-" os.remove(tmp_path)\n",
+" # Test that an error is logged when black fails to format the file\n",
+" def test_black_failure(self):\n",
+" # Create a temporary Python file with valid code\n",
+" test_file_path = os.path.join(self.test_dir.name, \"test.py\")\n",
+" with open(test_file_path, \"w\") as test_file:\n",
+" test_file.write(\"def foo():\\n return 1\\n\")\n",
 "\n",
-"# Test file path with special characters\n",
-"def test_special_characters_in_file_path():\n",
-" with NamedTemporaryFile(\"w+\", suffix=\".py\", prefix=\"test@#\", delete=False) as tmp:\n",
-" tmp.write(\"def foo():\\n pass\\n\")\n",
-" tmp_path = tmp.name\n",
-" try:\n",
-" lint_code(tmp_path)\n",
-" with open(tmp_path, \"r\") as f:\n",
-" content = f.read()\n",
-" # Check that the code has been reformatted\n",
-" assert content == \"def foo():\\n pass\\n\"\n",
-" finally:\n",
-" os.remove(tmp_path)\n"
+" # Run the lint_code function and simulate a black failure\n",
+" with patch('subprocess.run') as mock_run, \\\n",
+" patch('logging.error') as mock_log_error:\n",
+" mock_run.return_value.returncode = 1 # Simulate black formatting failure\n",
+" lint_code(test_file_path)\n",
+"\n",
+" # Check that an error was logged\n",
+" mock_log_error.assert_called_once_with(\"Failed to format\")\n",
+"\n",
+" # Test that the function logs \"OK\" when black successfully formats the file\n",
+" def test_black_success_logging(self):\n",
+" # Create a temporary Python file with valid code\n",
+" test_file_path = os.path.join(self.test_dir.name, \"test.py\")\n",
+" with open(test_file_path, \"w\") as test_file:\n",
+" test_file.write(\"def foo():\\n return 1\\n\")\n",
+"\n",
+" # Run the lint_code function and simulate a successful black run\n",
+" with patch('subprocess.run') as mock_run, \\\n",
+" patch('logging.info') as mock_log_info:\n",
+" mock_run.return_value.returncode = 0 # Simulate successful black formatting\n",
+" lint_code(test_file_path)\n",
+"\n",
+" # Check that the \"OK\" message was logged\n",
+" mock_log_info.assert_any_call(\"OK\")\n",
+"\n",
+"# Run the unit tests if this script is executed\n",
+"if __name__ == '__main__':\n",
+" unittest.main()\n"
 ]
 }
 ],
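The regenerated output above is a unittest-style suite for the notebook's `lint_code` example, which this diff only shows in fragments. A rough reconstruction of that function, assembled from the visible fragments (the actual notebook cell may differ in details, so treat this as an approximation):

```python
import logging
import os
import subprocess


def lint_code(path: str) -> str:
    # Approximate reconstruction from the fragments in this diff: the tests expect an
    # AssertionError for a missing file, a `black` invocation via subprocess.run, an
    # "OK" info log on success, a "Failed to format" error log on failure, and the
    # reformatted file contents as the return value.
    assert os.path.isfile(path)
    result = subprocess.run(["black", path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode == 0:
        logging.info("OK")
    else:
        logging.error("Failed to format")
    with open(path, "r") as f:
        new_code = f.read()
    return new_code
```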
@@ -506,12 +473,12 @@
 "metadata": {
 "collapsed": false,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:34:12.979917Z",
-"start_time": "2024-01-30T19:34:12.973475Z"
+"end_time": "2024-01-31T01:44:06.799741Z",
+"start_time": "2024-01-31T01:44:06.794094Z"
 }
 },
 "id": "639c96f42318a856",
-"execution_count": 12
+"execution_count": 91
 },
 {
 "cell_type": "code",
@@ -520,12 +487,12 @@
 "metadata": {
 "collapsed": false,
 "ExecuteTime": {
-"end_time": "2024-01-30T19:34:12.990244Z",
-"start_time": "2024-01-30T19:34:12.979780Z"
+"end_time": "2024-01-31T01:44:06.811647Z",
+"start_time": "2024-01-31T01:44:06.800267Z"
 }
 },
 "id": "12baa8a023eafc26",
-"execution_count": 12
+"execution_count": 91
 }
 ],
 "metadata": {