Mirror of https://github.com/codeflash-ai/codeflash-internal.git, synced 2026-05-04 18:25:18 +00:00
Use xmlrunner to save test results, and etree to read them back
parent 31d6c562d7
commit 7606358f43
8 changed files with 86 additions and 24 deletions
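In short: run_tests now asks unittest-xml-reporting (the xmlrunner package) to write a JUnit-style XML report to disk and returns the report path, and parse_unittest_output reads that file back with xml.etree.ElementTree instead of scraping the runner's output streams. A minimal sketch of that round trip, separate from the codeflash code (TestDemo and report.xml are illustrative names only):

import unittest
import xml.etree.ElementTree as ET

import xmlrunner  # pip install unittest-xml-reporting


class TestDemo(unittest.TestCase):
    def test_passes(self):
        self.assertEqual(sorted([3, 1, 2]), [1, 2, 3])


# Write: XMLTestRunner accepts a file-like object and emits the report there.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestDemo)
with open("report.xml", "wb") as out:
    xmlrunner.XMLTestRunner(output=out).run(suite)

# Read: walk <testcase> elements the same way the new parse_unittest_output does.
root = ET.parse("report.xml").getroot()
for testcase in root.iter("testcase"):
    print(testcase.attrib["classname"], testcase.attrib["name"],
          testcase.find("failure") is None)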
@@ -173,7 +173,7 @@ def main() -> None:
     for test_file in instrumented_unittests_created:
         # TODO: If some test case times out then flag it and don't run it in subsequent tests, to save a lot of time. It doesn't add value anyway
         # TODO: Add Support for PyTest too
-        std_output, stderr_output = run_tests(
+        result_file_path = run_tests(
            test_file,
            test_framework=args.test_framework,
            cwd=args.root,
@@ -185,7 +185,7 @@ def main() -> None:
        if i == 0:
            existing_unittest_results_original = {
                **existing_unittest_results_original,
-               **parse_unittest_output(stderr_output),
+               **parse_unittest_output(result_file_path),
            }
        timing_result = parse_test_timing(std_output)
        timing_result = filter_out_failed_test_timing(
@@ -276,7 +276,7 @@ def main() -> None:
 
    instrumented_test_timing = []
    for instrumented_test_file in instrumented_unittests_created:
-       std_output, stderr_output = run_tests(
+       result_file_path = run_tests(
            instrumented_test_file,
            test_framework=args.test_framework,
            cwd=args.root,
@@ -287,7 +287,7 @@ def main() -> None:
        if test_index == 0:
            existing_unittest_results_optimized = {
                **existing_unittest_results_optimized,
-               **parse_unittest_output(stderr_output),
+               **parse_unittest_output(result_file_path),
            }
        timing_result = parse_test_timing(std_output)
        timing_result = filter_out_failed_test_timing(
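Both loops now share the same contract: run_tests returns the path of the XML report, parse_unittest_output turns it into a dict keyed "classname:testname", and dict unpacking folds each file's results into the running totals. A small sketch of that merge (the keys are hypothetical), showing that later files override earlier entries:

results_file_1 = {"tests.TestSorter:test_sort": True}
results_file_2 = {"tests.TestSorter:test_sort": False,
                  "tests.TestSorter:test_reverse": True}
merged = {**results_file_1, **results_file_2}
assert merged == {"tests.TestSorter:test_sort": False,
                  "tests.TestSorter:test_reverse": True}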
codeflash/test_runners/__init__.py (new file, 0 lines)
codeflash/test_runners/unittest_runner.py (new file, 0 lines)
@@ -1,6 +1,7 @@
 import re
 import os
 import pickle
+import xml.etree.ElementTree as ET
 
 
 def filter_out_failed_test_timing(test_result, timing_result):
@@ -46,24 +47,23 @@ def parse_test_return_values_bin(file_location):
     return test_results
 
 
-def parse_unittest_output(output):
-    re_pattern = r"^(test\w+)\s\((.*?)\)\s\.\.\.\s.*?(ok|FAIL|ERROR)$"
-    matches = re.findall(re_pattern, output, re.MULTILINE | re.DOTALL)
+def parse_unittest_output(file_path):
+    tree = ET.parse(file_path)
+    root = tree.getroot()
     test_results = {}
-    for match in matches:
-        if not str.isidentifier(match[0]):
-            print(f"Invalid test name {match[0]}. Test names must be valid python identifiers")
-            continue
-        if match[2] == "ok":
-            test_results[match[1] + ":" + match[0]] = True
-        elif match[2] in ["FAIL", "ERROR"]:
-            test_results[match[1] + ":" + match[0]] = False
+    for testcase in root.iter('testcase'):
+        class_name = testcase.attrib['classname']
+        name = testcase.attrib['name']
+        if testcase.find('failure') is not None:
+            test_results[class_name + ":" + name] = False
         else:
-            raise ValueError("Invalid test result, couldn't parse the test output")
+            test_results[class_name + ":" + name] = True
     return test_results
 
 
-def parse_test_timing(test_results):
+def parse_test_timing(file_path):
+    with open(file_path, 'r') as file:
+        test_results = file.read()
     m = re.findall(r"#####([^#]*?)#####([\d\.]*?)\^\^\^\^\^", test_results)
     parsed_results = {}
     for test_name, time_taken in m:
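To make the new parser concrete, here is its logic run against a tiny hand-written report (the XML and names below are stand-ins, not real codeflash output; note that, unlike the old regex, this version treats any testcase without a <failure> child as a pass, so <error> results would count as passing):

import xml.etree.ElementTree as ET

xml_report = """<testsuites>
  <testsuite name="demo" tests="2">
    <testcase classname="tests.TestSorter" name="test_sort"/>
    <testcase classname="tests.TestSorter" name="test_reverse">
      <failure message="boom"/>
    </testcase>
  </testsuite>
</testsuites>"""

root = ET.fromstring(xml_report)
test_results = {}
for testcase in root.iter("testcase"):
    key = testcase.attrib["classname"] + ":" + testcase.attrib["name"]
    # Same rule as the diff: a <failure> child marks the test as failed.
    test_results[key] = testcase.find("failure") is None

assert test_results == {"tests.TestSorter:test_sort": True,
                        "tests.TestSorter:test_reverse": False}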
codeflash/verification/test_runner.py

@@ -1,4 +1,6 @@
 import subprocess
+import unittest
+import xmlrunner
 
 
 def run_tests(
@@ -18,18 +20,20 @@ def run_tests(
            cwd=cwd,
            env=test_env,
        )
-       stdout = pytest_results.stdout.decode("utf-8")
-       stderr = pytest_results.stderr.decode("utf-8")
+       # TODO result file path for pytest
+       result_file_path = "pytest_results.xml"  # FIXME
    elif test_framework == "unittest":
        unittest_results = subprocess.run(
-           ["python", "-m", "unittest"] + (["-v"] if verbose else []) + [test_path],
+           ["python", "-m", "xmlrunner"]
+           + (["-v"] if verbose else [])
+           + [test_path]
+           + ["--output-file", "unittest_results.xml"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            env=test_env,
        )
-       stdout = unittest_results.stdout.decode("utf-8")
-       stderr = unittest_results.stderr.decode("utf-8")
+       result_file_path = "unittest_results.xml"
    else:
        raise ValueError("Invalid test framework, we only support Pytest and Unittest currently.")
-   return stdout, stderr
+   return result_file_path
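The pytest branch is left as a FIXME with a hard-coded path. For what it's worth, pytest can emit the same JUnit-style XML natively via its --junitxml flag, so one way to fill that gap could look like the sketch below (an assumption about a possible direction, not what this commit does; test_path is a placeholder):

import subprocess

test_path = "tests/test_example.py"  # placeholder
result_file_path = "pytest_results.xml"
# pytest writes the JUnit-style report itself; no extra runner package needed.
subprocess.run(
    ["python", "-m", "pytest", test_path, f"--junitxml={result_file_path}"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)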
pyproject.toml

@@ -17,7 +17,8 @@ tiktoken = "^0.5.1"
 timeout-decorator = "^0.5.0"
 pytest-timeout = "^2.1.0"
 astunparse-fixed = {version = "^1.7.0", optional = true, python = ">=3.8.0,<3.9"}
-tomli = {version = "^2.0.1" , optional = true, python = "<3.11"}
+tomli = {version = "^2.0.1", optional = true, python = "<3.11"}
+unittest-xml-reporting = "^3.2.0"
 
 [tool.poetry.group.dev.dependencies]
 ipython = "^8.12.0"
tests/test_test_runner.py (new file, 43 lines)
@@ -0,0 +1,43 @@
+import os
+import subprocess
+from codeflash.verification.test_runner import run_tests
+
+
+def test_unittest_runner():
+    code = """import unittest
+def sorter(arr):
+    arr.sort()
+    return arr
+class TestUnittestRunnerSorter(unittest.TestCase):
+    def test_sort():
+        arr = [5, 4, 3, 2, 1, 0]
+        output = sorter(arr)
+        self.assertEqual(output, [0, 1, 2, 3, 4, 5])
+"""
+    cur_dir_path = os.path.dirname(os.path.abspath(__file__))
+    new_test_path = os.path.join(cur_dir_path, "test_unittest_runner.py")
+    with open(new_test_path, "w") as file:
+        file.write(code)
+    result_file = run_tests(new_test_path, test_framework="unittest", cwd=os.path.join(cur_dir_path), )
+    results = parse_unittest_output(result_file)
+    assert False # FIXME
+
+
+def test_pytest_runner():
+    code = """import unittest
+def sorter(arr):
+    arr.sort()
+    return arr
+class TestUnittestRunnerSorter(unittest.TestCase):
+    def test_sort():
+        arr = [5, 4, 3, 2, 1, 0]
+        output = sorter(arr)
+        self.assertEqual(output, [0, 1, 2, 3, 4, 5])
+"""
+    cur_dir_path = os.path.dirname(os.path.abspath(__file__))
+    new_test_path = os.path.join(cur_dir_path, "test_pytest_runner.py")
+    with open(new_test_path, "w") as file:
+        file.write(code)
+    result_file = run_tests(new_test_path, test_framework="pytest", cwd=os.path.join(cur_dir_path), )
+    results = parse_pytest_output(result_file)
+    assert False # FIXME
unittest_results.xml (new file, 14 lines)
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuites>
+  <testsuite name="code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented__perfinstrumented.TestPigLatin-20231023155843" tests="1" file="code_to_optimize/tests/unittest/test_bubble_sort__perfinstrumented__perfinstrumented.py" time="0.002" timestamp="2023-10-23T15:58:43" failures="0" errors="0" skipped="0">
+    <testcase classname="code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented__perfinstrumented.TestPigLatin" name="test_sort" time="0.002" timestamp="2023-10-23T15:58:43" file="code_to_optimize/tests/unittest/test_bubble_sort__perfinstrumented__perfinstrumented.py" line="10">
+      <system-out><![CDATA[#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:3#####708^^^^^
+#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:1#####12208^^^^^
+#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:12#####334^^^^^
+#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:4#####2792^^^^^
+#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:21#####16459^^^^^
+#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:7#####22167^^^^^
+]]></system-out>
+    </testcase>
+  </testsuite>
+</testsuites>
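The <system-out> CDATA above is where the timing markers end up, which is why parse_test_timing now reads the report file rather than captured stdout. A standalone check of its regex against one marker line copied from the block above:

import re

line = "#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:1#####12208^^^^^"
for test_name, time_taken in re.findall(r"#####([^#]*?)#####([\d\.]*?)\^\^\^\^\^", line):
    print(test_name, "->", time_taken)  # ...test_bubble_sort:sorter:test_sort:1 -> 12208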