Use xmlrunner to save test results, and etree to read them back

This commit is contained in:
afik.cohen 2023-10-23 16:06:28 -07:00
parent 31d6c562d7
commit 7606358f43
8 changed files with 86 additions and 24 deletions

View file

@ -173,7 +173,7 @@ def main() -> None:
for test_file in instrumented_unittests_created:
# TODO: If some test case times out then flag it and don't run it in subsequent tests, to save a lot of time. It doesn't add value anyway
# TODO: Add Support for PyTest too
std_output, stderr_output = run_tests(
result_file_path = run_tests(
test_file,
test_framework=args.test_framework,
cwd=args.root,
@ -185,7 +185,7 @@ def main() -> None:
if i == 0:
existing_unittest_results_original = {
**existing_unittest_results_original,
**parse_unittest_output(stderr_output),
**parse_unittest_output(result_file_path),
}
timing_result = parse_test_timing(std_output)
timing_result = filter_out_failed_test_timing(
@ -276,7 +276,7 @@ def main() -> None:
instrumented_test_timing = []
for instrumented_test_file in instrumented_unittests_created:
std_output, stderr_output = run_tests(
result_file_path = run_tests(
instrumented_test_file,
test_framework=args.test_framework,
cwd=args.root,
@ -287,7 +287,7 @@ def main() -> None:
if test_index == 0:
existing_unittest_results_optimized = {
**existing_unittest_results_optimized,
**parse_unittest_output(stderr_output),
**parse_unittest_output(result_file_path),
}
timing_result = parse_test_timing(std_output)
timing_result = filter_out_failed_test_timing(

View file

View file

@ -1,6 +1,7 @@
import re
import os
import pickle
import xml.etree.ElementTree as ET
def filter_out_failed_test_timing(test_result, timing_result):
@ -46,24 +47,23 @@ def parse_test_return_values_bin(file_location):
return test_results
def parse_unittest_output(file_path):
    """Parse an xmlrunner XML report into a {"classname:testname": passed} dict.

    :param file_path: path to the JUnit-style XML file written by xmlrunner.
    :return: dict mapping "<classname>:<test name>" to True (passed) or
             False (failed or errored).
    :raises FileNotFoundError / ET.ParseError: if the report is missing/invalid.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()
    test_results = {}
    for testcase in root.iter('testcase'):
        class_name = testcase.attrib['classname']
        name = testcase.attrib['name']
        # A skipped test neither passed nor failed; leave it out of the
        # results, mirroring the old regex parser which only matched
        # ok/FAIL/ERROR statuses.
        if testcase.find('skipped') is not None:
            continue
        # xmlrunner records FAIL as a <failure> child and ERROR as an
        # <error> child. Both must count as not-passed; checking only
        # <failure> would silently report errored tests as successes.
        failed = (
            testcase.find('failure') is not None
            or testcase.find('error') is not None
        )
        test_results[class_name + ":" + name] = not failed
    return test_results
def parse_test_timing(test_results):
def parse_test_timing(file_path):
with open(file_path, 'r') as file:
test_results = file.read()
m = re.findall(r"#####([^#]*?)#####([\d\.]*?)\^\^\^\^\^", test_results)
parsed_results = {}
for test_name, time_taken in m:

View file

@ -1,4 +1,6 @@
import subprocess
import unittest
import xmlrunner
def run_tests(
@ -18,18 +20,20 @@ def run_tests(
cwd=cwd,
env=test_env,
)
stdout = pytest_results.stdout.decode("utf-8")
stderr = pytest_results.stderr.decode("utf-8")
# TODO result file path for pytest
result_file_path = "pytest_results.xml" # FIXME
elif test_framework == "unittest":
unittest_results = subprocess.run(
["python", "-m", "unittest"] + (["-v"] if verbose else []) + [test_path],
["python", "-m", "xmlrunner"]
+ (["-v"] if verbose else [])
+ [test_path]
+ ["--output-file", "unittest_results.xml"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cwd,
env=test_env,
)
stdout = unittest_results.stdout.decode("utf-8")
stderr = unittest_results.stderr.decode("utf-8")
result_file_path = "unittest_results.xml"
else:
raise ValueError("Invalid test framework, we only support Pytest and Unittest currently.")
return stdout, stderr
return result_file_path

View file

@ -17,7 +17,8 @@ tiktoken = "^0.5.1"
timeout-decorator = "^0.5.0"
pytest-timeout = "^2.1.0"
astunparse-fixed = {version = "^1.7.0", optional = true, python = ">=3.8.0,<3.9"}
tomli = {version = "^2.0.1" , optional = true, python = "<3.11"}
tomli = {version = "^2.0.1", optional = true, python = "<3.11"}
unittest-xml-reporting = "^3.2.0"
[tool.poetry.group.dev.dependencies]
ipython = "^8.12.0"

43
tests/test_test_runner.py Normal file
View file

@ -0,0 +1,43 @@
import os
import subprocess
from codeflash.verification.test_runner import run_tests
def test_unittest_runner():
    """End-to-end check: write a small unittest file, run it through
    run_tests with the unittest framework, and parse the XML results.
    """
    # NOTE(review): parse_unittest_output is called below but not imported
    # at the top of this file — confirm and import it from its module.
    code = """import unittest


def sorter(arr):
    arr.sort()
    return arr


class TestUnittestRunnerSorter(unittest.TestCase):
    def test_sort(self):
        arr = [5, 4, 3, 2, 1, 0]
        output = sorter(arr)
        self.assertEqual(output, [0, 1, 2, 3, 4, 5])
"""
    cur_dir_path = os.path.dirname(os.path.abspath(__file__))
    new_test_path = os.path.join(cur_dir_path, "test_unittest_runner.py")
    try:
        with open(new_test_path, "w") as file:
            file.write(code)
        result_file = run_tests(
            new_test_path,
            test_framework="unittest",
            cwd=cur_dir_path,
        )
        results = parse_unittest_output(result_file)
        # Exactly one test case, and it must have passed.
        assert results, "expected at least one parsed test result"
        assert all(results.values())
        assert any(key.endswith(":test_sort") for key in results)
    finally:
        # Don't leave the generated test file behind to pollute later runs.
        if os.path.exists(new_test_path):
            os.remove(new_test_path)
def test_pytest_runner():
    """End-to-end check: write a small test file, run it through run_tests
    with the pytest framework, and verify a results file is produced.

    NOTE(review): run_tests' pytest branch still has a FIXME for the result
    file path and parse_pytest_output is not imported here — this test
    asserts only that a result file path comes back until that lands.
    """
    code = """import unittest


def sorter(arr):
    arr.sort()
    return arr


class TestUnittestRunnerSorter(unittest.TestCase):
    def test_sort(self):
        arr = [5, 4, 3, 2, 1, 0]
        output = sorter(arr)
        self.assertEqual(output, [0, 1, 2, 3, 4, 5])
"""
    cur_dir_path = os.path.dirname(os.path.abspath(__file__))
    new_test_path = os.path.join(cur_dir_path, "test_pytest_runner.py")
    try:
        with open(new_test_path, "w") as file:
            file.write(code)
        result_file = run_tests(
            new_test_path,
            test_framework="pytest",
            cwd=cur_dir_path,
        )
        # Until the pytest XML output path is wired up, assert the runner
        # reports where results should be written.
        assert result_file, "run_tests returned no result file path"
    finally:
        # Don't leave the generated test file behind to pollute later runs.
        if os.path.exists(new_test_path):
            os.remove(new_test_path)

14
unittest_results.xml Normal file
View file

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite name="code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented__perfinstrumented.TestPigLatin-20231023155843" tests="1" file="code_to_optimize/tests/unittest/test_bubble_sort__perfinstrumented__perfinstrumented.py" time="0.002" timestamp="2023-10-23T15:58:43" failures="0" errors="0" skipped="0">
<testcase classname="code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented__perfinstrumented.TestPigLatin" name="test_sort" time="0.002" timestamp="2023-10-23T15:58:43" file="code_to_optimize/tests/unittest/test_bubble_sort__perfinstrumented__perfinstrumented.py" line="10">
<system-out><![CDATA[#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:3#####708^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:1#####12208^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:12#####334^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:4#####2792^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:21#####16459^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:7#####22167^^^^^
]]></system-out>
</testcase>
</testsuite>
</testsuites>