Use xmlrunner to save test results, and etree to read them back

This commit is contained in:
afik.cohen 2023-10-23 16:06:28 -07:00
parent 31d6c562d7
commit 7606358f43
8 changed files with 86 additions and 24 deletions

View file

@ -173,7 +173,7 @@ def main() -> None:
for test_file in instrumented_unittests_created:
# TODO: If some test case times out then flag it and don't run it in subsequent tests, to save a lot of time. It doesn't add value anyway
# TODO: Add Support for PyTest too
std_output, stderr_output = run_tests(
result_file_path = run_tests(
test_file,
test_framework=args.test_framework,
cwd=args.root,
@ -185,7 +185,7 @@ def main() -> None:
if i == 0:
existing_unittest_results_original = {
**existing_unittest_results_original,
**parse_unittest_output(stderr_output),
**parse_unittest_output(result_file_path),
}
timing_result = parse_test_timing(std_output)
timing_result = filter_out_failed_test_timing(
@ -276,7 +276,7 @@ def main() -> None:
instrumented_test_timing = []
for instrumented_test_file in instrumented_unittests_created:
std_output, stderr_output = run_tests(
result_file_path = run_tests(
instrumented_test_file,
test_framework=args.test_framework,
cwd=args.root,
@ -287,7 +287,7 @@ def main() -> None:
if test_index == 0:
existing_unittest_results_optimized = {
**existing_unittest_results_optimized,
**parse_unittest_output(stderr_output),
**parse_unittest_output(result_file_path),
}
timing_result = parse_test_timing(std_output)
timing_result = filter_out_failed_test_timing(

View file

View file

@ -1,6 +1,7 @@
import re
import os
import pickle
import xml.etree.ElementTree as ET
def filter_out_failed_test_timing(test_result, timing_result):
@ -46,24 +47,23 @@ def parse_test_return_values_bin(file_location):
return test_results
def parse_unittest_output(file_path):
    """Parse an xmlrunner XML report into a {"classname:testname": passed} dict.

    :param file_path: path to the JUnit-style XML file written by xmlrunner.
    :return: dict mapping "<classname>:<test name>" to True (passed) or
             False (failed or errored).
    :raises FileNotFoundError / ET.ParseError: if the report is missing/invalid.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()
    test_results = {}
    for testcase in root.iter('testcase'):
        class_name = testcase.attrib['classname']
        name = testcase.attrib['name']
        # A skipped test neither passed nor failed; leave it out of the
        # results, mirroring the old regex parser which only matched
        # ok/FAIL/ERROR statuses.
        if testcase.find('skipped') is not None:
            continue
        # xmlrunner records FAIL as a <failure> child and ERROR as an
        # <error> child. Both must count as not-passed; checking only
        # <failure> would silently report errored tests as successes.
        failed = (
            testcase.find('failure') is not None
            or testcase.find('error') is not None
        )
        test_results[class_name + ":" + name] = not failed
    return test_results
def parse_test_timing(test_results):
def parse_test_timing(file_path):
with open(file_path, 'r') as file:
test_results = file.read()
m = re.findall(r"#####([^#]*?)#####([\d\.]*?)\^\^\^\^\^", test_results)
parsed_results = {}
for test_name, time_taken in m:

View file

@ -1,4 +1,6 @@
import subprocess
import unittest
import xmlrunner
def run_tests(
@ -18,18 +20,20 @@ def run_tests(
cwd=cwd,
env=test_env,
)
stdout = pytest_results.stdout.decode("utf-8")
stderr = pytest_results.stderr.decode("utf-8")
# TODO result file path for pytest
result_file_path = "pytest_results.xml" # FIXME
elif test_framework == "unittest":
unittest_results = subprocess.run(
["python", "-m", "unittest"] + (["-v"] if verbose else []) + [test_path],
["python", "-m", "xmlrunner"]
+ (["-v"] if verbose else [])
+ [test_path]
+ ["--output-file", "unittest_results.xml"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cwd,
env=test_env,
)
stdout = unittest_results.stdout.decode("utf-8")
stderr = unittest_results.stderr.decode("utf-8")
result_file_path = "unittest_results.xml"
else:
raise ValueError("Invalid test framework, we only support Pytest and Unittest currently.")
return stdout, stderr
return result_file_path

View file

@ -17,7 +17,8 @@ tiktoken = "^0.5.1"
timeout-decorator = "^0.5.0"
pytest-timeout = "^2.1.0"
astunparse-fixed = {version = "^1.7.0", optional = true, python = ">=3.8.0,<3.9"}
tomli = {version = "^2.0.1" , optional = true, python = "<3.11"}
tomli = {version = "^2.0.1", optional = true, python = "<3.11"}
unittest-xml-reporting = "^3.2.0"
[tool.poetry.group.dev.dependencies]
ipython = "^8.12.0"

43
tests/test_test_runner.py Normal file
View file

@ -0,0 +1,43 @@
import os
import subprocess
from codeflash.verification.test_runner import run_tests
def test_unittest_runner():
    """End-to-end check: write a small unittest file, run it through
    run_tests with the unittest framework, and parse the XML results.
    """
    # NOTE(review): parse_unittest_output is called below but not imported
    # at the top of this file — confirm and import it from its module.
    code = """import unittest


def sorter(arr):
    arr.sort()
    return arr


class TestUnittestRunnerSorter(unittest.TestCase):
    def test_sort(self):
        arr = [5, 4, 3, 2, 1, 0]
        output = sorter(arr)
        self.assertEqual(output, [0, 1, 2, 3, 4, 5])
"""
    cur_dir_path = os.path.dirname(os.path.abspath(__file__))
    new_test_path = os.path.join(cur_dir_path, "test_unittest_runner.py")
    try:
        with open(new_test_path, "w") as file:
            file.write(code)
        result_file = run_tests(
            new_test_path,
            test_framework="unittest",
            cwd=cur_dir_path,
        )
        results = parse_unittest_output(result_file)
        # Exactly one test case, and it must have passed.
        assert results, "expected at least one parsed test result"
        assert all(results.values())
        assert any(key.endswith(":test_sort") for key in results)
    finally:
        # Don't leave the generated test file behind to pollute later runs.
        if os.path.exists(new_test_path):
            os.remove(new_test_path)
def test_pytest_runner():
    """End-to-end check: write a small test file, run it through run_tests
    with the pytest framework, and verify a results file is produced.

    NOTE(review): run_tests' pytest branch still has a FIXME for the result
    file path and parse_pytest_output is not imported here — this test
    asserts only that a result file path comes back until that lands.
    """
    code = """import unittest


def sorter(arr):
    arr.sort()
    return arr


class TestUnittestRunnerSorter(unittest.TestCase):
    def test_sort(self):
        arr = [5, 4, 3, 2, 1, 0]
        output = sorter(arr)
        self.assertEqual(output, [0, 1, 2, 3, 4, 5])
"""
    cur_dir_path = os.path.dirname(os.path.abspath(__file__))
    new_test_path = os.path.join(cur_dir_path, "test_pytest_runner.py")
    try:
        with open(new_test_path, "w") as file:
            file.write(code)
        result_file = run_tests(
            new_test_path,
            test_framework="pytest",
            cwd=cur_dir_path,
        )
        # Until the pytest XML output path is wired up, assert the runner
        # reports where results should be written.
        assert result_file, "run_tests returned no result file path"
    finally:
        # Don't leave the generated test file behind to pollute later runs.
        if os.path.exists(new_test_path):
            os.remove(new_test_path)

14
unittest_results.xml Normal file
View file

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuites>
<testsuite name="code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented__perfinstrumented.TestPigLatin-20231023155843" tests="1" file="code_to_optimize/tests/unittest/test_bubble_sort__perfinstrumented__perfinstrumented.py" time="0.002" timestamp="2023-10-23T15:58:43" failures="0" errors="0" skipped="0">
<testcase classname="code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented__perfinstrumented.TestPigLatin" name="test_sort" time="0.002" timestamp="2023-10-23T15:58:43" file="code_to_optimize/tests/unittest/test_bubble_sort__perfinstrumented__perfinstrumented.py" line="10">
<system-out><![CDATA[#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:3#####708^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:1#####12208^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:12#####334^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:4#####2792^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort__perfinstrumented:sorter:test_sort:21#####16459^^^^^
#####code_to_optimize.tests.unittest.test_bubble_sort:sorter:test_sort:7#####22167^^^^^
]]></system-out>
</testcase>
</testsuite>
</testsuites>