stdout comparison in E2E
This commit is contained in:
parent
1f25df9061
commit
f40c388ef6
3 changed files with 33 additions and 11 deletions
|
|
@ -1,8 +1,10 @@
|
|||
def sorter(arr):  # Sorts `arr` in place by swapping out-of-order neighbours and returns the same list.
|
||||
print("codeflash stdout: Sorting list")  # fixed marker line written to stdout
|
||||
for i in range(len(arr)):  # `i` is never read below; it only repeats the work len(arr) times
|
||||
for j in range(len(arr) - 1):  # every adjacent pair (j, j + 1); presumably nested in the `i` loop - indentation is lost in this view
|
||||
if arr[j] > arr[j + 1]:  # left neighbour is larger, so the pair is out of order
|
||||
temp = arr[j]  # three-step swap through a temporary
|
||||
arr[j] = arr[j + 1]
|
||||
arr[j + 1] = temp
|
||||
print(f"result: {arr}")  # prints the current list; presumably after both loops - TODO confirm against the real file
|
||||
return arr  # the caller's own list object, mutated above
|
||||
|
|
@ -11,11 +11,15 @@ def run_test(expected_improvement_pct: int) -> bool:
|
|||
test_framework="pytest",
|
||||
min_improvement_x=1.0,
|
||||
coverage_expectations=[
|
||||
CoverageExpectation(function_name="sorter", expected_coverage=100.0, expected_lines=[2, 3, 4, 5, 6, 7, 8])
|
||||
CoverageExpectation(
|
||||
function_name="sorter", expected_coverage=100.0, expected_lines=[2, 3, 4, 5, 6, 7, 8, 9, 10]
|
||||
)
|
||||
],
|
||||
)
|
||||
cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve()
|
||||
return run_codeflash_command(cwd, config, expected_improvement_pct)
|
||||
return run_codeflash_command(
|
||||
cwd, config, expected_improvement_pct, ['print("codeflash stdout: Sorting list")', 'print(f"result: {arr}")']
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -63,19 +63,21 @@ def validate_coverage(stdout: str, expectations: list[CoverageExpectation]) -> b
|
|||
assert coverage_match, f"Failed to find coverage data for {expect.function_name}"
|
||||
|
||||
coverage = float(coverage_match.group(1))
|
||||
assert (
|
||||
coverage == expect.expected_coverage
|
||||
), f"Coverage was {coverage} instead of {expect.expected_coverage} for function: {expect.function_name}"
|
||||
assert coverage == expect.expected_coverage, (
|
||||
f"Coverage was {coverage} instead of {expect.expected_coverage} for function: {expect.function_name}"
|
||||
)
|
||||
|
||||
executed_lines = list(map(int, coverage_match.group(2).split(", ")))
|
||||
assert (
|
||||
executed_lines == expect.expected_lines
|
||||
), f"Executed lines were {executed_lines} instead of {expect.expected_lines} for function: {expect.function_name}"
|
||||
assert executed_lines == expect.expected_lines, (
|
||||
f"Executed lines were {executed_lines} instead of {expect.expected_lines} for function: {expect.function_name}"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def run_codeflash_command(cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int) -> bool:
|
||||
def run_codeflash_command(
|
||||
cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int, expected_in_stdout: list[str] = None
|
||||
) -> bool:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
if config.trace_mode:
|
||||
return run_trace_test(cwd, config, expected_improvement_pct)
|
||||
|
|
@ -97,12 +99,21 @@ def run_codeflash_command(cwd: pathlib.Path, config: TestConfig, expected_improv
|
|||
return_code = process.wait()
|
||||
stdout = "".join(output)
|
||||
|
||||
if not validate_output(stdout, return_code, expected_improvement_pct, config):
|
||||
validated = validate_output(stdout, return_code, expected_improvement_pct, config)
|
||||
if not validated:
|
||||
# Write original file contents back to file
|
||||
path_to_file.write_text(file_contents, "utf-8")
|
||||
logging.info("Codeflash run did not meet expected requirements for testing, reverting file changes.")
|
||||
return False
|
||||
return True
|
||||
|
||||
if expected_in_stdout:
|
||||
stdout_validated = validate_stdout_in_candidate(stdout, expected_in_stdout)
|
||||
if not stdout_validated:
|
||||
logging.error("Failed to find expected output in candidate output")
|
||||
validated = False
|
||||
logging.info(f"Success: Expected output found in candidate output")
|
||||
|
||||
return validated
|
||||
|
||||
|
||||
def build_command(cwd: pathlib.Path, config: TestConfig, test_root: pathlib.Path) -> list[str]:
|
||||
|
|
@ -164,6 +175,11 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int
|
|||
return True
|
||||
|
||||
|
||||
def validate_stdout_in_candidate(stdout: str, expected_in_stdout: list[str]) -> bool:
    """Return True if every expected snippet occurs in the best-candidate section of ``stdout``.

    The section starts at the first ``"INFO Best candidate"`` marker and ends just
    before the first ``"Best Candidate Explanation"`` marker that follows it.

    Args:
        stdout: Full captured output to search.
        expected_in_stdout: Substrings that must all appear inside the section.

    Returns:
        True when all substrings are found (vacuously True for an empty list).
        False when the start marker is absent or any substring is missing.
    """
    if not expected_in_stdout:
        # Nothing to look for; keep the vacuous-truth result of all([]).
        return True

    section_start = stdout.find("INFO Best candidate")
    if section_start == -1:
        # str.find signals a miss with -1; used as a slice bound it would
        # silently select from the last character instead of failing.
        return False

    # Search for the end marker only after the start marker so an earlier
    # occurrence cannot produce an empty or inverted section.
    section_end = stdout.find("Best Candidate Explanation", section_start)
    if section_end == -1:
        # No closing marker: the section runs to the end of the output
        # (a -1 slice bound would drop the final character).
        section_end = len(stdout)

    candidate_output = stdout[section_start:section_end]
    return all(expected in candidate_output for expected in expected_in_stdout)
|
||||
|
||||
|
||||
def run_trace_test(cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int) -> bool:
|
||||
# First command: Run the tracer
|
||||
test_root = cwd / "tests" / (config.test_framework or "")
|
||||
|
|
|
|||
Loading…
Reference in a new issue