stdout comparison in E2E

This commit is contained in:
Kevin Turcios 2025-02-18 05:25:31 -05:00
parent 1f25df9061
commit f40c388ef6
3 changed files with 33 additions and 11 deletions

View file

@ -1,8 +1,10 @@
def sorter(arr):  # E2E fixture: in-place ascending bubble sort; keep the 10-line layout (coverage expects lines 2-10)
    print("codeflash stdout: Sorting list")  # the E2E stdout check looks for this print statement in the candidate
    for i in range(len(arr)):  # one full sweep per element
        for j in range(len(arr) - 1):  # walk every adjacent pair
            if arr[j] > arr[j + 1]:  # pair is out of order
                temp = arr[j]  # three-step swap via a temporary
                arr[j] = arr[j + 1]
                arr[j + 1] = temp
    print(f"result: {arr}")  # the E2E stdout check also looks for this print statement
    return arr  # the same list object that was passed in, mutated in place

View file

@ -11,11 +11,15 @@ def run_test(expected_improvement_pct: int) -> bool:
test_framework="pytest",
min_improvement_x=1.0,
coverage_expectations=[
CoverageExpectation(function_name="sorter", expected_coverage=100.0, expected_lines=[2, 3, 4, 5, 6, 7, 8])
CoverageExpectation(
function_name="sorter", expected_coverage=100.0, expected_lines=[2, 3, 4, 5, 6, 7, 8, 9, 10]
)
],
)
cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve()
return run_codeflash_command(cwd, config, expected_improvement_pct)
return run_codeflash_command(
cwd, config, expected_improvement_pct, ['print("codeflash stdout: Sorting list")', 'print(f"result: {arr}")']
)
if __name__ == "__main__":

View file

@ -63,19 +63,21 @@ def validate_coverage(stdout: str, expectations: list[CoverageExpectation]) -> b
assert coverage_match, f"Failed to find coverage data for {expect.function_name}"
coverage = float(coverage_match.group(1))
assert (
coverage == expect.expected_coverage
), f"Coverage was {coverage} instead of {expect.expected_coverage} for function: {expect.function_name}"
assert coverage == expect.expected_coverage, (
f"Coverage was {coverage} instead of {expect.expected_coverage} for function: {expect.function_name}"
)
executed_lines = list(map(int, coverage_match.group(2).split(", ")))
assert (
executed_lines == expect.expected_lines
), f"Executed lines were {executed_lines} instead of {expect.expected_lines} for function: {expect.function_name}"
assert executed_lines == expect.expected_lines, (
f"Executed lines were {executed_lines} instead of {expect.expected_lines} for function: {expect.function_name}"
)
return True
def run_codeflash_command(cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int) -> bool:
def run_codeflash_command(
cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int, expected_in_stdout: list[str] = None
) -> bool:
logging.basicConfig(level=logging.INFO)
if config.trace_mode:
return run_trace_test(cwd, config, expected_improvement_pct)
@ -97,12 +99,21 @@ def run_codeflash_command(cwd: pathlib.Path, config: TestConfig, expected_improv
return_code = process.wait()
stdout = "".join(output)
if not validate_output(stdout, return_code, expected_improvement_pct, config):
validated = validate_output(stdout, return_code, expected_improvement_pct, config)
if not validated:
# Write original file contents back to file
path_to_file.write_text(file_contents, "utf-8")
logging.info("Codeflash run did not meet expected requirements for testing, reverting file changes.")
return False
return True
if expected_in_stdout:
stdout_validated = validate_stdout_in_candidate(stdout, expected_in_stdout)
if not stdout_validated:
logging.error("Failed to find expected output in candidate output")
validated = False
logging.info(f"Success: Expected output found in candidate output")
return validated
def build_command(cwd: pathlib.Path, config: TestConfig, test_root: pathlib.Path) -> list[str]:
@ -164,6 +175,11 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int
return True
def validate_stdout_in_candidate(stdout: str, expected_in_stdout: list[str]) -> bool:
    """Check that every expected snippet appears in the best-candidate section of *stdout*.

    The section begins at the first ``"INFO Best candidate"`` marker and ends at the
    next ``"Best Candidate Explanation"`` marker that follows it.

    Args:
        stdout: Captured output of the run being validated.
        expected_in_stdout: Substrings that must all occur inside the section.

    Returns:
        True if every expected substring occurs in the section, otherwise False.
        When the start marker is absent the section is empty.
    """
    start = stdout.find("INFO Best candidate")
    if start == -1:
        # str.find returns -1 on a miss; using it as a slice start would select
        # the last character of the output instead of "nothing".
        candidate_output = ""
    else:
        # Look for the end marker only after the start marker so an earlier
        # occurrence cannot produce an empty (end < start) slice.
        end = stdout.find("Best Candidate Explanation", start)
        # A missing end marker means the section runs to the end of the output;
        # slicing to -1 would silently drop the final character.
        candidate_output = stdout[start:] if end == -1 else stdout[start:end]
    return all(expected in candidate_output for expected in expected_in_stdout)
def run_trace_test(cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int) -> bool:
# First command: Run the tracer
test_root = cwd / "tests" / (config.test_framework or "")