fix: pin PYTHONHASHSEED=0 in test env and enhance diff diagnostics

Set PYTHONHASHSEED=0 in test subprocess environments (unless the
variable is already set) so original and candidate runs use identical
hash behavior, eliminating a source of non-deterministic return-value
comparisons.

Also raise the RETURN_VALUE diff log from debug to info level, now
reporting the actual types and truncated repr values, and add
info-level [DIFF] logs for DID_PASS and STDOUT mismatches.
This commit is contained in:
Kevin Turcios 2026-04-10 06:38:08 -05:00
parent e191f74aa6
commit 986654b7e6
2 changed files with 24 additions and 7 deletions

View file

@ -3253,6 +3253,11 @@ class FunctionOptimizer:
test_env["CODEFLASH_TEST_ITERATION"] = str(codeflash_test_iteration)
test_env["CODEFLASH_TRACER_DISABLE"] = str(codeflash_tracer_disable)
test_env["CODEFLASH_LOOP_INDEX"] = str(codeflash_loop_index)
# Pin PYTHONHASHSEED so original and candidate test processes use the same hash seed.
# Without this, each subprocess gets a random seed, which changes str/bytes hashes and can
# reorder set iteration (dicts keep insertion order), leading to flaky return-value comparisons.
# A PYTHONHASHSEED value already present in test_env is left untouched.
if "PYTHONHASHSEED" not in test_env:
test_env["PYTHONHASHSEED"] = "0"
return test_env
def line_profiler_step(

View file

@ -111,6 +111,11 @@ def compare_test_results(
original_pytest_error=original_pytest_error,
)
)
logger.info(
f"[DIFF] scope=DID_PASS test_id={test_id} "
f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} "
f"test_type={original_test_result.test_type} cand_error={cdd_pytest_error[:200] if cdd_pytest_error else 'none'}"
)
elif not pass_fail_only and not comparator(
original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj
@ -129,13 +134,15 @@ def compare_test_results(
)
try:
logger.debug(
f"File Name: {original_test_result.file_name}\n"
f"Test Type: {original_test_result.test_type}\n"
f"Verification Type: {original_test_result.verification_type}\n"
f"Invocation ID: {original_test_result.id}\n"
f"Original return value: {original_test_result.return_value}\n"
f"Candidate return value: {cdd_test_result.return_value}\n"
_orig_rv = original_test_result.return_value
_cand_rv = cdd_test_result.return_value
logger.info(
f"[DIFF] scope=RETURN_VALUE test_id={test_id} "
f"orig_type={type(_orig_rv).__name__} cand_type={type(_cand_rv).__name__} "
f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} "
f"test_type={original_test_result.test_type} "
f"orig_repr={safe_repr(_orig_rv)[:200]} "
f"cand_repr={safe_repr(_cand_rv)[:200]}"
)
except Exception as e:
logger.error(e)
@ -156,6 +163,11 @@ def compare_test_results(
original_pytest_error=original_pytest_error,
)
)
logger.info(
f"[DIFF] scope=STDOUT test_id={test_id} "
f"orig_stdout={str(original_test_result.stdout)[:200]} "
f"cand_stdout={str(cdd_test_result.stdout)[:200]}"
)
sys.setrecursionlimit(original_recursion_limit)
logger.info(