From 82ec301fad6f78921dee84b9a98fcd22e76427c7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:49:43 -0500 Subject: [PATCH] chore: remove diagnostic logging from compare_test_results --- codeflash/verification/equivalence.py | 53 ++++----------------------- 1 file changed, 7 insertions(+), 46 deletions(-) diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index 68cf216de..f660e35ea 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -41,17 +41,11 @@ def compare_test_results( ) test_diffs: list[TestDiff] = [] did_all_timeout: bool = True - _matched_count = 0 - _skipped_cdd_only = 0 - _skipped_init_state = 0 - _skipped_none = 0 - _timed_out_count = 0 for test_id in test_ids_superset: original_test_result = original_results.get_by_unique_invocation_loop_id(test_id) cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id) if cdd_test_result is not None and original_test_result is None: - _skipped_cdd_only += 1 continue # If helper function instance_state verification is not present, that's ok. continue if ( @@ -59,15 +53,11 @@ def compare_test_results( and original_test_result.verification_type == VerificationType.INIT_STATE_HELPER and cdd_test_result is None ): - _skipped_init_state += 1 continue if original_test_result is None or cdd_test_result is None: - _skipped_none += 1 continue - _matched_count += 1 did_all_timeout = did_all_timeout and original_test_result.timed_out if original_test_result.timed_out: - _timed_out_count += 1 continue superset_obj = False if original_test_result.verification_type and ( @@ -111,11 +101,6 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) - logger.info( - f"[DIFF] scope=DID_PASS test_id={test_id} " - f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " - f"test_type={original_test_result.test_type} cand_error={cdd_pytest_error[:200] if cdd_pytest_error else 'none'}" - ) elif not pass_fail_only and not comparator( original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj @@ -134,15 +119,13 @@ def compare_test_results( ) try: - _orig_rv = original_test_result.return_value - _cand_rv = cdd_test_result.return_value - logger.info( - f"[DIFF] scope=RETURN_VALUE test_id={test_id} " - f"orig_type={type(_orig_rv).__name__} cand_type={type(_cand_rv).__name__} " - f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " - f"test_type={original_test_result.test_type} " - f"orig_repr={safe_repr(_orig_rv)[:200]} " - f"cand_repr={safe_repr(_cand_rv)[:200]}" + logger.debug( + f"File Name: {original_test_result.file_name}\n" + f"Test Type: {original_test_result.test_type}\n" + f"Verification Type: {original_test_result.verification_type}\n" + f"Invocation ID: {original_test_result.id}\n" + f"Original return value: {original_test_result.return_value}\n" + f"Candidate return value: {cdd_test_result.return_value}\n" ) except Exception as e: logger.error(e) @@ -163,30 +146,8 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) - logger.info( - f"[DIFF] scope=STDOUT test_id={test_id} " - f"orig_stdout={str(original_test_result.stdout)[:200]} " - f"cand_stdout={str(cdd_test_result.stdout)[:200]}" - ) sys.setrecursionlimit(original_recursion_limit) - logger.info( - f"[compare_test_results] superset={len(test_ids_superset)} matched={_matched_count} " - f"skipped(cdd_only={_skipped_cdd_only} init_state={_skipped_init_state} none={_skipped_none}) " - f"timed_out={_timed_out_count} did_all_timeout={did_all_timeout} diffs={len(test_diffs)} " - f"pass_fail_only={pass_fail_only} orig_len={len(original_results)} cand_len={len(candidate_results)}" - ) - if did_all_timeout and _matched_count > 0 and _matched_count <= 3: - # Log a few sample matched IDs for debugging - _sample_ids = [] - for test_id in test_ids_superset: - orig = original_results.get_by_unique_invocation_loop_id(test_id) - cand = candidate_results.get_by_unique_invocation_loop_id(test_id) - if orig is not None and cand is not None: - _sample_ids.append(f" id={test_id} orig_timed_out={orig.timed_out} orig_pass={orig.did_pass}") - if len(_sample_ids) >= 3: - break - logger.info(f"[compare_test_results] sample matched: {_sample_ids}") if did_all_timeout: return False, test_diffs return len(test_diffs) == 0, test_diffs