Fix excessive logging in create_pr.py that creates 43MB+ log files

**Problem:** Line 38 of create_pr.py logged all keys from function_to_tests dictionary using `list(function_to_tests.keys())`. For large codebases like budibase (1012 functions), this creates massive log files (43MB+) with a single DEBUG statement printing thousands of function names. **Root Cause:** Debug logging statement was designed for small projects but became problematic when used with monorepos containing hundreds of packages. **Evidence:** - Trace ID: 3d2ad2f0-254a-4401-9c93-84f691acabf0 (43MB log, 534K lines) - Line 533922 shows list of 1000+ function keys in single log entry - Affects 4/22 logs (18%) in recent optimization run - Each occurrence adds ~100KB to log file **Fix:** Changed line 38 from: logger.debug(f"[PR-DEBUG] function_to_tests keys: {list(function_to_tests.keys())}") to: logger.debug(f"[PR-DEBUG] function_to_tests has {len(function_to_tests)} keys") This logs only the count instead of the full list, reducing log output from ~100KB to ~50 bytes per call. **Impact:** - Severity: MEDIUM (doesn't break functionality, but bloats logs) - Systematic: Reproducible on every optimization run with large codebases - Benefits: Significantly reduces log file sizes for monorepo projects **Testing:** - Added 2 regression tests in test_create_pr_logging_bug.py - Tests verify count is logged, not full key list - Tests verify log output stays under 10KB (vs 100KB+ before) - All existing tests pass - Linting passes (uv run prek) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-05-04 18:25:17 +00:00 · 2026-04-05 03:20:12 +00:00 · 2026-04-05 03:20:12 +00:00 · c980d43b9f
commit c980d43b9f
parent 8d51e2d310
2 changed files with 148 additions and 1 deletions
--- a/codeflash/result/create_pr.py
+++ b/codeflash/result/create_pr.py
@ -35,7 +35,7 @@ def existing_tests_source_for(
    logger.debug(
        f"[PR-DEBUG] existing_tests_source_for called with func={function_qualified_name_with_modules_from_root}"
    )
-    logger.debug(f"[PR-DEBUG] function_to_tests keys: {list(function_to_tests.keys())}")
+    logger.debug(f"[PR-DEBUG] function_to_tests has {len(function_to_tests)} keys")
    logger.debug(f"[PR-DEBUG] original_runtimes_all has {len(original_runtimes_all)} entries")
    logger.debug(f"[PR-DEBUG] optimized_runtimes_all has {len(optimized_runtimes_all)} entries")
    test_files = function_to_tests.get(function_qualified_name_with_modules_from_root)
--- a/tests/test_result/test_create_pr_logging_bug.py
+++ b/tests/test_result/test_create_pr_logging_bug.py
@ -0,0 +1,147 @@
+"""Test for Issue #9: Excessive logging in create_pr.py
+
+Verifies that function_to_tests logging uses count instead of full key list.
+"""
+
+import logging
+from io import StringIO
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+
+from codeflash.result.create_pr import existing_tests_source_for
+from codeflash.verification.verification_utils import TestConfig
+
+
+def test_function_to_tests_logging_uses_count_not_full_list():
+    """
+    Test that function_to_tests debug logging outputs count, not all keys.
+
+    Bug: Line 38 of create_pr.py logs `list(function_to_tests.keys())` which
+    creates massive log files (43MB+) when function_to_tests has thousands
+    of entries (e.g., budibase monorepo with 1012 functions).
+
+    Fix: Should log only `len(function_to_tests)` instead.
+    """
+    # Create a large function_to_tests dict (simulate budibase scale)
+    function_to_tests = {
+        f"package{i}.module{j}.function{k}": set()
+        for i in range(10)
+        for j in range(10)
+        for k in range(10)
+    }
+    # Total: 1000 keys
+
+    # Capture debug logs
+    log_stream = StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter('%(message)s')
+    handler.setFormatter(formatter)
+
+    # Get the 'rich' logger used by console.py
+    logger = logging.getLogger('rich')
+    original_level = logger.level
+    logger.addHandler(handler)
+    logger.setLevel(logging.DEBUG)
+
+    try:
+        # Mock test_cfg
+        test_cfg = Mock(spec=TestConfig)
+        test_cfg.test_framework = "jest"
+
+        # Call the function
+        existing_tests_source_for(
+            function_qualified_name_with_modules_from_root="test.function",
+            function_to_tests=function_to_tests,
+            test_cfg=test_cfg,
+            original_runtimes_all={},
+            optimized_runtimes_all={},
+            test_files_registry=None,
+        )
+
+        # Get log output
+        log_output = log_stream.getvalue()
+
+        # ASSERTION 1: Should log the count
+        assert "function_to_tests" in log_output, "Should mention function_to_tests in logs"
+        assert "1000" in log_output or "len" in log_output, \
+            "Should log count of function_to_tests, not full list"
+
+        # ASSERTION 2: Should NOT log all keys (would create massive logs)
+        # Check that we don't have dozens of "package0.module" strings
+        package_mentions = log_output.count("package0.module")
+        assert package_mentions < 10, \
+            f"Should not log all {len(function_to_tests)} keys. " \
+            f"Found {package_mentions} package mentions, which suggests full list logging. " \
+            f"Log output size: {len(log_output)} bytes"
+
+        # ASSERTION 3: Log output should be reasonable size (< 10KB for this debug line)
+        # The buggy version would produce ~100KB+ for 1000 keys
+        assert len(log_output) < 10000, \
+            f"Log output too large ({len(log_output)} bytes). " \
+            f"This suggests logging full key list instead of count."
+
+    finally:
+        logger.removeHandler(handler)
+        logger.setLevel(original_level)
+
+
+def test_function_to_tests_logging_with_small_dict():
+    """
+    Test that logging still works correctly with small function_to_tests dict.
+
+    This ensures the fix doesn't break the normal case.
+    """
+    # Small dict (< 10 entries)
+    function_to_tests = {
+        "module.function1": set(),
+        "module.function2": set(),
+    }
+
+    # Capture debug logs
+    log_stream = StringIO()
+    handler = logging.StreamHandler(log_stream)
+    handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter('%(message)s')
+    handler.setFormatter(formatter)
+
+    # Get the 'rich' logger used by console.py
+    logger = logging.getLogger('rich')
+    original_level = logger.level
+    logger.addHandler(handler)
+    logger.setLevel(logging.DEBUG)
+
+    try:
+        # Mock test_cfg
+        test_cfg = Mock(spec=TestConfig)
+        test_cfg.test_framework = "jest"
+
+        # Call the function
+        existing_tests_source_for(
+            function_qualified_name_with_modules_from_root="test.function",
+            function_to_tests=function_to_tests,
+            test_cfg=test_cfg,
+            original_runtimes_all={},
+            optimized_runtimes_all={},
+            test_files_registry=None,
+        )
+
+        # Get log output
+        log_output = log_stream.getvalue()
+
+        # Should mention function_to_tests
+        assert "function_to_tests" in log_output
+
+        # Log should be reasonable size
+        assert len(log_output) < 5000, \
+            f"Even with small dict, log output is too large ({len(log_output)} bytes)"
+
+    finally:
+        logger.removeHandler(handler)
+        logger.setLevel(original_level)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])