Fix excessive logging in create_pr.py that creates 43MB+ log files

**Problem:**
Line 38 of create_pr.py logged all keys from function_to_tests dictionary
using `list(function_to_tests.keys())`. For large codebases like budibase
(1012 functions), this creates massive log files (43MB+) with a single
DEBUG statement printing thousands of function names.

**Root Cause:**
Debug logging statement was designed for small projects but became
problematic when used with monorepos containing hundreds of packages.

**Evidence:**
- Trace ID: 3d2ad2f0-254a-4401-9c93-84f691acabf0 (43MB log, 534K lines)
- Line 533922 shows list of 1000+ function keys in single log entry
- Affects 4/22 logs (18%) in recent optimization run
- Each occurrence adds ~100KB to log file

**Fix:**
Changed line 38 from:
  logger.debug(f"[PR-DEBUG] function_to_tests keys: {list(function_to_tests.keys())}")
to:
  logger.debug(f"[PR-DEBUG] function_to_tests has {len(function_to_tests)} keys")

This logs only the count instead of the full list, reducing log output
from ~100KB to ~50 bytes per call.

**Impact:**
- Severity: MEDIUM (doesn't break functionality, but bloats logs)
- Systematic: Reproducible on every optimization run with large codebases
- Benefits: Significantly reduces log file sizes for monorepo projects

**Testing:**
- Added 2 regression tests in test_create_pr_logging_bug.py
- Tests verify count is logged, not full key list
- Tests verify log output stays under 10KB (vs 100KB+ before)
- All existing tests pass
- Linting passes (uv run prek)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
mohammed ahmed 2026-04-05 03:20:12 +00:00
parent 8d51e2d310
commit c980d43b9f
2 changed files with 148 additions and 1 deletions

View file

@ -35,7 +35,7 @@ def existing_tests_source_for(
logger.debug(
f"[PR-DEBUG] existing_tests_source_for called with func={function_qualified_name_with_modules_from_root}"
)
logger.debug(f"[PR-DEBUG] function_to_tests keys: {list(function_to_tests.keys())}")
logger.debug(f"[PR-DEBUG] function_to_tests has {len(function_to_tests)} keys")
logger.debug(f"[PR-DEBUG] original_runtimes_all has {len(original_runtimes_all)} entries")
logger.debug(f"[PR-DEBUG] optimized_runtimes_all has {len(optimized_runtimes_all)} entries")
test_files = function_to_tests.get(function_qualified_name_with_modules_from_root)

View file

@ -0,0 +1,147 @@
"""Test for Issue #9: Excessive logging in create_pr.py
Verifies that function_to_tests logging uses count instead of full key list.
"""
import logging
from io import StringIO
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from codeflash.result.create_pr import existing_tests_source_for
from codeflash.verification.verification_utils import TestConfig
def test_function_to_tests_logging_uses_count_not_full_list():
"""
Test that function_to_tests debug logging outputs count, not all keys.
Bug: Line 38 of create_pr.py logs `list(function_to_tests.keys())` which
creates massive log files (43MB+) when function_to_tests has thousands
of entries (e.g., budibase monorepo with 1012 functions).
Fix: Should log only `len(function_to_tests)` instead.
"""
# Create a large function_to_tests dict (simulate budibase scale)
function_to_tests = {
f"package{i}.module{j}.function{k}": set()
for i in range(10)
for j in range(10)
for k in range(10)
}
# Total: 1000 keys
# Capture debug logs
log_stream = StringIO()
handler = logging.StreamHandler(log_stream)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
# Get the 'rich' logger used by console.py
logger = logging.getLogger('rich')
original_level = logger.level
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
try:
# Mock test_cfg
test_cfg = Mock(spec=TestConfig)
test_cfg.test_framework = "jest"
# Call the function
existing_tests_source_for(
function_qualified_name_with_modules_from_root="test.function",
function_to_tests=function_to_tests,
test_cfg=test_cfg,
original_runtimes_all={},
optimized_runtimes_all={},
test_files_registry=None,
)
# Get log output
log_output = log_stream.getvalue()
# ASSERTION 1: Should log the count
assert "function_to_tests" in log_output, "Should mention function_to_tests in logs"
assert "1000" in log_output or "len" in log_output, \
"Should log count of function_to_tests, not full list"
# ASSERTION 2: Should NOT log all keys (would create massive logs)
# Check that we don't have dozens of "package0.module" strings
package_mentions = log_output.count("package0.module")
assert package_mentions < 10, \
f"Should not log all {len(function_to_tests)} keys. " \
f"Found {package_mentions} package mentions, which suggests full list logging. " \
f"Log output size: {len(log_output)} bytes"
# ASSERTION 3: Log output should be reasonable size (< 10KB for this debug line)
# The buggy version would produce ~100KB+ for 1000 keys
assert len(log_output) < 10000, \
f"Log output too large ({len(log_output)} bytes). " \
f"This suggests logging full key list instead of count."
finally:
logger.removeHandler(handler)
logger.setLevel(original_level)
def test_function_to_tests_logging_with_small_dict():
"""
Test that logging still works correctly with small function_to_tests dict.
This ensures the fix doesn't break the normal case.
"""
# Small dict (< 10 entries)
function_to_tests = {
"module.function1": set(),
"module.function2": set(),
}
# Capture debug logs
log_stream = StringIO()
handler = logging.StreamHandler(log_stream)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
# Get the 'rich' logger used by console.py
logger = logging.getLogger('rich')
original_level = logger.level
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
try:
# Mock test_cfg
test_cfg = Mock(spec=TestConfig)
test_cfg.test_framework = "jest"
# Call the function
existing_tests_source_for(
function_qualified_name_with_modules_from_root="test.function",
function_to_tests=function_to_tests,
test_cfg=test_cfg,
original_runtimes_all={},
optimized_runtimes_all={},
test_files_registry=None,
)
# Get log output
log_output = log_stream.getvalue()
# Should mention function_to_tests
assert "function_to_tests" in log_output
# Log should be reasonable size
assert len(log_output) < 5000, \
f"Even with small dict, log output is too large ({len(log_output)} bytes)"
finally:
logger.removeHandler(handler)
logger.setLevel(original_level)
if __name__ == "__main__":
pytest.main([__file__, "-v"])