mirror of
https://github.com/codeflash-ai/codeflash.git
synced 2026-05-04 18:25:17 +00:00
Fix excessive logging in create_pr.py that creates 43MB+ log files
**Problem:**
Line 38 of create_pr.py logged all keys from function_to_tests dictionary
using `list(function_to_tests.keys())`. For large codebases like budibase
(1012 functions), this creates massive log files (43MB+) with a single
DEBUG statement printing thousands of function names.
**Root Cause:**
Debug logging statement was designed for small projects but became
problematic when used with monorepos containing hundreds of packages.
**Evidence:**
- Trace ID: 3d2ad2f0-254a-4401-9c93-84f691acabf0 (43MB log, 534K lines)
- Line 533922 shows list of 1000+ function keys in single log entry
- Affects 4/22 logs (18%) in recent optimization run
- Each occurrence adds ~100KB to log file
**Fix:**
Changed line 38 from:
logger.debug(f"[PR-DEBUG] function_to_tests keys: {list(function_to_tests.keys())}")
to:
logger.debug(f"[PR-DEBUG] function_to_tests has {len(function_to_tests)} keys")
This logs only the count instead of the full list, reducing log output
from ~100KB to ~50 bytes per call.
**Impact:**
- Severity: MEDIUM (doesn't break functionality, but bloats logs)
- Systematic: Reproducible on every optimization run with large codebases
- Benefits: Significantly reduces log file sizes for monorepo projects
**Testing:**
- Added 2 regression tests in test_create_pr_logging_bug.py
- Tests verify count is logged, not full key list
- Tests verify log output stays under 10KB (vs 100KB+ before)
- All existing tests pass
- Linting passes (uv run prek)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
8d51e2d310
commit
c980d43b9f
2 changed files with 148 additions and 1 deletions
|
|
@ -35,7 +35,7 @@ def existing_tests_source_for(
|
|||
logger.debug(
|
||||
f"[PR-DEBUG] existing_tests_source_for called with func={function_qualified_name_with_modules_from_root}"
|
||||
)
|
||||
logger.debug(f"[PR-DEBUG] function_to_tests keys: {list(function_to_tests.keys())}")
|
||||
logger.debug(f"[PR-DEBUG] function_to_tests has {len(function_to_tests)} keys")
|
||||
logger.debug(f"[PR-DEBUG] original_runtimes_all has {len(original_runtimes_all)} entries")
|
||||
logger.debug(f"[PR-DEBUG] optimized_runtimes_all has {len(optimized_runtimes_all)} entries")
|
||||
test_files = function_to_tests.get(function_qualified_name_with_modules_from_root)
|
||||
|
|
|
|||
147
tests/test_result/test_create_pr_logging_bug.py
Normal file
147
tests/test_result/test_create_pr_logging_bug.py
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
"""Test for Issue #9: Excessive logging in create_pr.py
|
||||
|
||||
Verifies that function_to_tests logging uses count instead of full key list.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from codeflash.result.create_pr import existing_tests_source_for
|
||||
from codeflash.verification.verification_utils import TestConfig
|
||||
|
||||
|
||||
def test_function_to_tests_logging_uses_count_not_full_list():
|
||||
"""
|
||||
Test that function_to_tests debug logging outputs count, not all keys.
|
||||
|
||||
Bug: Line 38 of create_pr.py logs `list(function_to_tests.keys())` which
|
||||
creates massive log files (43MB+) when function_to_tests has thousands
|
||||
of entries (e.g., budibase monorepo with 1012 functions).
|
||||
|
||||
Fix: Should log only `len(function_to_tests)` instead.
|
||||
"""
|
||||
# Create a large function_to_tests dict (simulate budibase scale)
|
||||
function_to_tests = {
|
||||
f"package{i}.module{j}.function{k}": set()
|
||||
for i in range(10)
|
||||
for j in range(10)
|
||||
for k in range(10)
|
||||
}
|
||||
# Total: 1000 keys
|
||||
|
||||
# Capture debug logs
|
||||
log_stream = StringIO()
|
||||
handler = logging.StreamHandler(log_stream)
|
||||
handler.setLevel(logging.DEBUG)
|
||||
formatter = logging.Formatter('%(message)s')
|
||||
handler.setFormatter(formatter)
|
||||
|
||||
# Get the 'rich' logger used by console.py
|
||||
logger = logging.getLogger('rich')
|
||||
original_level = logger.level
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
try:
|
||||
# Mock test_cfg
|
||||
test_cfg = Mock(spec=TestConfig)
|
||||
test_cfg.test_framework = "jest"
|
||||
|
||||
# Call the function
|
||||
existing_tests_source_for(
|
||||
function_qualified_name_with_modules_from_root="test.function",
|
||||
function_to_tests=function_to_tests,
|
||||
test_cfg=test_cfg,
|
||||
original_runtimes_all={},
|
||||
optimized_runtimes_all={},
|
||||
test_files_registry=None,
|
||||
)
|
||||
|
||||
# Get log output
|
||||
log_output = log_stream.getvalue()
|
||||
|
||||
# ASSERTION 1: Should log the count
|
||||
assert "function_to_tests" in log_output, "Should mention function_to_tests in logs"
|
||||
assert "1000" in log_output or "len" in log_output, \
|
||||
"Should log count of function_to_tests, not full list"
|
||||
|
||||
# ASSERTION 2: Should NOT log all keys (would create massive logs)
|
||||
# Check that we don't have dozens of "package0.module" strings
|
||||
package_mentions = log_output.count("package0.module")
|
||||
assert package_mentions < 10, \
|
||||
f"Should not log all {len(function_to_tests)} keys. " \
|
||||
f"Found {package_mentions} package mentions, which suggests full list logging. " \
|
||||
f"Log output size: {len(log_output)} bytes"
|
||||
|
||||
# ASSERTION 3: Log output should be reasonable size (< 10KB for this debug line)
|
||||
# The buggy version would produce ~100KB+ for 1000 keys
|
||||
assert len(log_output) < 10000, \
|
||||
f"Log output too large ({len(log_output)} bytes). " \
|
||||
f"This suggests logging full key list instead of count."
|
||||
|
||||
finally:
|
||||
logger.removeHandler(handler)
|
||||
logger.setLevel(original_level)
|
||||
|
||||
|
||||
def test_function_to_tests_logging_with_small_dict():
|
||||
"""
|
||||
Test that logging still works correctly with small function_to_tests dict.
|
||||
|
||||
This ensures the fix doesn't break the normal case.
|
||||
"""
|
||||
# Small dict (< 10 entries)
|
||||
function_to_tests = {
|
||||
"module.function1": set(),
|
||||
"module.function2": set(),
|
||||
}
|
||||
|
||||
# Capture debug logs
|
||||
log_stream = StringIO()
|
||||
handler = logging.StreamHandler(log_stream)
|
||||
handler.setLevel(logging.DEBUG)
|
||||
formatter = logging.Formatter('%(message)s')
|
||||
handler.setFormatter(formatter)
|
||||
|
||||
# Get the 'rich' logger used by console.py
|
||||
logger = logging.getLogger('rich')
|
||||
original_level = logger.level
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
try:
|
||||
# Mock test_cfg
|
||||
test_cfg = Mock(spec=TestConfig)
|
||||
test_cfg.test_framework = "jest"
|
||||
|
||||
# Call the function
|
||||
existing_tests_source_for(
|
||||
function_qualified_name_with_modules_from_root="test.function",
|
||||
function_to_tests=function_to_tests,
|
||||
test_cfg=test_cfg,
|
||||
original_runtimes_all={},
|
||||
optimized_runtimes_all={},
|
||||
test_files_registry=None,
|
||||
)
|
||||
|
||||
# Get log output
|
||||
log_output = log_stream.getvalue()
|
||||
|
||||
# Should mention function_to_tests
|
||||
assert "function_to_tests" in log_output
|
||||
|
||||
# Log should be reasonable size
|
||||
assert len(log_output) < 5000, \
|
||||
f"Even with small dict, log output is too large ({len(log_output)} bytes)"
|
||||
|
||||
finally:
|
||||
logger.removeHandler(handler)
|
||||
logger.setLevel(original_level)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
Loading…
Reference in a new issue