refactor: remove profanity filtering from optimization pipeline (#2288)

## Summary
- Removes `profanity_regex` and `profanity_words` from
`postprocess_constants.py`
- Removes the `remove_profanity_from_explanation` function and its entry in
`optimizations_postprocessing_pipeline`
- Removes the associated test, `test_remove_profanity_from_explanation`
This commit is contained in:
Kevin Turcios 2026-01-24 23:46:03 -05:00 committed by GitHub
parent 7aa4da74f8
commit df9bef20c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 0 additions and 1531 deletions

View file

@ -1,6 +1,5 @@
tests/optimizer/__init__.py
optimizer/__init__.py
optimizer/code_utils/postprocess_constants.py
optimizer/code_utils/__init__.py
authapp/__init__.py
authapp/tests.py

File diff suppressed because it is too large Load diff

View file

@ -13,7 +13,6 @@ import sentry_sdk
from libcst import CSTTransformer, CSTVisitor, Expr, IndentedBlock, SimpleStatementLine, SimpleString
from aiservice.common_utils import safe_isort
from optimizer.code_utils.postprocess_constants import profanity_regex
from optimizer.models import CodeExplanationAndID
from optimizer.optimizer_utils import compare_unparsed_ast_to_source, unparse_parse_source
from testgen.instrumentation.edit_generated_test import parse_module_to_cst
@ -247,20 +246,6 @@ def filter_ellipsis_containing_code(
return new_optimized_code_and_explanations
def remove_profanity_from_explanation(
    _original_source_code: str, optimized_code_and_explanations: list[CodeExplanationAndID]
) -> list[CodeExplanationAndID]:
    """Drop every candidate whose explanation matches ``profanity_regex``.

    Args:
        _original_source_code: Unused here; accepted so the function has the
            same two-argument signature as the other postprocessing steps.
        optimized_code_and_explanations: Candidates to screen.

    Returns:
        A new list holding a freshly constructed ``CodeExplanationAndID`` for
        each candidate whose explanation did not match, in the original order.
        A warning carrying the candidate id is logged for every dropped entry.
    """
    kept: list[CodeExplanationAndID] = []
    for candidate in optimized_code_and_explanations:
        hit = profanity_regex.search(candidate.explanation)
        if hit:
            # A flagged explanation discards the whole optimization, not just the text.
            logging.warning(
                "Profanity detected in explanation for optimization %s. Skipping this optimization.", candidate.id
            )
        else:
            rebuilt = CodeExplanationAndID(
                cst_module=candidate.cst_module, explanation=candidate.explanation, id=candidate.id
            )
            kept.append(rebuilt)
    return kept
def _strip_comments_from_code(code: str) -> str:
"""Remove all comments from Python code while preserving strings and their content.
@ -600,7 +585,6 @@ def optimizations_postprocessing_pipeline(
original_source_code: str, optimized_code_and_explanations: list[CodeExplanationAndID]
) -> list[CodeExplanationAndID]:
pipeline = [
remove_profanity_from_explanation,
fix_missing_docstring, # We want to deduplicate with the fixed docstrings included
clean_extraneous_comments_pipeline, # Clean comments added to unchanged code
fix_forward_references, # Add future annotations for forward references

View file

@ -10,34 +10,9 @@ from optimizer.postprocess import (
filter_ellipsis_containing_code,
fix_missing_docstring,
optimizations_postprocessing_pipeline,
remove_profanity_from_explanation,
)
def test_remove_profanity_from_explanation() -> None:
    """Flagged explanations are dropped; near-miss spellings pass through unchanged."""

    def build(explanation: str, ident: str) -> CodeExplanationAndID:
        # Every fixture shares the same trivial module; only the explanation text varies.
        return CodeExplanationAndID(libcst.parse_module("print('hi')"), explanation, ident)

    source = "def example(): pass"
    flagged_inputs = [
        build("This is a55hole, we should remove it.", "1"),
        build("fuck, we should remove it.", "2"),
    ]
    clean_inputs = [
        build("This isa55hole, we should not remove it.", "1"),
        build("funck, we should not remove it", "2"),
    ]

    # Both flagged candidates must be filtered out entirely.
    after_flagged = remove_profanity_from_explanation(
        _original_source_code=source, optimized_code_and_explanations=flagged_inputs
    )
    assert after_flagged == []

    # Look-alike text that is not a standalone match must survive untouched.
    after_clean = remove_profanity_from_explanation(
        _original_source_code=source, optimized_code_and_explanations=clean_inputs
    )
    assert after_clean == clean_inputs
def test_cleanup_explanations_removes_code_markers() -> None:
original_code = "def example(): pass"
empty_cst_module = cst.parse_module("")