diff --git a/code_to_optimize/text_processor.py b/code_to_optimize/text_processor.py new file mode 100644 index 000000000..8fdfb9e70 --- /dev/null +++ b/code_to_optimize/text_processor.py @@ -0,0 +1,190 @@ +class TextProcessor: + def __init__(self): + self.version = "0.1" + + def find_unique_words(self, text): + stop_words = { + "a", + "about", + "above", + "after", + "again", + "against", + "ain", + "all", + "am", + "an", + "and", + "any", + "are", + "aren", + "aren't", + "as", + "at", + "be", + "because", + "been", + "before", + "being", + "below", + "between", + "both", + "but", + "by", + "can", + "couldn", + "couldn't", + "did", + "didn", + "didn't", + "do", + "does", + "doesn", + "doesn't", + "doing", + "don", + "don't", + "down", + "during", + "each", + "few", + "for", + "from", + "further", + "had", + "hadn", + "hadn't", + "has", + "hasn", + "hasn't", + "have", + "haven", + "haven't", + "having", + "he", + "her", + "here", + "hers", + "herself", + "him", + "himself", + "his", + "how", + "i", + "if", + "in", + "into", + "is", + "isn", + "isn't", + "it", + "it's", + "its", + "itself", + "just", + "ll", + "let's", + "ma", + "me", + "mightn", + "mightn't", + "more", + "most", + "mustn", + "mustn't", + "my", + "myself", + "needn", + "needn't", + "no", + "nor", + "not", + "now", + "o", + "of", + "off", + "on", + "once", + "only", + "or", + "other", + "our", + "ours", + "ourselves", + "out", + "over", + "own", + "re", + "s", + "same", + "shan", + "shan't", + "she", + "she's", + "should", + "should've", + "shouldn", + "shouldn't", + "so", + "some", + "such", + "t", + "than", + "that", + "that'll", + "the", + "their", + "theirs", + "them", + "themselves", + "then", + "there", + "these", + "they", + "this", + "those", + "through", + "to", + "too", + "under", + "until", + "up", + "ve", + "very", + "was", + "wasn", + "wasn't", + "we", + "were", + "weren", + "weren't", + "what", + "when", + "where", + "which", + "while", + "who", + "whom", + "why", + "will", + "with", + "won", + "won't", + "wouldn", + "wouldn't", + "y", + "you", + "you'd", + "you'll", + "you're", + "you've", + "your", + "yours", + "yourself", + "yourselves", + } + + words = text.lower().split() + unique_words = [word for word in words if word not in stop_words] + + return unique_words