diff --git a/tagger.py b/tagger.py index 34d0da7..63ddabb 100644 --- a/tagger.py +++ b/tagger.py @@ -33,7 +33,7 @@ class FileScanner(HTMLParser): words_with_usage = {} words = [] for text in self.texte: - words += re.split(r'[ /\-_#\n]', text) + words += re.split(r'[ /\-_#\n.?=]', text) print(f'\nFile {self.file.parent.name} contains {len(words)} words') title_words = set(self.file.parent.name.split('-')) for word in words: