From 233d1c94bfe2a4066b0f2db17a39b45de7bfc507 Mon Sep 17 00:00:00 2001 From: OneNewDev Date: Sat, 28 May 2022 15:33:16 +0200 Subject: [PATCH] split url letters --- tagger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tagger.py b/tagger.py index 34d0da7..63ddabb 100644 --- a/tagger.py +++ b/tagger.py @@ -33,7 +33,7 @@ class FileScanner(HTMLParser): words_with_usage = {} words = [] for text in self.texte: - words += re.split(r'[ /\-_#\n]', text) + words += re.split(r'[ /\-_#\n.?=]', text) print(f'\nFile {self.file.parent.name} contains {len(words)} words') title_words = set(self.file.parent.name.split('-')) for word in words: