change split and strip characters
This commit is contained in:
parent
9eda3b29f8
commit
20666e8e4c
1 changed file with 2 additions and 2 deletions
|
@@ -44,7 +44,7 @@ class FileScanner(HTMLParser):
|
|||
words = []
|
||||
for text in self.texte:
|
||||
# Textteile in einzelne Wörter aufteilen
|
||||
- words += re.split(r'[ \n\-_/]', text)
|
||||
+ words += re.split(r'[ \n/]', text)
|
||||
# Die Anzahl, der Wörter in der aktuellen Datei, auf der Konsole ausgeben
|
||||
title = self.file.parent.name
|
||||
print(f'\nFile {title} contains {len(words)} words')
|
||||
|
@@ -52,7 +52,7 @@ class FileScanner(HTMLParser):
|
|||
title_words = set(title.split('-'))
|
||||
for word in words:
|
||||
# Verschiedene Zeichen vom Anfang und Ende der Wörter entfernen.
|
||||
- tag_name = word.strip(".,:;!\"'()«»")
|
||||
+ tag_name = word.strip(".,:;!?\"'()«»")
|
||||
# Leere Wörter ignorieren
|
||||
if not tag_name:
|
||||
continue
|
||||
|
|
Loading…
Reference in a new issue