unquote url

2022-05-28 15:34:41 +02:00 · 2022-05-28 15:34:41 +02:00 · 819d6d51b9
parent 737634e740
commit 819d6d51b9
1 changed file with 2 additions and 0 deletions
--- a/tagger.py
+++ b/tagger.py
@@ -2,6 +2,7 @@ import json
 import re
 from dataclasses import dataclass
 from html.parser import HTMLParser
+from urllib.parse import unquote_plus
 from pathlib import Path
 from exclude import EXCLUDED_WORDS
@@ -33,6 +34,7 @@ class FileScanner(HTMLParser):
        words_with_usage = {}
        words = []
        for text in self.texte:
+            text = unquote_plus(text)
            words += re.split(r'[ /\-_#\n.?=]', text)
        print(f'\nFile {self.file.parent.name} contains {len(words)} words')
        title_words = set(self.file.parent.name.split('-'))