From c9f435f0f589a55de1d95423c58286060794854f Mon Sep 17 00:00:00 2001
From: OneNewDev
Date: Sat, 28 May 2022 15:24:19 +0200
Subject: [PATCH] Revert "don't include links anymore"

This reverts commit fba3f789bb2bfc46ea8741aca5fcebe838eb8c50.
---

NOTE(review): line breaks and indentation were restored from a copy of this
patch that had been collapsed onto a single line. The depth of the first
context line and the trailing blank context line are assumptions; confirm
them against tagger.py before running `git am`.

NOTE(review): handle_starttag appends the value of the first href attribute
of every <a> start tag to self.texte. Presumably FileScanner derives from
html.parser.HTMLParser, which reports a valueless attribute (`<a href>`) as
None, so None could end up in self.texte; confirm that whatever consumes
self.texte tolerates that.

 tagger.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tagger.py b/tagger.py
index 7c1db31..34d0da7 100644
--- a/tagger.py
+++ b/tagger.py
@@ -57,6 +57,14 @@ class FileScanner(HTMLParser):
             words_with_usage[word].score += score
         return sorted(words_with_usage.values(), key=lambda tag: tag.score, reverse=True)
 
+    def handle_starttag(self, tag, attrs):
+        if tag != "a":
+            return
+        for attr_name, attr_value in attrs:
+            if attr_name == "href":
+                self.texte.append(attr_value)
+                break
+
     def handle_data(self, data):
         self.texte.append(data)
 