# HTMLParser callbacks belonging to FileScanner(HTMLParser). They are shown
# dedented here because the enclosing class header is outside this hunk;
# indent them one level when placing them inside the class body.


def handle_starttag(self, tag, attrs):
    """Record the link target of an ``<a>`` start tag.

    For anchor tags, the value of the first ``href`` attribute is appended
    to ``self.texte``, the same buffer that ``handle_data`` fills. All
    other tags are ignored.

    Args:
        tag: Tag name as passed in by the parser.
        attrs: List of ``(name, value)`` pairs for the tag's attributes.
    """
    if tag != "a":
        return
    # Only the first href counts, matching a scan that stops at first match.
    href = next(
        (attr_value for attr_name, attr_value in attrs if attr_name == "href"),
        None,
    )
    # html.parser reports a valueless attribute (``<a href>``) as None.
    # Skip it so the buffer only ever holds strings, as handle_data's
    # entries are.
    # NOTE(review): assumes consumers of self.texte expect str items -- confirm.
    if href is not None:
        self.texte.append(href)


def handle_data(self, data):
    """Append a chunk of text content from the document to ``self.texte``."""
    self.texte.append(data)