parent
0dc6fd60d5
commit
c9f435f0f5
|
@ -57,6 +57,14 @@ class FileScanner(HTMLParser):
|
||||||
words_with_usage[word].score += score
|
words_with_usage[word].score += score
|
||||||
return sorted(words_with_usage.values(), key=lambda tag: tag.score, reverse=True)
|
return sorted(words_with_usage.values(), key=lambda tag: tag.score, reverse=True)
|
||||||
|
|
||||||
|
def handle_starttag(self, tag, attrs):
    """Record the link target of an ``<a>`` start tag.

    Invoked by ``HTMLParser`` for every opening tag. Tags other than
    ``a`` are ignored. For an ``a`` tag, the value of its first ``href``
    attribute is appended to ``self.texte``.

    Args:
        tag: Name of the tag being opened.
        attrs: Iterable of ``(name, value)`` pairs for the tag's
            attributes; ``value`` is ``None`` when the attribute was
            written without a value (e.g. ``<a href>``).
    """
    if tag != "a":
        return

    # Only the first href counts, matching the original loop's `break`.
    href = next(
        (value for name, value in attrs if name == "href"),
        None,
    )

    # A valueless href arrives as None; appending it would put a
    # non-string entry into self.texte, so it is skipped.
    if href is not None:
        self.texte.append(href)
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
self.texte.append(data)
|
self.texte.append(data)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue