change handling of words with 3 or fewer characters

This commit is contained in:
OneNewDev 2022-05-28 15:19:00 +02:00
parent f09d42433f
commit 0dc6fd60d5

View file

@@ -46,8 +46,9 @@ class FileScanner(HTMLParser):
continue
if word in title_words:
score *= 4
if len(word) <= 3:
score //= 2
word_length = len(word)
if word_length <= 3:
score = int(score * word_length / 4)
upper_letters_count = len(UPPER_CHECK.findall(tag_name))
score += upper_letters_count * 5
if word not in words_with_usage: