From 0dc6fd60d5f8cec41b163d9df54344c35b429ee4 Mon Sep 17 00:00:00 2001 From: OneNewDev Date: Sat, 28 May 2022 15:19:00 +0200 Subject: [PATCH] change handling of words with 3 or fewer characters --- tagger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tagger.py b/tagger.py index 78586a7..7c1db31 100644 --- a/tagger.py +++ b/tagger.py @@ -46,8 +46,9 @@ class FileScanner(HTMLParser): continue if word in title_words: score *= 4 - if len(word) <= 3: - score //= 2 + word_length = len(word) + if word_length <= 3: + score = int(score * word_length / 4) upper_letters_count = len(UPPER_CHECK.findall(tag_name)) score += upper_letters_count * 5 if word not in words_with_usage: