Change handling of words with 3 or fewer characters
This commit is contained in:
parent
f09d42433f
commit
0dc6fd60d5
|
@ -46,8 +46,9 @@ class FileScanner(HTMLParser):
|
||||||
continue
|
continue
|
||||||
if word in title_words:
|
if word in title_words:
|
||||||
score *= 4
|
score *= 4
|
||||||
if len(word) <= 3:
|
word_length = len(word)
|
||||||
score //= 2
|
if word_length <= 3:
|
||||||
|
score = int(score * word_length / 4)
|
||||||
upper_letters_count = len(UPPER_CHECK.findall(tag_name))
|
upper_letters_count = len(UPPER_CHECK.findall(tag_name))
|
||||||
score += upper_letters_count * 5
|
score += upper_letters_count * 5
|
||||||
if word not in words_with_usage:
|
if word not in words_with_usage:
|
||||||
|
|
Loading…
Reference in a new issue