45 lines
1.1 KiB
Python
45 lines
1.1 KiB
Python
from pathlib import Path
|
|
import re
|
|
|
|
|
|
def scan_file(file: Path):
    """Score the words found in *file* and print them, highest score first.

    The text is split on whitespace, ``/``, ``-`` and ``_``. Each token is
    stripped of surrounding punctuation, and every occurrence of a word adds
    to that word's running total:

    * 10 points per occurrence,
    * multiplied by 4 when the word is one of the ``-``-separated parts of
      the file name,
    * halved (integer division) when the word is three characters or shorter.

    Args:
        file: Path of the text file to analyse.

    Returns:
        None. A summary line and the ranked words are printed.
    """
    content = read_file(file)

    # A character class is required: the bare pattern ' /-_' only matches
    # that literal four-character sequence, so nothing was actually split.
    raw_tokens = re.split(r'[\s/_-]+', content)

    # Strip surrounding punctuation, then drop tokens that end up empty
    # (pure punctuation, or the empty strings re.split yields at the edges).
    stripped_tokens = (token.strip(" .,:;-_!\"'<>") for token in raw_tokens)
    words = [word for word in stripped_tokens if word]
    print(f'File {file.parent.name} contains {len(words)} words')

    # NOTE(review): file.name includes the extension, so its last part
    # (e.g. 'title.txt') will rarely equal a stripped word. Confirm whether
    # file.stem or file.parent.name was intended here.
    title_words = set(file.name.split('-'))

    words_with_usage = {}
    for word in words:
        score = 10
        if word in title_words:
            score *= 4
        if len(word) <= 3:
            score //= 2
        words_with_usage[word] = words_with_usage.get(word, 0) + score

    sorted_list = sorted(
        words_with_usage.items(),
        key=lambda item: item[1],
        reverse=True,
    )
    display_result(sorted_list)
|
|
|
|
|
|
def display_result(result):
    """Print one line per ``(word, score)`` pair whose score is above 1.

    Args:
        result: Iterable of ``(word, score)`` pairs, printed in the order
            given.

    Returns:
        None. Output goes to standard output.
    """
    report_lines = (
        f"Score: {points:>3} Word: {term}"
        for term, points in result
        if points > 1
    )
    for line in report_lines:
        print(line)
|
|
|
|
|
|
def read_file(file: Path) -> str:
    """Return the entire text content of *file*.

    Args:
        file: Path of the file to read.

    Returns:
        The decoded file content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale, and keep the handle under its own name instead of
    # rebinding the ``file`` parameter.
    with open(file, 'r', encoding='utf-8') as handle:
        return handle.read()
|
|
|
|
|
|
def main(source=Path('data')):
    """Scan the first ``index.txt`` found anywhere below *source*.

    Only the first match produced by the recursive glob is processed; if
    there is no match, nothing happens.

    Args:
        source: Directory searched recursively for ``index.txt`` files.
    """
    first_match = next(source.glob('**/index.txt'), None)
    if first_match is not None:
        scan_file(first_match)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the scan only when executed as a script, not when imported.
    main()
|