Tagger/tagger.py
2022-05-05 18:01:10 +02:00

45 lines
1.1 KiB
Python

from pathlib import Path
import re
def scan_file(file: Path):
    """Score the words of a text file and print them, highest score first.

    The file content is obtained from ``read_file`` and split into words on
    whitespace, ``/``, ``-`` and ``_``. Every occurrence of a word adds to
    that word's total: 10 points per occurrence, multiplied by 4 when the
    word is one of the ``-``-separated parts of the file name, and halved
    (integer division) when the word is at most three characters long.
    The resulting ranking is handed to ``display_result``.

    Args:
        file: Path of the text file to analyse.
    """
    content = read_file(file)
    # The separators have to be a character class: the pattern ' /-_' only
    # matched that literal four-character sequence, so the text was
    # effectively never split. Empty tokens produced by leading/trailing
    # separators are dropped so they are neither counted nor scored.
    words = [word for word in re.split(r'[\s/\-_]+', content) if word]
    print(f'File {file.parent.name} contains {len(words)} words')
    # NOTE(review): main() only passes files named 'index.txt', so file.name
    # never carries a real title; the directory name printed above
    # (file.parent.name) may be the intended source of title words - confirm.
    title_words = set(file.name.split('-'))
    words_with_usage = {}
    for word in words:
        word = word.strip(" .,:;-_!\"'<>")
        if not word:
            # A token made only of punctuation carries no information.
            continue
        score = 10
        if word in title_words:
            score *= 4
        if len(word) <= 3:
            score //= 2
        words_with_usage[word] = words_with_usage.get(word, 0) + score
    # sorted() is stable, so words with equal scores keep first-seen order.
    sorted_list = sorted(words_with_usage.items(), key=lambda item: item[1], reverse=True)
    display_result(sorted_list)
def display_result(result):
    """Print one line per (word, score) pair whose score is greater than 1.

    Args:
        result: Iterable of ``(word, score)`` pairs, printed in the order given.
    """
    for term, points in result:
        if not points > 1:
            # Scores of 1 or less are not reported.
            continue
        print(f"Score: {points:>3} Word: {term}")
def read_file(file: Path) -> str:
    """Return the complete text content of *file*.

    Args:
        file: Path of the file to read.

    Returns:
        The file content decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # An explicit encoding keeps the result independent of the platform's
    # locale default; the handle gets its own name so it no longer shadows
    # the ``file`` parameter.
    with open(file, 'r', encoding='utf-8') as handle:
        return handle.read()
def main(source=Path('data')):
    """Scan the first ``index.txt`` found anywhere below *source*.

    Args:
        source: Directory that is searched recursively; defaults to ``data``.
    """
    # Only the first match yielded by the glob is processed; any further
    # matches are never visited.
    first_match = next(source.glob('**/index.txt'), None)
    if first_match is not None:
        scan_file(first_match)
# Run the scan with the default source directory, but only when this file is
# executed as a script (not when it is imported).
if __name__ == '__main__':
    main()