"""Rank the words of a text file by a simple usage score."""
import re
from pathlib import Path

# Words are separated by any run of whitespace, '/', '-' or '_'.
_WORD_SEPARATORS = re.compile(r"[\s/\-_]+")

# Punctuation trimmed from both ends of every token before it is scored.
_STRIP_CHARS = " .,:;-_!\"'<>"


def scan_file(file: Path) -> list:
    """Score every word in *file*, print the ranking, and return it.

    Each occurrence of a word contributes a base score of 10. The score is
    multiplied by 4 when the word is one of the '-'-separated parts of the
    file's stem, and halved (integer division) when the word has three
    characters or fewer. Matching is case-sensitive.

    Args:
        file: Path of the text file to analyse.

    Returns:
        A list of ``(word, score)`` tuples sorted by descending score; words
        with equal scores keep the order of their first appearance.
    """
    content = read_file(file)

    # Trim punctuation, then drop tokens that end up empty (leading/trailing
    # separators, or tokens made of punctuation only).
    tokens = (tok.strip(_STRIP_CHARS) for tok in _WORD_SEPARATORS.split(content))
    words = [tok for tok in tokens if tok]
    print(f'File {file.parent.name} contains {len(words)} words')

    # Use the stem so the extension does not stick to the last title word.
    title_words = set(file.stem.split('-'))

    words_with_usage = {}
    for word in words:
        score = 10
        if word in title_words:
            score *= 4
        if len(word) <= 3:
            score //= 2
        words_with_usage[word] = words_with_usage.get(word, 0) + score

    sorted_list = sorted(
        words_with_usage.items(), key=lambda item: item[1], reverse=True
    )
    display_result(sorted_list)
    return sorted_list


def display_result(result):
    """Print one line per ``(word, score)`` pair whose score is above 1."""
    for word, usage in result:
        if usage > 1:
            print(f"Score: {usage:>3} Word: {word}")


def read_file(file: Path) -> str:
    """Return the full text of *file*, decoded as UTF-8."""
    # Separate name for the handle so the `file` argument is not shadowed.
    with open(file, 'r', encoding='utf-8') as handle:
        return handle.read()


def main(source=Path('data')):
    """Scan the first ``index.txt`` found anywhere below *source*."""
    for file in source.glob('**/index.txt'):
        scan_file(file)
        # NOTE(review): only the first match is processed; remove this break
        # if every index.txt under `source` is meant to be scanned.
        break


if __name__ == '__main__':
    main()