diff --git a/src/codewars/kata_most_words_used.py b/src/codewars/kata_most_words_used.py index 96e2f9c..8e93bf5 100644 --- a/src/codewars/kata_most_words_used.py +++ b/src/codewars/kata_most_words_used.py @@ -9,14 +9,19 @@ def top_3_words(text: str) -> list: Returns: list: [most used word, second most used word, third most used word] """ + # remove invalid characters and replace with whitespace text = "".join( c if c.isalpha() or c.isdigit() or c in ("'", " ") else " " for c in text - ) # remove invalid characters and replace with whitespace + ) text = text.split() + # remove word if it's just made of the character ' + text = [w for w in text if w.strip("' ") != ""] + word_count = {} for word in text: + word = word.lower() if word in word_count: word_count[word] += 1 else: @@ -30,9 +35,18 @@ def top_3_words(text: str) -> list: for word in word_count: if word_count[word] > top_3["first"]["amount"]: + top_3["third"]["word"] = top_3["second"]["word"] + top_3["third"]["amount"] = top_3["second"]["amount"] + + top_3["second"]["word"] = top_3["first"]["word"] + top_3["second"]["amount"] = top_3["first"]["amount"] + top_3["first"]["word"] = word top_3["first"]["amount"] = word_count[word] elif word_count[word] > top_3["second"]["amount"]: + top_3["third"]["word"] = top_3["second"]["word"] + top_3["third"]["amount"] = top_3["second"]["amount"] + top_3["second"]["word"] = word top_3["second"]["amount"] = word_count[word] elif word_count[word] > top_3["third"]["amount"]: diff --git a/tests/tests_codewars/test_most_words_used.py b/tests/tests_codewars/test_most_words_used.py index 24fb0d3..c3b567f 100644 --- a/tests/tests_codewars/test_most_words_used.py +++ b/tests/tests_codewars/test_most_words_used.py @@ -1,5 +1,27 @@ from src.codewars.kata_most_words_used import top_3_words +import pytest -def test_top_3_words(): - assert top_3_words() == 0 +@pytest.mark.parametrize( + ("input", "expected"), + [ + ("a a a b c c d d d d e e e e e", ["e", "d", "a"]), + ("e e e e DDD ddd DdD: ddd ddd aa aA Aa, bb cc cC e e e", ["e", "ddd", "aa"]), + (" //wont won't won't ", ["won't", "wont"]), + (" , e .. ", ["e"]), + (" ... ", []), + (" ' ", []), + (" ''' ", []), + ( + """In a village of La Mancha, the name of which I have no desire to call to + mind, there lived not long since one of those gentlemen that keep a lance + in the lance-rack, an old buckler, a lean hack, and a greyhound for + coursing. An olla of rather more beef than mutton, a salad on most + nights, scraps on Saturdays, lentils on Fridays, and a pigeon or so extra + on Sundays, made away with three-quarters of his income.""", + ["a", "of", "on"], + ), + ], +) +def test_top_3_words(input, expected): + assert top_3_words(input) == expected