feat: function which returns the top three most used words in a text
This commit is contained in:
parent
cc263c4088
commit
1cab2e22fb
@ -1,2 +1,50 @@
|
|||||||
def top_3_words(text: str) -> list:
    """Return the three most frequent words in ``text``, most frequent first.

    A word is a maximal run of letters, digits and apostrophes that contains
    at least one letter or digit. Every other character (punctuation,
    line-breaks, tabs, ...) acts as a separator. Matching is case-sensitive.

    Args:
        text (str): Some text, possibly with punctuation and line-breaks.

    Returns:
        list: Up to three words ordered by descending number of occurrences.
            Fewer than three items are returned when the text holds fewer
            than three distinct words; an empty list when it holds none.
            Words with equal counts keep the order of their first appearance.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from collections import Counter

    # Turn every character that cannot be part of a word into a space so that
    # a plain whitespace split yields the candidate tokens.
    cleaned = "".join(
        c if c.isalpha() or c.isdigit() or c == "'" else " " for c in text
    )

    # Tokens made only of apostrophes (e.g. a stray "'") are not words.
    words = [
        token
        for token in cleaned.split()
        if any(c != "'" for c in token)
    ]

    # most_common() ranks every word by count, so a word displaced from the
    # top spot correctly moves down to second/third place instead of being
    # dropped; ties are returned in first-encountered order.
    return [word for word, _ in Counter(words).most_common(3)]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user