feat: add function that returns the top three most used words in a text
This commit is contained in:
parent
cc263c4088
commit
1cab2e22fb
@ -1,2 +1,50 @@
|
||||
# Placeholder stub: the `text` argument is not used and the function
# always returns None.
def top_3_words(text):
|
||||
return None
|
||||
def top_3_words(text: str) -> list:
    """Return the three most frequent words in ``text``, most frequent first.

    A word is a maximal run of letters, digits and apostrophes; every other
    character (punctuation, line breaks, ...) acts as a separator. Tokens
    made only of apostrophes are not counted as words. Counting is
    case-sensitive, and words with equal counts keep the order in which
    they first appear in ``text``.

    Args:
        text (str): Some text, possibly with punctuation and line breaks.

    Returns:
        list: Up to three words in descending order of occurrences; fewer
            when ``text`` has fewer than three distinct words, and ``[]``
            when it has none.
    """
    from collections import Counter

    # Turn every character that cannot be part of a word into a space so
    # that str.split() can do the tokenising.
    cleaned = "".join(
        c if c.isalpha() or c.isdigit() or c == "'" else " " for c in text
    )

    # Drop tokens such as "'" or "'''" that contain no letter or digit.
    words = [w for w in cleaned.split() if w.strip("'")]

    # Rank all counts at once: a slot-by-slot "top three" update would lose
    # the word it displaces (e.g. "a b b c c c" must give ["c", "b", "a"]).
    return [word for word, _ in Counter(words).most_common(3)]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user