Skip to content
Snippets Groups Projects
Commit f6c5a882 authored by perov
Browse files

add the average values for the metrics instead of one value for each word

parent b148336f
No related branches found
No related tags found
No related merge requests found
...@@ -16,6 +16,8 @@ def count_syllables(word): ...@@ -16,6 +16,8 @@ def count_syllables(word):
syllable_count -= 1 syllable_count -= 1
return max(syllable_count, 1) return max(syllable_count, 1)
class Compute_Metrics(object): class Compute_Metrics(object):
def __init__(self): def __init__(self):
...@@ -74,8 +76,9 @@ class Compute_Metrics(object): ...@@ -74,8 +76,9 @@ class Compute_Metrics(object):
pmi = math.log2(p_w1_w2 / (p_w1 * p_w2)) pmi = math.log2(p_w1_w2 / (p_w1 * p_w2))
pmi_scores[word1, word2] = pmi pmi_scores[word1, word2] = pmi
sorted_pmi_scores = dict(sorted(pmi_scores.items(), key=lambda item: item[1], reverse=True)) # sorted_pmi_scores = dict(sorted(pmi_scores.items(), key=lambda item: item[1], reverse=True))
return sorted_pmi_scores avg_pmi = sum(pmi_scores.values()) / len(pmi_scores)
return avg_pmi
def compute_tfidf(self, new_text) -> dict[str, float]: def compute_tfidf(self, new_text) -> dict[str, float]:
corpus = copy.deepcopy(self.corpus_strings) corpus = copy.deepcopy(self.corpus_strings)
...@@ -91,12 +94,12 @@ class Compute_Metrics(object): ...@@ -91,12 +94,12 @@ class Compute_Metrics(object):
feature_indices = [i for i, word in enumerate(feature_names) if word in new_text_tokens] feature_indices = [i for i, word in enumerate(feature_names) if word in new_text_tokens]
tfidf_dict = {feature_names[i]: tfidf_scores[i] for i in feature_indices} tfidf_dict = {feature_names[i]: tfidf_scores[i] for i in feature_indices}
sorted_tfidf = dict(sorted(tfidf_dict.items(), key=lambda item: item[1], reverse=True)) # sorted_tfidf = dict(sorted(tfidf_dict.items(), key=lambda item: item[1], reverse=True))
avg_tfidf = sum(tfidf_dict.values()) / len(tfidf_dict)
return sorted_tfidf return avg_tfidf
def main(): def main():
test_text = """I have been dreaming of this day for years, when the sky is blue. But now I'm tired enough to see it coming up out there in my dreams as if they were some kind thing that was born from nothing but sunlight… test_text = """I have been dreaming of this day for years, when the sky is blue. But now I\'m tired enough to see it coming up out there in my dreams as if they were some kind thing that was born from nothing but sunlight… It\'s so bright here at night because you\'re awake right next door; just like before! The sun goes down on your back while every single other part has fallen into place by then – all over me with its own little circle around us."
""" """
Corpus = Compute_Metrics() Corpus = Compute_Metrics()
pmi = Corpus.compute_pmi(test_text) pmi = Corpus.compute_pmi(test_text)
......
...@@ -114,7 +114,7 @@ if __name__ == "__main__": ...@@ -114,7 +114,7 @@ if __name__ == "__main__":
# output_directory = r"C:\Users" # output_directory = r"C:\Users"
# write_file(output_directory, "poetry_newlines.txt", poetry_text[0]) # write_file(output_directory, "poetry_newlines", poetry_text[0])
# write_file(output_directory, "wiki.txt", wiki_text) # write_file(output_directory, "wiki", wiki_text)
# write_file(output_directory, "sport_bbc.txt", sports_text) # write_file(output_directory, "sport_bbc", sports_text)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment