diff --git a/src/automatic_metrics.py b/src/automatic_metrics.py index 42d240f2e2db8714869604d402f9cc3cbd6f399f..096eacd566602b15ce2ff9d598757d13ed106036 100644 --- a/src/automatic_metrics.py +++ b/src/automatic_metrics.py @@ -16,6 +16,8 @@ def count_syllables(word): syllable_count -= 1 return max(syllable_count, 1) + + class Compute_Metrics(object): def __init__(self): @@ -74,8 +76,9 @@ class Compute_Metrics(object): pmi = math.log2(p_w1_w2 / (p_w1 * p_w2)) pmi_scores[word1, word2] = pmi - sorted_pmi_scores = dict(sorted(pmi_scores.items(), key=lambda item: item[1], reverse=True)) - return sorted_pmi_scores + # sorted_pmi_scores = dict(sorted(pmi_scores.items(), key=lambda item: item[1], reverse=True)) + avg_pmi = sum(pmi_scores.values()) / len(pmi_scores) + return avg_pmi def compute_tfidf(self, new_text) -> dict[str, float]: corpus = copy.deepcopy(self.corpus_strings) @@ -91,12 +94,12 @@ class Compute_Metrics(object): feature_indices = [i for i, word in enumerate(feature_names) if word in new_text_tokens] tfidf_dict = {feature_names[i]: tfidf_scores[i] for i in feature_indices} - sorted_tfidf = dict(sorted(tfidf_dict.items(), key=lambda item: item[1], reverse=True)) - - return sorted_tfidf + # sorted_tfidf = dict(sorted(tfidf_dict.items(), key=lambda item: item[1], reverse=True)) + avg_tfidf = sum(tfidf_dict.values()) / len(tfidf_dict) + return avg_tfidf def main(): - test_text = """I have been dreaming of this day for years, when the sky is blue. But now I'm tired enough to see it coming up out there in my dreams as if they were some kind thing that was born from nothing but sunlight… + test_text = """I have been dreaming of this day for years, when the sky is blue. But now I\'m tired enough to see it coming up out there in my dreams as if they were some kind thing that was born from nothing but sunlight… It\'s so bright here at night because you\'re awake right next door; just like before! The sun goes down on your back while every single other part has fallen into place by then – all over me with its own little circle around us." """ Corpus = Compute_Metrics() pmi = Corpus.compute_pmi(test_text) diff --git a/src/text_extraction.py b/src/text_extraction.py index 189900817d88ca463292c703a554ccd6e87f9958..19073710d689b2f2386a2e3bdf8f535d89bf9244 100644 --- a/src/text_extraction.py +++ b/src/text_extraction.py @@ -114,7 +114,7 @@ if __name__ == "__main__": # output_directory = r"C:\Users" - # write_file(output_directory, "poetry_newlines.txt", poetry_text[0]) - # write_file(output_directory, "wiki.txt", wiki_text) - # write_file(output_directory, "sport_bbc.txt", sports_text) + # write_file(output_directory, "poetry_newlines", poetry_text[0]) + # write_file(output_directory, "wiki", wiki_text) + # write_file(output_directory, "sport_bbc", sports_text)