Loading count_name_occurrences.py +3 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ occur in the Wikipedia snapshot used for GloVe training.""" import pandas as pd df = pd.read_csv("../data/names_nationality.csv", usecols=["name", "nationality"]) df = pd.read_csv("../data/names_nationality.csv", usecols=["name", "nationality", "gender"]) with open("../data/wikipedia/wikipedia_corpus.txt", encoding="utf-8") as f: wikipedia_text = f.read() Loading @@ -15,6 +15,8 @@ for index, row in df.iterrows(): count = wikipedia_text.count(name) df.at[index, "occurrences_in_wikipedia"] = count df.to_csv("./data/names_nationality_wikipedia.csv", index=False) avg_occurrences = df.groupby("nationality")["occurrences_in_wikipedia"].mean() for nationality, avg_count in avg_occurrences.items(): print(f"Nationality: {nationality}, Average Occurrences in Wikipedia: {avg_count:.2f}") Loading
count_name_occurrences.py +3 −1 Original line number Diff line number Diff line Loading @@ -3,7 +3,7 @@ occur in the Wikipedia snapshot used for GloVe training.""" import pandas as pd df = pd.read_csv("../data/names_nationality.csv", usecols=["name", "nationality"]) df = pd.read_csv("../data/names_nationality.csv", usecols=["name", "nationality", "gender"]) with open("../data/wikipedia/wikipedia_corpus.txt", encoding="utf-8") as f: wikipedia_text = f.read() Loading @@ -15,6 +15,8 @@ for index, row in df.iterrows(): count = wikipedia_text.count(name) df.at[index, "occurrences_in_wikipedia"] = count df.to_csv("./data/names_nationality_wikipedia.csv", index=False) avg_occurrences = df.groupby("nationality")["occurrences_in_wikipedia"].mean() for nationality, avg_count in avg_occurrences.items(): print(f"Nationality: {nationality}, Average Occurrences in Wikipedia: {avg_count:.2f}")