"""Analyze the vocab counts and positions of the name lists within Deepset's German pretrained GloVe embeddings."""

import pandas as pd

VOCAB_PATH = "./data/deepset_german_glove_vocab.txt"
NAMES_PATH = "./data/names_nationality.csv"
OUTPUT_PATH = "./data/names_nationality_deepset.csv"

COUNT_COLUMN = "deepset_glove_vocab_count"
POSITION_COLUMN = "deepset_glove_vocab_position"


def annotate_names_with_vocab(names_df: pd.DataFrame, vocab_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *names_df* with vocab count and position columns added.

    Each value in the ``name`` column is lower-cased and looked up in the
    ``word`` column of *vocab_df*. When a word occurs more than once in the
    vocab, its first occurrence is used.

    Args:
        names_df: Frame with a ``name`` column.
        vocab_df: Frame with ``word`` and ``count`` columns; its index labels
            are reported as the position.

    Returns:
        A copy of *names_df* with two extra columns:
        ``deepset_glove_vocab_count`` (0 when the name is not in the vocab)
        and ``deepset_glove_vocab_position`` (-1 when it is not in the vocab).
    """
    annotated = names_df.copy()
    annotated[COUNT_COLUMN] = 0
    annotated[POSITION_COLUMN] = -1

    # Build the word -> (count, position) lookups once instead of scanning the
    # whole vocab for every name. Only the first occurrence of a word is kept.
    first_rows = vocab_df.loc[~vocab_df["word"].duplicated(keep="first")]
    count_by_word = dict(zip(first_rows["word"], first_rows["count"]))
    position_by_word = dict(zip(first_rows["word"], first_rows.index))

    for row_label, raw_name in annotated["name"].items():
        # Missing (NaN/None) or otherwise non-string names cannot be
        # lower-cased; leave their "not found" defaults in place.
        if not isinstance(raw_name, str):
            continue
        key = raw_name.lower()
        if key in position_by_word:
            annotated.at[row_label, COUNT_COLUMN] = count_by_word[key]
            annotated.at[row_label, POSITION_COLUMN] = position_by_word[key]

    return annotated


def main() -> None:
    """Read the vocab and name lists, annotate the names, and write the CSV."""
    # NOTE(review): read_csv uses its default quoting and NA handling here, so
    # a vocab token such as '"' or 'null' may not be read literally - confirm
    # against the actual vocab file.
    vocab_df = pd.read_csv(VOCAB_PATH, sep=" ", header=None, names=["word", "count"])
    names_df = pd.read_csv(NAMES_PATH)

    annotated = annotate_names_with_vocab(names_df, vocab_df)
    annotated.to_csv(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()
"""Analyze the vocab counts and positions of the name lists within Deepset's German pretrained GloVe embeddings."""

import pandas as pd

VOCAB_PATH = "./data/deepset_german_glove_vocab.txt"
NAMES_PATH = "./data/names_nationality.csv"
OUTPUT_PATH = "./data/names_nationality_deepset.csv"

COUNT_COLUMN = "deepset_glove_vocab_count"
POSITION_COLUMN = "deepset_glove_vocab_position"


def annotate_names_with_vocab(names_df: pd.DataFrame, vocab_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *names_df* with vocab count and position columns added.

    Each value in the ``name`` column is lower-cased and looked up in the
    ``word`` column of *vocab_df*. When a word occurs more than once in the
    vocab, its first occurrence is used.

    Args:
        names_df: Frame with a ``name`` column.
        vocab_df: Frame with ``word`` and ``count`` columns; its index labels
            are reported as the position.

    Returns:
        A copy of *names_df* with two extra columns:
        ``deepset_glove_vocab_count`` (0 when the name is not in the vocab)
        and ``deepset_glove_vocab_position`` (-1 when it is not in the vocab).
    """
    annotated = names_df.copy()
    annotated[COUNT_COLUMN] = 0
    annotated[POSITION_COLUMN] = -1

    # Build the word -> (count, position) lookups once instead of scanning the
    # whole vocab for every name. Only the first occurrence of a word is kept.
    first_rows = vocab_df.loc[~vocab_df["word"].duplicated(keep="first")]
    count_by_word = dict(zip(first_rows["word"], first_rows["count"]))
    position_by_word = dict(zip(first_rows["word"], first_rows.index))

    for row_label, raw_name in annotated["name"].items():
        # Missing (NaN/None) or otherwise non-string names cannot be
        # lower-cased; leave their "not found" defaults in place.
        if not isinstance(raw_name, str):
            continue
        key = raw_name.lower()
        if key in position_by_word:
            annotated.at[row_label, COUNT_COLUMN] = count_by_word[key]
            annotated.at[row_label, POSITION_COLUMN] = position_by_word[key]

    return annotated


def main() -> None:
    """Read the vocab and name lists, annotate the names, and write the CSV."""
    # NOTE(review): read_csv uses its default quoting and NA handling here, so
    # a vocab token such as '"' or 'null' may not be read literally - confirm
    # against the actual vocab file.
    vocab_df = pd.read_csv(VOCAB_PATH, sep=" ", header=None, names=["word", "count"])
    names_df = pd.read_csv(NAMES_PATH)

    annotated = annotate_names_with_vocab(names_df, vocab_df)
    annotated.to_csv(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()