"""Flag which names from the names list occur in the German fastText vocabulary.

Reads the names/nationality CSV, adds an ``in_fasttext_vocab`` column
(1 if the name is an entry of the model's vocabulary, 0 otherwise) and
writes the result to a new CSV file.
"""

# NOTE(review): this script was extracted from a patch that adds
# check_fasttext_vocab.py; the same patch also renames
# count_name_occurrences.py to count_name_occurrences_wikipedia.py
# (similarity index 100%, i.e. no content change).

import pandas as pd

NAMES_CSV = "../data/names_nationality_data.csv"
OUTPUT_CSV = "../data/names_nationality_fasttext.csv"
MODEL_PATH = "cc.de.300.bin"


def vocab_membership(names, vocab):
    """Return a 0/1 integer Series marking which *names* occur in *vocab*.

    Args:
        names: pandas Series of names to look up.
        vocab: iterable of vocabulary words.

    Returns:
        An ``int64`` Series aligned with ``names``: 1 where the name is an
        exact (case-sensitive) member of ``vocab``, 0 otherwise. Missing
        values are flagged 0.
    """
    # One hash-based membership test for the whole column, instead of
    # checking every name against the full vocabulary sequence row by row.
    return names.isin(set(vocab)).astype("int64")


def main():
    """Load the names CSV and the model, flag the names, write the output CSV."""
    # Imported here rather than at module level so that importing this module
    # (e.g. to reuse ``vocab_membership``) does not require fastText.
    import fasttext

    df = pd.read_csv(NAMES_CSV)
    model = fasttext.load_model(MODEL_PATH)
    df["in_fasttext_vocab"] = vocab_membership(df["name"], model.words)
    df.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()