"""Tokenize a text file into byte-pair subword tokens with BPEmb.

File history (from the change that introduced this script):
``byte_pair_encode.py`` was added (mode 100644, 12 lines) and the empty
``bpemb_reimplementation.py`` was deleted.

The script reads ``INPUT_PATH`` line by line, encodes every line with the
``lang="de"`` BPEmb model, and writes all resulting tokens to
``OUTPUT_PATH`` on a single line, each token followed by one space.
"""

# Default locations used when the script is run without overrides.
INPUT_PATH = "/home/aileen/heiBOX/BA/bias-mitigation-ba/blindtext.txt"
OUTPUT_PATH = "/home/aileen/heiBOX/BA/bias-mitigation-ba/blindtext_tokenized.txt"


def encode_lines(lines, encode):
    """Return the concatenated tokens produced by *encode* for each line.

    Args:
        lines: Iterable of text lines. Each line is passed to *encode*
            unchanged (including any trailing newline).
        encode: Callable mapping one line to an iterable of token strings,
            e.g. the ``encode`` method of a ``BPEmb`` instance.

    Returns:
        A flat list with the tokens of all lines, in input order.
    """
    tokens = []
    for line in lines:
        tokens.extend(encode(line))
    return tokens


def render_tokens(tokens):
    """Return *tokens* as one string, each token followed by a space.

    The trailing space after the last token is intentional: it reproduces
    the output of writing ``token + " "`` once per token.
    """
    return "".join(f"{token} " for token in tokens)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Tokenize *input_path* with BPEmb and write the tokens to *output_path*.

    Args:
        input_path: Text file to tokenize; read as UTF-8.
        output_path: Destination file; overwritten, written as UTF-8.
    """
    # Imported here so the helpers above can be used (and tested) without
    # the third-party ``bpemb`` package being installed.
    from bpemb import BPEmb

    # NOTE(review): per the BPEmb docs, ``vs`` is the vocabulary size and
    # ``dim`` the embedding dimension; the model files are presumably
    # fetched on first use - confirm network access where this runs.
    bpemb_de = BPEmb(lang="de", dim=300, vs=200000)

    # Explicit UTF-8: text with umlauts/eszett must not depend on the
    # platform's locale encoding.
    with open(input_path, mode="r", encoding="utf-8") as f:
        # Iterate the file object directly instead of materializing
        # every line with readlines().
        tokens = encode_lines(f, bpemb_de.encode)

    with open(output_path, mode="w", encoding="utf-8") as f:
        # One buffered write instead of one tiny write per token.
        f.write(render_tokens(tokens))


if __name__ == "__main__":
    main()