diff --git a/debiaswe-master/.gitignore b/debiaswe-master/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..02724e2d3005f2e7e39b281aabc33cdd6f27ce9a
--- /dev/null
+++ b/debiaswe-master/.gitignore
@@ -0,0 +1,94 @@
+# PROJECT SPECIFIC
+
+
+# PYTHON RELATED
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
diff --git a/debiaswe-master/LICENSE b/debiaswe-master/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..963fdd22206b067eec644ef7b2f679aff9b13c18
--- /dev/null
+++ b/debiaswe-master/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Tolga
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/debiaswe-master/README.md b/debiaswe-master/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce94c5fae6ef2805e551b8b93df669d021d0f7f5
--- /dev/null
+++ b/debiaswe-master/README.md
@@ -0,0 +1,37 @@
+# Debiaswe: try to make word embeddings less sexist
+
+🔴[FAT* 2018 tutorial slides](https://drive.google.com/file/d/1IxIdmreH4qVYnx68QVkqCC9-_yyksoxR/view?usp=sharing)
+
+
+Here we have the code and data for the following paper:
+[Man is to Computer Programmer as Woman is to
+Homemaker? Debiasing Word Embeddings](http://papers.nips.cc/paper/6228-man-is-to-computer-programmer-as-woman-is-to-homemaker-debiasing-word-embeddings.pdf) by 
+Tolga Bolukbasi, Kai-Wei Chang, James Zou, Venkatesh Saligrama, and Adam Kalai. Proceedings of [NIPS 2016](https://papers.nips.cc/paper/6228-man-is-to-computer-programmer-as-woman-is-to-homemaker-debiasing-word-embeddings).
+
+**Just looking to download a debiased embedding?**
+
+You can download the [binary](https://drive.google.com/file/d/0B5vZVlu2WoS5ZTBSekpUX0RSNDg/view?usp=sharing&resourcekey=0-qO1UY06KB42G1T6IeJ2XCQ)/[txt](https://drive.google.com/file/d/1_PvT4ZvtZjhq4HPywA8-u06epht9ccOw/view?usp=sharing) hard-debiased version of Google's Word2Vec embedding trained on Google News (origin: GoogleNews-vectors-negative300.bin.gz, found [here](https://code.google.com/archive/p/word2vec/)).
+
+**Python scripts:**
+- **learn_gender_specific.py**: given a word embedding and a seed set of gender-specific words (like <i>king</i>, <i>she</i>, etc.), it learns a much larger list of gender-specific words
+- **debias.py**: given a word embedding, sets of gender-pairs, gender-specific words, and pairs to equalize, it outputs a new word embedding. This version reads and writes the word2vec binary file format.
+
+```
+python learn_gender_specific.py ../embeddings/GoogleNews-vectors-negative300.bin 50000 ../data/gender_specific_seed.json gender_specific_full.json
+```
+
+```
+python debias.py ../embeddings/GoogleNews-vectors-negative300.bin ../data/definitional_pairs.json ../data/gender_specific_full.json ../data/equalize_pairs.json ../embeddings/GoogleNews-vectors-negative300-hard-debiased.bin
+```
+
+
+We also have seed data used to debias and crowd data used to evaluate the embeddings.
+
+**Data files:**
+- **gender_specific_seed.json**: A list of 218 gender-specific words
+- **gender_specific_full.json**: A list of 1441 gender-specific words
+- **definitional_pairs.json**: The ten pairs of words we use to define the gender direction
+- **equalize_pairs.json**: Some crowdsourced F-M pairs of words that represent gender direction
+
+
+(All external files that I refer to within this repo can be found in [this folder](https://drive.google.com/drive/folders/0B5vZVlu2WoS5dkRFY19YUXVIU2M?resourcekey=0-rZ1HR4Fb0XCi4HFUERGhRA&usp=sharing).)
diff --git a/debiaswe-master/data/general_origin/bias_specific_full.json b/debiaswe-master/data/general_origin/bias_specific_full.json
new file mode 100644
index 0000000000000000000000000000000000000000..21d96db29b05499cc36ff495716a8e4d22f32f53
--- /dev/null
+++ b/debiaswe-master/data/general_origin/bias_specific_full.json
@@ -0,0 +1 @@
+["germane", "ostgeld", "focaccia", "ostalgie", "volksgenosse", "hetman", "auslandsdeutscher", "sinto", "lech", "auslandsgeschäft", "bambino", "reichsbahn", "engadin", "schilling", "grundgesetz", "prosecco", "kleindeutsch", "aventiure", "europide", "flüchtlingsausweis", "weser", "völkerwanderung", "azzurri", "landammann", "trecento", "deutschfeindlichkeit", "polnisch", "baron", "mitteldeutsch", "bundesminister", "germanisch", "itaker", "groschen", "quempaslied", "flüchtlingshilfe", "baden-württemberg",  "thai", "zuwanderin", "edeling", "italienisch", "ausländerhass", "confoederatio helvetica", "germanisieren", "vaudeville", "italowestern", "mittelhochdeutsch", "schwarz-rot-gold", "westmitteldeutsch", "tamtam", "janitscharenmusik", "österreichisch-ungarisch", "weichsel",  "germanentum", "jungdeutscher", "plattdeutsch", "grappa", "exarch",  "abate", "carabiniere", "bairisch", "alldeutsch", "quart", "sultan", "ramasan", "liechtenstein", "sachsen-anhalt", "settecento", "greyerzer", "reichsdeutsch", "urdeutsch", "bundesrepublikanisch", "thurgau", "germanist", "labiovelar", "kampanile", "ostmitteldeutsch", "frühneuhochdeutsch", "ostverträge", "geschäftsträger", "hochlautung", "reinheitsgebot", "wallis", "signore",  "brandenburg", "nazarener", "sbrinz", "carnotzet", "verhochdeutschen", "cinquecento", "großglockner", "aargau", "einwanderungsstrom", "beg", "kastell", "asylsuchende", "panje",  "spartakiade", "veltliner", "pizzaservice", "wittum", "helvetien", "nibelungen", "papagallo", "amerikahaus", "romand", "ausländisch", "boatpeople", "neudeutsch", "zibetkatze", "rom", "bundesstadt", "schweizweit", "mora", "signor", "chianti", "bundesstraße", "asylsuchender", "indogermane", "kenning", "sejm", "stadtammann", "wessi", "normanne", "ostdeutschland", "volksdeutscher", "rugier", "sütterlinschrift", "secondo", "gambir", "hochdeutsch", "bel paese", "frühneuhochdeutsch", "bundesliga", "devisenbewirtschaftung", "signorina", "zweigelt", "toskana", "auswärtig", "bundesadler", 
"deutschtum", "khan",  "hodscha", "hinterindien", "franken", "deutschlandchef", "helvetier", "afroasiatisch", "bundesbürger", "eindeutschung", "oberdeutsch", "deutschtümelei", "deutschsprachlich", "radicchio", "missingsch", "ostblock", "himalaja", "puzzolan", "taverne", "sundainseln", "außerdeutsch", "signoria", "cinzano", "wirtschaftsflüchtling", "markomanne", "austrofaschismus", "swissair", "ddr-bürger", "deutsche", "migrationshintergrund", "asylbewerberin", "deutschamerikanisch", "levante", "piefke", "padre", "theatiner", "westdeutschland", "zimber", "stasimitarbeiter", "deutschnational", "karawane", "norddeutsch", "russland", "spumante", "kosovokrieg", "italienisch", "italienerin", "mittelhochdeutsch", "bundeskanzlei", "gastarbeiter", "freiheitlich", "deutsch-schweizerisch", "italienreise", "flüchtlingselend", "ostler", "landesstraße", "sachsen", "verfassungsgerichtshof", "trentino-südtirol", "freisinn", "sprachgesellschaft", "ausländerfeindlich", "kurfürst", "deutschtürke", "schwäbeln", "kirchenstaat", "kebab", "staatsgerichtshof", "oberlandesgericht", "deutschkunde", "ostmitteldeutsch", "bergama", "cajunmusic", "spieloper", "rheinfall", "auffangen", "auslieferungsantrag", "spatha", "ausführen", "magnat", "polonistik", "undeutsch", "westfernsehen", "uri", "indoeuropäisch", "donnerer", "odal", "indogermanisch", "schufa", "dw", "po", "schleswig-holstein", "unter", "humanismus", "piazza", "ausländerkind", "westen", "kentumsprache", "reisefreiheit", "tessin", "greencard", "stasiakte", "ard", "einwandererkind", "pirogge", "lambrusco", "zimbal", "cavaliere", "mark", "raki", "aussiedler", "flüchtlingslager", "ufa", "deutschschweizerisch", "jot", "gorgonzola", "kanake", "u-häkchen", "fichtelberg", "südtirol", "asti", "burgunde", "ost-west-dialog", "centime", "tiber", "jul", "hartz", "quattrocento", "alphorn", "bundesverdienstkreuz", "pasta", "madrigal", "kelte", "deutschschweizer", "displaced person", "zentralasien", "handballbundesliga", "rentenmark", 
"althochdeutsch", "fremdenpolizei", "tarantella", "frühmittelhochdeutsch", "sibirien", "urgermanisch", "migrantenkind", "schwyzertütsch", "arte povera", "gastarbeiterin", "weißherbst", "bundespräsident", "österreichweit", "ural", "pidginenglisch", "uckermark", "immigrantin", "ger", "scudo", "spätaussiedler", "lastenausgleichsgesetz", "kulturinstitut", "welschschweizer", "menschenhandel", "austriazismus", "bundeshaus", "italer", "mitternachtssonne", "deutschlandfunk", "welsche", "latsche", "weißbuch", "hilfswillige", "schweizerdeutsch", "kufe", "kolonialherrschaft", "bundesdeutsche", "mitteldeutschland", "stracciatella", "frühmittelhochdeutsch", "vielfraß", "bundesdeutscher", "kaspisches meer", "kanton", "deutschtürkisch", "pfingstochse", "teutonin", "dihk", "amerikadeutsche", "stasiunterlagen", "deutsch", "ausländerpolitik", "niedersachsen", "ausfuhrgarantie", "harz", "lateinisch", "palazzo", "futhark", "schriftdeutsch", "wien", "pan", "weißwurstäquator", "pazifischer ozean", "basso", "welschland", "stabreim", "bundesministerium", "quarta", "schwyzerdütsch", "lasagne", "aga", "karelien", "polnisch", "russlanddeutsch", "kurrentschrift", "canzone", "oberdeutsch", "härtefallkommission", "nachkriegsschweiz", "levantiner", "faschismus", "pole", "angelsachse", "ararat", "reichstag", "verismo", "börde", "paying guest", "balsamessig", "schwabenspiegel", "westmitteldeutsch", "landesversicherungsanstalt", "illyrer", "pagode", "treuhandanstalt", "doktorhut", "fußballbundesliga", "italoamerikaner", "kalabrien", "arbeitsemigrant", "deutschlehrer", "arier", "bajazzo", "kabinett", "lufthansa", "mikrozensus", "verrechnungseinheit", "hanswurst", "sezession", "schlepper", "aufenthaltsgenehmigung", "deutschjüdisch", "einwandererstrom", "außenwert", "misereor", "bundesgartenschau", "bezirkstag", "alpenrepublik", "zwangsumtausch", "auslandsdeutsch", "teehaus", "panasiatisch", "einwanderin", "westdeutscher", "duce", "konsularkorps", "italianisieren", "siamkatze", "auslese", 
"isolationismus", "expedition", "zav", "einreisen", "turkisieren", "ostösterreich", "anwerbestopp", "waadt", "ausländerin", "französisch-deutsch", "vorderasien", "administrator", "stradivari", "welschschweizerisch", "ostgermane", "kolonie", "bundesbetreuung", "gefolgschaft", "bundeshaushalt", "sizilien", "vendetta", "botschafter", "hermesbürgschaft", "nidwalden", "zahlungsbilanz", "apo", "generaloberst", "altnordisch", "jura", "ostasien", "pandschabi", "volksdeutsche", "einwanderer", "saarland", "effendi", "deutschlandlied", "intershop", "eisheiligen", "ch-laut", "bundschuh", "landeshauptmann", "cherusker", "migrantin", "deutsch gesinnt", "dolma", "pecorino", "nordrhein-westfalen", "inländerin", "obwalden", "schrumpfgermane", "osten", "eindeutschen", "amerikadeutscher", "thing", "ciabatta", "hamburg", "schweizergarde", "welscher", "parmesan", "altdeutsch", "mazurka", "böhmerwald", "sowjetzone", "westdeutsche", "berlin", "deutsch", "rhododendron", "fra", "hispano", "deutschsprachig", "osmane", "immigrant", "bundespolitiker", "ubier", "hilfswilliger", "wechselkurs", "marchese", "apulien", "reisescheck", "bergamotte", "defa", "sonata", "zentralschweiz", "apennin", "dax", "ostdeutsche", "bremen", "konsistorium", "deutschfreundlichkeit", "honved", "padrone", "schweizer", "kawass", "departement", "frikadelle", "großdeutsch",  "verdeutschung", "jiddisch", "neubürger", "trattoria",  "panettone", "austromarxismus", "metamusik", "ddr-bürgerin", "boreal", "nordgermane", "notaufnahme", "antipasto", "drk", "catenaccio", "hesperien", "pannacotta", "schweizerin", "moxibustion", "allgäu", "schriftdeutsch", "welschschweiz", "bundesgebiet", "auslandsdeutsche", "eurasier", "schakal", "jass", "bundesrat", "warenumsatzsteuer", "deutscher", "swiss", "westschweiz", "trakehner", "gote", "fürstentag", "autarkie", "flüchtlingsstrom", "landesgartenschau", "futurismus", "ligurien", "bundesautobahn", "ku-klux-klan", "standarddeutsch", "kappadozien", "westdeutsch", "westlich", "innerschweiz", 
"steppenhuhn", "ösi", "orient", "achtundvierziger", "entsendegesetz", "hethiter", "deutsch-türkisch", "romanismus", "schweizerbürgerin", "daus", "franke", "senat", "bundesnachrichtendienst", "bundesbahn", "beamtendeutsch", "zuwandrer", "lombardei", "rittmeister", "lori", "alta moda", "standarddeutsch", "buntnessel", "belcanto", "deutschkenntnis", "piccolo", "tschibuk", "auffanglager", "elba", "arlecchino", "lira", "exilliteratur", "niederdeutsch", "bundesausbildungsförderungsgesetz", "ehrenspielführer", "durchgangslager", "apenninen-halbinsel", "cassata", "schwarz-weiß-rot", "deutschlandsender", "autark", "erzherzog", "eurokommunismus", "europider", "hennastrauch", "österreichisch", "brd", "plateresk", "prignitz", "treck", "buch", "iberer", "pancetta", "lüneburger heide", "ostig", "fdp", "couvert", "asylbewerberheim", "quintal", "heldenlied", "asiatisch", "kandidat", "notlager", "ems", "bundestag", "hindukusch", "beitrittsgebiet", "türkisch", "güteraustausch", "importe", "mittelniederdeutsch", "mauerschütze", "bundeskanzleramt", "ß", "tagliatelle", "büffel", "ossi", "seconda", "zaubernuss", "ziehungsrecht", "brandgans", "katamaran", "feldgrau", "pizza", "afrodeutsch", "importhandel", "zloty", "italienische", "ostdeutsch", "anopheles", "betäubungsmittelgesetz", "kreuzer", "resident", "bundesdeutsch", "italianismus", "östlich", "türkischstämmig", "welsch", "valuta", "schleichkatze", "fernamt", "südasien", "deutschlandpolitik", "germanin", "muchtar", "ostpolitik", "thüringen", "flüchtlingsrat", "brillenschlange", "met", "schabzieger", "piva", "krevette", "devise", "ausländerfeindlichkeit", "boccia", "konak", "alpenjäger", "prädikatswein", "preislied", "studienkolleg", "sudetenland", "chassidismus", "hemlocktanne", "baba", "novecento", "großdeutschland", "rheinland-pfalz", "lizenziat", "nachkriegsösterreich", "binnendeutsch", "geest", "billigflagge", "bundeswehr", "amischer", "getto", "kanzleideutsch", "moschustier", "neudeutsch", "polentum", "italienischsprachig", 
"kamtschatka", "vacherin", "fantasia", "volksgericht", "nationalratspräsident", "kontor", "scampi", "teutonisch", "plattdeutsch", "germanistik", "biedermeier", "certosa", "eurocityzug", "ausländer", "seele", "staatsrat", "bundeskabinett", "alitalia", "italien", "migrationspolitik", "verfassungsinitiative", "diplomatie", "neuhochdeutsch", "zwergkiefer", "marktamt", "dienstpragmatik", "deutschschweiz", "frascati", "kurrent", "türkisch", "fpö", "eurasien", "kemalismus", "landeskirche", "mittelmeerländer", "eidgenosse", "friedensfahrt", "renaissance", "rotwelsch", "hyäne", "italianist", "prälat", "pfalz", "fremdarbeiter", "quent", "spruch", "wandervogel", "hortensie", "türbe", "bundesgesetzblatt", "schwarzwald", "ausländeranteil", "hafenzoll", "integrationsbeauftragte", "mecklenburg-vorpommern", "ostdeutscher", "satemsprache", "mittelniederdeutsch", "botschaft", "maggiore", "schutztruppe", "ländle", "kreole", "hamam", "conte", "incoming", "ripuarisch", "lingua franca", "aare", "bundesversammlung", "bootsflüchtling", "mitteldeutsch", "unteritalien", "althochdeutsch", "bigos", "ingwäonen", "schwarzes meer", "bundesanleihe", "fremde", "ober", "ausländeramt", "qualitätswein", "sardinien", "westler", "einigungsvertrag", "asean", "visconte", "don", "halbesel", "bundesbank", "gesandtschaft", "indogermanistik", "behördendeutsch", "notaufnahmelager", "ausländerbehörde", "josephinismus", "schwaben", "flüchtlingspolitik", "rote-armee-fraktion", "schutzzoll", "katzelmacher", "deutschstämmig", "reichsdeutscher", "deutsch sprechend", "staatsminister", "präfekt", "deutschamerikaner", "asylgerichtshof", "glosse", "italianistisch", "alemanne", "legionär", "sammellager", "reichsdeutsche", "kapitalflucht", "ostschweiz", "germanien", "orientteppich", "landeshauptfrau", "romandie", "ultra", "oder-neiße-linie", "platt", "neuhochdeutsch", "staatssicherheitsdienst", "südeuropäisch", "deutschstämmige", "umweltflüchtling", "ostzone", "mezzogiorno", "villanell", "frisör", "oberitalien", 
"süddeutsch", "treudeutsch", "bundesverfassungsgericht", "ischia", "mozzarella", "sudetendeutsch", "tramontana", "bayern", "einwandererfamilie", "sprachführer", "durchgangsverkehr", "arno", "rütlischwur", "volkskammer", "mad", "ns-staat", "volksmarine", "dienstleistungsverkehr", "expatriate", "gemeindeutsch", "österreicherin", "zonenrandgebiet", "amtssprache", "tifoso", "schweizerisch", "studienaufenthalt", "hansestadt", "hessen", "bure", "ostflüchtling", "flüchtlingstreck", "ristorante", "osteria", "teutonengrill", "assisen", "riviera", "kolonialherr", "wendezeit", "flüchtlingsheim", "bundesverwaltungsgericht", "diwan", "exequatur", "krautrock", "deutschstämmiger", "woiwod", "geniezeit", "anatolien", "bundessozialgericht", "freiburg"]
\ No newline at end of file
diff --git a/debiaswe-master/data/general_origin/bias_specific_seed.json b/debiaswe-master/data/general_origin/bias_specific_seed.json
new file mode 100644
index 0000000000000000000000000000000000000000..21d96db29b05499cc36ff495716a8e4d22f32f53
--- /dev/null
+++ b/debiaswe-master/data/general_origin/bias_specific_seed.json
@@ -0,0 +1 @@
+["germane", "ostgeld", "focaccia", "ostalgie", "volksgenosse", "hetman", "auslandsdeutscher", "sinto", "lech", "auslandsgeschäft", "bambino", "reichsbahn", "engadin", "schilling", "grundgesetz", "prosecco", "kleindeutsch", "aventiure", "europide", "flüchtlingsausweis", "weser", "völkerwanderung", "azzurri", "landammann", "trecento", "deutschfeindlichkeit", "polnisch", "baron", "mitteldeutsch", "bundesminister", "germanisch", "itaker", "groschen", "quempaslied", "flüchtlingshilfe", "baden-württemberg",  "thai", "zuwanderin", "edeling", "italienisch", "ausländerhass", "confoederatio helvetica", "germanisieren", "vaudeville", "italowestern", "mittelhochdeutsch", "schwarz-rot-gold", "westmitteldeutsch", "tamtam", "janitscharenmusik", "österreichisch-ungarisch", "weichsel",  "germanentum", "jungdeutscher", "plattdeutsch", "grappa", "exarch",  "abate", "carabiniere", "bairisch", "alldeutsch", "quart", "sultan", "ramasan", "liechtenstein", "sachsen-anhalt", "settecento", "greyerzer", "reichsdeutsch", "urdeutsch", "bundesrepublikanisch", "thurgau", "germanist", "labiovelar", "kampanile", "ostmitteldeutsch", "frühneuhochdeutsch", "ostverträge", "geschäftsträger", "hochlautung", "reinheitsgebot", "wallis", "signore",  "brandenburg", "nazarener", "sbrinz", "carnotzet", "verhochdeutschen", "cinquecento", "großglockner", "aargau", "einwanderungsstrom", "beg", "kastell", "asylsuchende", "panje",  "spartakiade", "veltliner", "pizzaservice", "wittum", "helvetien", "nibelungen", "papagallo", "amerikahaus", "romand", "ausländisch", "boatpeople", "neudeutsch", "zibetkatze", "rom", "bundesstadt", "schweizweit", "mora", "signor", "chianti", "bundesstraße", "asylsuchender", "indogermane", "kenning", "sejm", "stadtammann", "wessi", "normanne", "ostdeutschland", "volksdeutscher", "rugier", "sütterlinschrift", "secondo", "gambir", "hochdeutsch", "bel paese", "frühneuhochdeutsch", "bundesliga", "devisenbewirtschaftung", "signorina", "zweigelt", "toskana", "auswärtig", "bundesadler", 
"deutschtum", "khan",  "hodscha", "hinterindien", "franken", "deutschlandchef", "helvetier", "afroasiatisch", "bundesbürger", "eindeutschung", "oberdeutsch", "deutschtümelei", "deutschsprachlich", "radicchio", "missingsch", "ostblock", "himalaja", "puzzolan", "taverne", "sundainseln", "außerdeutsch", "signoria", "cinzano", "wirtschaftsflüchtling", "markomanne", "austrofaschismus", "swissair", "ddr-bürger", "deutsche", "migrationshintergrund", "asylbewerberin", "deutschamerikanisch", "levante", "piefke", "padre", "theatiner", "westdeutschland", "zimber", "stasimitarbeiter", "deutschnational", "karawane", "norddeutsch", "russland", "spumante", "kosovokrieg", "italienisch", "italienerin", "mittelhochdeutsch", "bundeskanzlei", "gastarbeiter", "freiheitlich", "deutsch-schweizerisch", "italienreise", "flüchtlingselend", "ostler", "landesstraße", "sachsen", "verfassungsgerichtshof", "trentino-südtirol", "freisinn", "sprachgesellschaft", "ausländerfeindlich", "kurfürst", "deutschtürke", "schwäbeln", "kirchenstaat", "kebab", "staatsgerichtshof", "oberlandesgericht", "deutschkunde", "ostmitteldeutsch", "bergama", "cajunmusic", "spieloper", "rheinfall", "auffangen", "auslieferungsantrag", "spatha", "ausführen", "magnat", "polonistik", "undeutsch", "westfernsehen", "uri", "indoeuropäisch", "donnerer", "odal", "indogermanisch", "schufa", "dw", "po", "schleswig-holstein", "unter", "humanismus", "piazza", "ausländerkind", "westen", "kentumsprache", "reisefreiheit", "tessin", "greencard", "stasiakte", "ard", "einwandererkind", "pirogge", "lambrusco", "zimbal", "cavaliere", "mark", "raki", "aussiedler", "flüchtlingslager", "ufa", "deutschschweizerisch", "jot", "gorgonzola", "kanake", "u-häkchen", "fichtelberg", "südtirol", "asti", "burgunde", "ost-west-dialog", "centime", "tiber", "jul", "hartz", "quattrocento", "alphorn", "bundesverdienstkreuz", "pasta", "madrigal", "kelte", "deutschschweizer", "displaced person", "zentralasien", "handballbundesliga", "rentenmark", 
"althochdeutsch", "fremdenpolizei", "tarantella", "frühmittelhochdeutsch", "sibirien", "urgermanisch", "migrantenkind", "schwyzertütsch", "arte povera", "gastarbeiterin", "weißherbst", "bundespräsident", "österreichweit", "ural", "pidginenglisch", "uckermark", "immigrantin", "ger", "scudo", "spätaussiedler", "lastenausgleichsgesetz", "kulturinstitut", "welschschweizer", "menschenhandel", "austriazismus", "bundeshaus", "italer", "mitternachtssonne", "deutschlandfunk", "welsche", "latsche", "weißbuch", "hilfswillige", "schweizerdeutsch", "kufe", "kolonialherrschaft", "bundesdeutsche", "mitteldeutschland", "stracciatella", "frühmittelhochdeutsch", "vielfraß", "bundesdeutscher", "kaspisches meer", "kanton", "deutschtürkisch", "pfingstochse", "teutonin", "dihk", "amerikadeutsche", "stasiunterlagen", "deutsch", "ausländerpolitik", "niedersachsen", "ausfuhrgarantie", "harz", "lateinisch", "palazzo", "futhark", "schriftdeutsch", "wien", "pan", "weißwurstäquator", "pazifischer ozean", "basso", "welschland", "stabreim", "bundesministerium", "quarta", "schwyzerdütsch", "lasagne", "aga", "karelien", "polnisch", "russlanddeutsch", "kurrentschrift", "canzone", "oberdeutsch", "härtefallkommission", "nachkriegsschweiz", "levantiner", "faschismus", "pole", "angelsachse", "ararat", "reichstag", "verismo", "börde", "paying guest", "balsamessig", "schwabenspiegel", "westmitteldeutsch", "landesversicherungsanstalt", "illyrer", "pagode", "treuhandanstalt", "doktorhut", "fußballbundesliga", "italoamerikaner", "kalabrien", "arbeitsemigrant", "deutschlehrer", "arier", "bajazzo", "kabinett", "lufthansa", "mikrozensus", "verrechnungseinheit", "hanswurst", "sezession", "schlepper", "aufenthaltsgenehmigung", "deutschjüdisch", "einwandererstrom", "außenwert", "misereor", "bundesgartenschau", "bezirkstag", "alpenrepublik", "zwangsumtausch", "auslandsdeutsch", "teehaus", "panasiatisch", "einwanderin", "westdeutscher", "duce", "konsularkorps", "italianisieren", "siamkatze", "auslese", 
"isolationismus", "expedition", "zav", "einreisen", "turkisieren", "ostösterreich", "anwerbestopp", "waadt", "ausländerin", "französisch-deutsch", "vorderasien", "administrator", "stradivari", "welschschweizerisch", "ostgermane", "kolonie", "bundesbetreuung", "gefolgschaft", "bundeshaushalt", "sizilien", "vendetta", "botschafter", "hermesbürgschaft", "nidwalden", "zahlungsbilanz", "apo", "generaloberst", "altnordisch", "jura", "ostasien", "pandschabi", "volksdeutsche", "einwanderer", "saarland", "effendi", "deutschlandlied", "intershop", "eisheiligen", "ch-laut", "bundschuh", "landeshauptmann", "cherusker", "migrantin", "deutsch gesinnt", "dolma", "pecorino", "nordrhein-westfalen", "inländerin", "obwalden", "schrumpfgermane", "osten", "eindeutschen", "amerikadeutscher", "thing", "ciabatta", "hamburg", "schweizergarde", "welscher", "parmesan", "altdeutsch", "mazurka", "böhmerwald", "sowjetzone", "westdeutsche", "berlin", "deutsch", "rhododendron", "fra", "hispano", "deutschsprachig", "osmane", "immigrant", "bundespolitiker", "ubier", "hilfswilliger", "wechselkurs", "marchese", "apulien", "reisescheck", "bergamotte", "defa", "sonata", "zentralschweiz", "apennin", "dax", "ostdeutsche", "bremen", "konsistorium", "deutschfreundlichkeit", "honved", "padrone", "schweizer", "kawass", "departement", "frikadelle", "großdeutsch",  "verdeutschung", "jiddisch", "neubürger", "trattoria",  "panettone", "austromarxismus", "metamusik", "ddr-bürgerin", "boreal", "nordgermane", "notaufnahme", "antipasto", "drk", "catenaccio", "hesperien", "pannacotta", "schweizerin", "moxibustion", "allgäu", "schriftdeutsch", "welschschweiz", "bundesgebiet", "auslandsdeutsche", "eurasier", "schakal", "jass", "bundesrat", "warenumsatzsteuer", "deutscher", "swiss", "westschweiz", "trakehner", "gote", "fürstentag", "autarkie", "flüchtlingsstrom", "landesgartenschau", "futurismus", "ligurien", "bundesautobahn", "ku-klux-klan", "standarddeutsch", "kappadozien", "westdeutsch", "westlich", "innerschweiz", 
"steppenhuhn", "ösi", "orient", "achtundvierziger", "entsendegesetz", "hethiter", "deutsch-türkisch", "romanismus", "schweizerbürgerin", "daus", "franke", "senat", "bundesnachrichtendienst", "bundesbahn", "beamtendeutsch", "zuwandrer", "lombardei", "rittmeister", "lori", "alta moda", "standarddeutsch", "buntnessel", "belcanto", "deutschkenntnis", "piccolo", "tschibuk", "auffanglager", "elba", "arlecchino", "lira", "exilliteratur", "niederdeutsch", "bundesausbildungsförderungsgesetz", "ehrenspielführer", "durchgangslager", "apenninen-halbinsel", "cassata", "schwarz-weiß-rot", "deutschlandsender", "autark", "erzherzog", "eurokommunismus", "europider", "hennastrauch", "österreichisch", "brd", "plateresk", "prignitz", "treck", "buch", "iberer", "pancetta", "lüneburger heide", "ostig", "fdp", "couvert", "asylbewerberheim", "quintal", "heldenlied", "asiatisch", "kandidat", "notlager", "ems", "bundestag", "hindukusch", "beitrittsgebiet", "türkisch", "güteraustausch", "importe", "mittelniederdeutsch", "mauerschütze", "bundeskanzleramt", "ß", "tagliatelle", "büffel", "ossi", "seconda", "zaubernuss", "ziehungsrecht", "brandgans", "katamaran", "feldgrau", "pizza", "afrodeutsch", "importhandel", "zloty", "italienische", "ostdeutsch", "anopheles", "betäubungsmittelgesetz", "kreuzer", "resident", "bundesdeutsch", "italianismus", "östlich", "türkischstämmig", "welsch", "valuta", "schleichkatze", "fernamt", "südasien", "deutschlandpolitik", "germanin", "muchtar", "ostpolitik", "thüringen", "flüchtlingsrat", "brillenschlange", "met", "schabzieger", "piva", "krevette", "devise", "ausländerfeindlichkeit", "boccia", "konak", "alpenjäger", "prädikatswein", "preislied", "studienkolleg", "sudetenland", "chassidismus", "hemlocktanne", "baba", "novecento", "großdeutschland", "rheinland-pfalz", "lizenziat", "nachkriegsösterreich", "binnendeutsch", "geest", "billigflagge", "bundeswehr", "amischer", "getto", "kanzleideutsch", "moschustier", "neudeutsch", "polentum", "italienischsprachig", 
"kamtschatka", "vacherin", "fantasia", "volksgericht", "nationalratspräsident", "kontor", "scampi", "teutonisch", "plattdeutsch", "germanistik", "biedermeier", "certosa", "eurocityzug", "ausländer", "seele", "staatsrat", "bundeskabinett", "alitalia", "italien", "migrationspolitik", "verfassungsinitiative", "diplomatie", "neuhochdeutsch", "zwergkiefer", "marktamt", "dienstpragmatik", "deutschschweiz", "frascati", "kurrent", "türkisch", "fpö", "eurasien", "kemalismus", "landeskirche", "mittelmeerländer", "eidgenosse", "friedensfahrt", "renaissance", "rotwelsch", "hyäne", "italianist", "prälat", "pfalz", "fremdarbeiter", "quent", "spruch", "wandervogel", "hortensie", "türbe", "bundesgesetzblatt", "schwarzwald", "ausländeranteil", "hafenzoll", "integrationsbeauftragte", "mecklenburg-vorpommern", "ostdeutscher", "satemsprache", "mittelniederdeutsch", "botschaft", "maggiore", "schutztruppe", "ländle", "kreole", "hamam", "conte", "incoming", "ripuarisch", "lingua franca", "aare", "bundesversammlung", "bootsflüchtling", "mitteldeutsch", "unteritalien", "althochdeutsch", "bigos", "ingwäonen", "schwarzes meer", "bundesanleihe", "fremde", "ober", "ausländeramt", "qualitätswein", "sardinien", "westler", "einigungsvertrag", "asean", "visconte", "don", "halbesel", "bundesbank", "gesandtschaft", "indogermanistik", "behördendeutsch", "notaufnahmelager", "ausländerbehörde", "josephinismus", "schwaben", "flüchtlingspolitik", "rote-armee-fraktion", "schutzzoll", "katzelmacher", "deutschstämmig", "reichsdeutscher", "deutsch sprechend", "staatsminister", "präfekt", "deutschamerikaner", "asylgerichtshof", "glosse", "italianistisch", "alemanne", "legionär", "sammellager", "reichsdeutsche", "kapitalflucht", "ostschweiz", "germanien", "orientteppich", "landeshauptfrau", "romandie", "ultra", "oder-neiße-linie", "platt", "neuhochdeutsch", "staatssicherheitsdienst", "südeuropäisch", "deutschstämmige", "umweltflüchtling", "ostzone", "mezzogiorno", "villanell", "frisör", "oberitalien", 
"süddeutsch", "treudeutsch", "bundesverfassungsgericht", "ischia", "mozzarella", "sudetendeutsch", "tramontana", "bayern", "einwandererfamilie", "sprachführer", "durchgangsverkehr", "arno", "rütlischwur", "volkskammer", "mad", "ns-staat", "volksmarine", "dienstleistungsverkehr", "expatriate", "gemeindeutsch", "österreicherin", "zonenrandgebiet", "amtssprache", "tifoso", "schweizerisch", "studienaufenthalt", "hansestadt", "hessen", "bure", "ostflüchtling", "flüchtlingstreck", "ristorante", "osteria", "teutonengrill", "assisen", "riviera", "kolonialherr", "wendezeit", "flüchtlingsheim", "bundesverwaltungsgericht", "diwan", "exequatur", "krautrock", "deutschstämmiger", "woiwod", "geniezeit", "anatolien", "bundessozialgericht", "freiburg"]
\ No newline at end of file
diff --git a/debiaswe-master/data/general_origin/definitional_pairs.json b/debiaswe-master/data/general_origin/definitional_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..210f5a3f05185a4f6777a7376ac2589dbab84a5e
--- /dev/null
+++ b/debiaswe-master/data/general_origin/definitional_pairs.json
@@ -0,0 +1,10 @@
+[
+    ["deutscher", "ausländer"],
+    ["deutsche", "ausländerin"],
+    ["deutschland", "ausland"],
+    ["einheimischer", "immigrant"],
+    ["einheimische", "immigrantin"],
+    ["deutsch", "fremd"],
+    ["heimat", "gast"],
+    ["deutschsprachig", "fremdsprachig"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/general_origin/equalize_pairs.json b/debiaswe-master/data/general_origin/equalize_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..191e93c78d71e17602ba7c0acdb77085ceceda27
--- /dev/null
+++ b/debiaswe-master/data/general_origin/equalize_pairs.json
@@ -0,0 +1,35 @@
+[
+    ["deutscher", "ausländer"],
+    ["deutsche", "ausländerin"],
+    ["inländer", "ausländer"],
+    ["inländerin", "ausländerin"],
+    ["deutschland", "ausland"],
+    ["einheimischer", "immigrant"],
+    ["einheimische", "immigrantin"],
+    ["deutsch", "fremd"],
+    ["deutsch", "ausländisch"],
+    ["traditionell", "exotisch"],
+    ["heimat", "gast"],
+    ["deutscher", "migrant"],
+    ["deutsche", "migrantin"],
+    ["deutschsprachig", "fremdsprachig"],
+    ["sabine", "elif"],
+    ["klaus", "mehmet"],
+    ["staatsbürger", "einwanderer"],
+    ["staatsbürgerin", "einwanderin"],
+    ["christlich", "muslimisch"],
+    ["christlich", "jüdisch"],
+    ["deutsch", "türkisch"],
+    ["deutsch", "polnisch"],
+    ["deutsch", "italienisch"],
+    ["euro", "złoty"],
+    ["euro", "lira"],
+    ["berlin", "warschau"],
+    ["berlin", "rom"],
+    ["berlin", "istanbul"],
+    ["hamburg", "krakau"],
+    ["hamburg", "ankara"],
+    ["hamburg", "mailand"],
+    ["deutscher", "flüchtling"],
+    ["deutscher", "asylbewerber"]
+] 
\ No newline at end of file
diff --git a/debiaswe-master/data/general_origin/professions.json b/debiaswe-master/data/general_origin/professions.json
new file mode 100644
index 0000000000000000000000000000000000000000..b6897fe6ce421396385a31d2624aaa928ea18808
--- /dev/null
+++ b/debiaswe-master/data/general_origin/professions.json
@@ -0,0 +1 @@
+[["accountant", 0.0, 0.4], ["acquaintance", 0.0, 0.0], ["actor", 0.8, 0.0], ["actress", -1.0, 0.0], ["adjunct_professor", 0.0, 0.5], ["administrator", 0.0, 0.2], ["adventurer", 0.0, 0.5], ["advocate", 0.0, -0.1], ["aide", 0.0, -0.2], ["alderman", 0.7, 0.2], ["alter_ego", 0.0, 0.0], ["ambassador", 0.0, 0.7], ["analyst", 0.0, 0.4], ["anthropologist", 0.0, 0.4], ["archaeologist", 0.0, 0.6], ["archbishop", 0.4, 0.5], ["architect", 0.1, 0.6], ["artist", 0.0, -0.2], ["artiste", -0.1, -0.2], ["assassin", 0.1, 0.8], ["assistant_professor", 0.1, 0.4], ["associate_dean", 0.0, 0.4], ["associate_professor", 0.0, 0.4], ["astronaut", 0.1, 0.8], ["astronomer", 0.1, 0.5], ["athlete", 0.0, 0.7], ["athletic_director", 0.1, 0.7], ["attorney", 0.0, 0.3], ["author", 0.0, 0.1], ["baker", 0.0, -0.1], ["ballerina", -0.5, -0.5], ["ballplayer", 0.2, 0.8], ["banker", 0.0, 0.6], ["barber", 0.5, 0.5], ["baron", 0.6, 0.3], ["barrister", 0.1, 0.4], ["bartender", 0.0, 0.3], ["biologist", 0.0, 0.1], ["bishop", 0.6, 0.4], ["bodyguard", 0.1, 0.9], ["bookkeeper", 0.0, -0.4], ["boss", 0.0, 0.7], ["boxer", 0.1, 0.9], ["broadcaster", -0.1, 0.4], ["broker", 0.1, 0.5], ["bureaucrat", 0.1, 0.5], ["businessman", 0.8, 0.2], ["businesswoman", -0.9, -0.1], ["butcher", 0.1, 0.9], ["butler", 0.5, 0.5], ["cab_driver", 0.1, 0.8], ["cabbie", 0.1, 0.6], ["cameraman", 0.8, 0.1], ["campaigner", 0.0, 0.2], ["captain", 0.1, 0.6], ["cardiologist", 0.1, 0.5], ["caretaker", 0.0, -0.9], ["carpenter", 0.1, 0.8], ["cartoonist", 0.0, 0.5], ["cellist", -0.1, 0.0], ["chancellor", 0.1, 0.6], ["chaplain", 0.1, 0.6], ["character", 0.0, 0.0], ["chef", 0.0, 0.5], ["chemist", 0.0, 0.2], ["choreographer", -0.2, -0.2], ["cinematographer", 0.0, 0.5], ["citizen", 0.0, 0.0], ["civil_servant", 0.0, 0.2], ["cleric", 0.3, 0.3], ["clerk", 0.0, -0.5], ["coach", 0.1, 0.8], ["collector", 0.0, 0.4], ["colonel", 0.1, 0.8], ["columnist", 0.0, 0.2], ["comedian", 0.0, 0.3], ["comic", 0.1, 0.1], ["commander", 0.1, 0.8], ["commentator", 0.0, 0.4], 
["commissioner", 0.0, 0.8], ["composer", 0.1, 0.4], ["conductor", 0.1, 0.6], ["confesses", 0.0, 0.0], ["congressman", 0.7, 0.3], ["constable", 0.2, 0.6], ["consultant", 0.0, 0.1], ["cop", 0.2, 0.6], ["correspondent", 0.0, 0.0], ["councilman", 0.8, 0.1], ["councilor", -0.1, -0.1], ["counselor", 0.0, -0.1], ["critic", 0.1, 0.4], ["crooner", 0.2, 0.2], ["crusader", 0.1, 0.7], ["curator", -0.1, 0.2], ["custodian", 0.1, 0.9], ["dad", 1.0, 0.0], ["dancer", -0.1, -0.9], ["dean", 0.2, 0.7], ["dentist", 0.0, 0.7], ["deputy", 0.1, 0.7], ["dermatologist", 0.0, -0.3], ["detective", 0.1, 0.5], ["diplomat", 0.0, 0.5], ["director", 0.1, 0.6], ["disc_jockey", 0.2, 0.6], ["doctor", 0.0, 0.7], ["doctoral_student", 0.0, 0.3], ["drug_addict", 0.0, 0.0], ["drummer", 0.0, 0.9], ["economics_professor", 0.1, 0.6], ["economist", 0.1, 0.5], ["editor", 0.1, 0.4], ["educator", 0.0, -0.5], ["electrician", 0.1, 0.8], ["employee", 0.0, 0.0], ["entertainer", 0.0, 0.0], ["entrepreneur", 0.0, 0.5], ["environmentalist", 0.0, -0.4], ["envoy", 0.1, 0.2], ["epidemiologist", 0.0, 0.0], ["evangelist", 0.1, 0.4], ["farmer", 0.1, 0.8], ["fashion_designer", -0.2, -0.4], ["fighter_pilot", 0.2, 0.7], ["filmmaker", 0.1, 0.3], ["financier", 0.1, 0.5], ["firebrand", 0.0, 0.1], ["firefighter", 0.1, 0.7], ["fireman", 0.8, 0.2], ["fisherman", 0.9, 0.1], ["footballer", 0.4, 0.5], ["foreman", 0.5, 0.4], ["freelance_writer", 0.0, 0.0], ["gangster", 0.2, 0.7], ["gardener", -0.1, 0.0], ["geologist", 0.0, 0.4], ["goalkeeper", 0.1, 0.5], ["graphic_designer", 0.0, 0.2], ["guidance_counselor", 0.0, 0.0], ["guitarist", 0.1, 0.5], ["hairdresser", -0.2, -0.8], ["handyman", 0.8, 0.2], ["headmaster", 0.4, 0.2], ["historian", 0.0, 0.5], ["hitman", 0.8, 0.2], ["homemaker", -0.1, -0.9], ["hooker", -0.2, -0.8], ["housekeeper", -0.2, -0.8], ["housewife", -1.0, 0.0], ["illustrator", 0.0, 0.2], ["industrialist", 0.1, 0.7], ["infielder", 0.1, 0.5], ["inspector", 0.1, 0.5], ["instructor", 0.0, -0.3], ["interior_designer", -0.2, -0.6], 
["inventor", 0.1, 0.5], ["investigator", 0.1, 0.5], ["investment_banker", 0.1, 0.7], ["janitor", 0.1, 0.9], ["jeweler", 0.1, 0.3], ["journalist", -0.1, 0.3], ["judge", 0.0, 0.7], ["jurist", 0.0, 0.0], ["laborer", 0.1, 0.9], ["landlord", 0.1, 0.4], ["lawmaker", 0.0, 0.7], ["lawyer", 0.1, 0.5], ["lecturer", 0.0, 0.2], ["legislator", 0.1, 0.7], ["librarian", -0.1, -0.9], ["lieutenant", 0.1, 0.7], ["lifeguard", 0.0, 0.6], ["lyricist", 0.0, -0.2], ["maestro", 0.1, 0.5], ["magician", 0.1, 0.7], ["magistrate", 0.0, 0.8], ["maid", -0.4, -0.6], ["major_leaguer", 0.2, 0.7], ["manager", 0.0, 0.6], ["marksman", 0.6, 0.4], ["marshal", 0.1, 0.7], ["mathematician", 0.0, 0.8], ["mechanic", 0.3, 0.6], ["mediator", 0.0, -0.2], ["medic", 0.1, 0.4], ["midfielder", 0.3, 0.5], ["minister", 0.1, 0.8], ["missionary", 0.0, 0.3], ["mobster", 0.1, 0.9], ["monk", 0.8, 0.1], ["musician", 0.0, 0.0], ["nanny", -0.3, -0.7], ["narrator", 0.0, 0.2], ["naturalist", 0.0, -0.2], ["negotiator", 0.0, 0.3], ["neurologist", 0.0, 0.6], ["neurosurgeon", 0.0, 0.7], ["novelist", 0.0, 0.0], ["nun", -0.8, -0.1], ["nurse", -0.1, -0.9], ["observer", 0.0, -0.1], ["officer", 0.1, 0.8], ["organist", -0.2, -0.3], ["painter", 0.0, 0.2], ["paralegal", -0.1, -0.4], ["parishioner", 0.0, 0.1], ["parliamentarian", 0.0, 0.6], ["pastor", 0.3, 0.7], ["pathologist", 0.0, 0.3], ["patrolman", 1.0, 0.0], ["pediatrician", 0.0, -0.2], ["performer", 0.0, -0.2], ["pharmacist", 0.0, 0.3], ["philanthropist", 0.0, 0.3], ["philosopher", 0.0, 0.8], ["photographer", 0.0, -0.1], ["photojournalist", 0.0, 0.1], ["physician", 0.0, 0.6], ["physicist", 0.1, 0.7], ["pianist", 0.0, -0.1], ["planner", 0.0, -0.3], ["plastic_surgeon", 0.2, 0.4], ["playwright", 0.0, 0.5], ["plumber", 0.1, 0.8], ["poet", 0.0, -0.1], ["policeman", 0.8, 0.2], ["politician", 0.0, 0.5], ["pollster", 0.0, 0.3], ["preacher", 0.2, 0.7], ["president", 0.1, 0.9], ["priest", 0.7, 0.3], ["principal", 0.0, 0.3], ["prisoner", 0.1, 0.6], ["professor", 0.1, 0.4], 
["professor_emeritus", 0.0, 0.5], ["programmer", 0.2, 0.6], ["promoter", 0.0, 0.3], ["proprietor", 0.1, 0.4], ["prosecutor", -0.1, 0.3], ["protagonist", 0.0, 0.1], ["protege", 0.0, 0.2], ["protester", -0.1, 0.0], ["provost", 0.0, 0.4], ["psychiatrist", 0.0, -0.2], ["psychologist", 0.0, 0.0], ["publicist", -0.1, -0.2], ["pundit", 0.0, 0.2], ["rabbi", 0.2, 0.6], ["radiologist", 0.0, -0.3], ["ranger", 0.2, 0.7], ["realtor", -0.2, -0.2], ["receptionist", -0.3, -0.7], ["registered_nurse", -0.1, -0.9], ["researcher", 0.0, 0.1], ["restaurateur", 0.0, 0.2], ["sailor", 0.1, 0.8], ["saint", 0.2, 0.3], ["salesman", 0.8, 0.2], ["saxophonist", 0.1, 0.5], ["scholar", 0.0, 0.6], ["scientist", 0.0, 0.5], ["screenwriter", 0.1, 0.4], ["sculptor", 0.0, 0.5], ["secretary", -0.2, -0.8], ["senator", 0.1, 0.7], ["sergeant", 0.1, 0.7], ["servant", 0.0, 0.1], ["serviceman", 0.7, 0.3], ["sheriff_deputy", 0.1, 0.8], ["shopkeeper", 0.0, 0.5], ["singer", 0.0, -0.2], ["singer_songwriter", 0.0, -0.3], ["skipper", 0.1, 0.7], ["socialite", -0.4, -0.3], ["sociologist", 0.0, -0.2], ["soft_spoken", -0.1, -0.9], ["soldier", 0.3, 0.6], ["solicitor", 0.1, 0.3], ["solicitor_general", 0.0, 0.5], ["soloist", -0.1, -0.3], ["sportsman", 0.9, 0.1], ["sportswriter", 0.1, 0.9], ["statesman", 0.6, 0.4], ["steward", 0.4, -0.1], ["stockbroker", 0.1, 0.5], ["strategist", 0.0, 0.3], ["student", 0.0, 0.0], ["stylist", -0.2, -0.7], ["substitute", -0.1, -0.1], ["superintendent", 0.0, 0.9], ["surgeon", 0.1, 0.7], ["surveyor", 0.0, 0.5], ["swimmer", 0.0, 0.0], ["taxi_driver", 0.1, 0.9], ["teacher", 0.0, -0.8], ["technician", 0.1, 0.6], ["teenager", 0.0, -0.1], ["therapist", -0.1, -0.4], ["trader", 0.1, 0.6], ["treasurer", 0.0, -0.3], ["trooper", 0.2, 0.5], ["trucker", 0.2, 0.7], ["trumpeter", 0.0, 0.2], ["tutor", 0.0, -0.3], ["tycoon", 0.1, 0.7], ["undersecretary", 0.0, -0.3], ["understudy", 0.0, 0.0], ["valedictorian", 0.0, 0.0], ["vice_chancellor", 0.0, 0.6], ["violinist", -0.1, -0.3], ["vocalist", 0.0, -0.3], 
["waiter", 1.0, 0.0], ["waitress", -0.9, -0.1], ["warden", 0.1, 0.9], ["warrior", 0.1, 0.9], ["welder", 0.3, 0.6], ["worker", 0.0, 0.3], ["wrestler", 0.2, 0.6], ["writer", 0.0, 0.0]]
\ No newline at end of file
diff --git a/debiaswe-master/data/italian/bias_specific_full.json b/debiaswe-master/data/italian/bias_specific_full.json
new file mode 100644
index 0000000000000000000000000000000000000000..bebefbcedf083def275056764a4d6cb972be97d5
--- /dev/null
+++ b/debiaswe-master/data/italian/bias_specific_full.json
@@ -0,0 +1 @@
+["mark", "reichskanzler", "binnendeutsch", "jot", "radicchio", "schufa", "verfassungsgerichtshof", "fdgb", "kirchenstaat", "spätaussiedler", "ostpreußen", "önorm", "deutsche", "austriazismus", "niederdeutsch", "handballbundesliga", "bahncard", "tagliatelle", "regionalbahn", "verteidigungsausschuss", "reichsgericht", "deutschtürkisch", "vormärz", "quempaslied", "bundesrepublikanisch", "bundesversammlung", "hartz", "reichsdeutsche", "italianist", "spieloper", "reichsgebiet", "französisch-deutsch", "zweikanalton", "bundessozialgericht", "quart", "reichsgrenze", "bundesliga", "normblatt", "belcanto", "reichsstände", "hamburg", "résistance", "fürstentag", "hochmeister", "innenausschuss", "bundespräsident", "exarch", "landesversicherungsanstalt", "erzgebirge", "wehrmacht", "reichsmark", "abate", "deutschkenntnis", "volksdeutscher", "bundesautobahn", "deutschsprachlich", "auslandsdeutsche", "deutschlandlied", "deutsch-schweizerisch", "judenstern", "fichtelberg", "regionalexpress", "nationalsozialismus", "deutschlandsender", "dax", "deutsch-türkisch", "pizza", "außerdeutsch", "bundesministerium", "trakehner", "bremen", "deutschstämmig", "deutschrock", "fra", "geniezeit", "oberlandesgericht", "din-norm", "reichsadler", "reichsregierung", "hermesbürgschaft", "bundesgerichtshof", "tagesschau", "landesstraße", "pecorino", "baron", "hitlerdeutschland", "kanzleisprache", "novecento", "reichsacht", "beitrittsgebiet", "bundesstraße", "bundeskanzler", "italoamerikaner", "kleindeutsch", "ostmark", "standarddeutsch", "zentralbankrat", "italienische", "deutschlandweit", "lufthansa", "ehrenspielführer", "quent", "reichsdeutsch", "vereinigungskriminalität", "bundesdeutsch", "ländle", "mitteldeutschland", "pfalz", "westmark", "adfc", "bure", "bundeshaus", "adac", "ichlaut", "lingua franca", "narrativum", "balsamessig", "ciabatta", "ß", "mikrozensus", "reichsautobahn", "ch-laut", "kurfürst", "mezzogiorno", "schoah", "villanell", "volksdeutsche", "signore", "verdeutschen", 
"apenninen-halbinsel", "hunderennen", "bundesrat", "rentenmark", "deutschstämmiger", "ostverträge", "preußen", "prädikatswein", "bundesbank", "bel paese", "bundesgartenschau", "fußballbundesliga", "sächlich", "schwarz-weiß-rot", "visconte", "reichspost", "briefmonopol", "bundesminister", "schulferien", "germanistik", "abc", "piva", "pannacotta", "hesperien", "lira", "pendolino", "reinheitsgebot", "lambrusco", "reichsdeutscher", "sütterlinschrift", "padre", "deutschkunde", "fräuleinwunder", "schwabenspiegel", "s-laut", "volksgenosse", "germania", "italowestern", "reichsinsignien", "deutsch-amerikanisch", "deutschlandchef", "frikadelle", "gorgonzola", "bundesausbildungsförderungsgesetz", "territorialverteidigung", "bundesarchiv", "jungdeutscher", "deutschherren", "regionalliga", "germanist", "reichsgründung", "deutschstämmige", "schwarz-rot-gold", "bundeshaushalt", "solidaritätszuschlag", "displaced person", "westgeld", "dlg-prämiert", "frühmittelhochdeutsch", "giro d'italia", "mozzarella", "aussiedler", "schweizerdeutsch", "italianisieren", "republikflucht", "kurrentschrift", "deutsch-französisch", "neuromantik", "reichsstadt", "parmesan", "unter", "elsass-lothringen", "pizzaservice", "amerikahaus", "commedia dell'arte", "deutscher", "deutschlandtour", "deutschritterorden", "auslandsdeutsch", "feldgrau", "scudo", "auslandsdeutscher", "deutschlehrer", "achlaut", "altdeutsch", "neuklassizismus", "trentino-südtirol", "deutsch-jüdisch", "ns-staat", "novemberrevolution", "lastenausgleichsgesetz", "reichspräsident", "deutschfeindlich", "eisenbahnerwohnung", "donna", "gulden", "reichsritter", "fdj", "sudetenland", "bundeskanzleramt", "deutsch-deutsch", "misereor", "reichskammergericht", "hanswurst", "teutonengrill", "bundesverwaltungsgericht", "sprachgesellschaft", "deutschenhass", "bundesanleihe", "landeskirche", "germanismus", "futurismus", "achtundvierziger", "deutschfreundlich", "gerundium", "schluss-s", "vergangenheitsbewältigung", "berlin", "deutscher", "deutsche", 
"deutschen", "deutschland", "deutschlands", "deutsch", "deutsches", "deutschsprachig", "italienisch", "italiener", "italienerin", "italien", "italiens", "italienisches", "italienische", "italienischer", "italienischen", "bundesgartenschau", "italienurlaub", "italienreise", "rom", "mailand", "neapel", "palermo", "catania", "florenz", "genua", "bologna", "apulien", "toskana", "verona", "messina", "venedig", "padua", "triest", "brescia", "arancino", "aranzini", "antipasto", "focaccia", "granita", "vivaldi", "galileo", "merkel", "bratensoße", "bratwurst", "stulle", "maultauschen", "allgäu", "ostsee", "nordsee", "tiramisu", "straciatella", "spaghetti", "risotto", "pesto", "mortadella", "gnocchi", "espresso", "ciabatta", "calzone", "bruschetta", "toskana", "apulien"]
\ No newline at end of file
diff --git a/debiaswe-master/data/italian/bias_specific_seed.json b/debiaswe-master/data/italian/bias_specific_seed.json
new file mode 100644
index 0000000000000000000000000000000000000000..bebefbcedf083def275056764a4d6cb972be97d5
--- /dev/null
+++ b/debiaswe-master/data/italian/bias_specific_seed.json
@@ -0,0 +1 @@
+["mark", "reichskanzler", "binnendeutsch", "jot", "radicchio", "schufa", "verfassungsgerichtshof", "fdgb", "kirchenstaat", "spätaussiedler", "ostpreußen", "önorm", "deutsche", "austriazismus", "niederdeutsch", "handballbundesliga", "bahncard", "tagliatelle", "regionalbahn", "verteidigungsausschuss", "reichsgericht", "deutschtürkisch", "vormärz", "quempaslied", "bundesrepublikanisch", "bundesversammlung", "hartz", "reichsdeutsche", "italianist", "spieloper", "reichsgebiet", "französisch-deutsch", "zweikanalton", "bundessozialgericht", "quart", "reichsgrenze", "bundesliga", "normblatt", "belcanto", "reichsstände", "hamburg", "résistance", "fürstentag", "hochmeister", "innenausschuss", "bundespräsident", "exarch", "landesversicherungsanstalt", "erzgebirge", "wehrmacht", "reichsmark", "abate", "deutschkenntnis", "volksdeutscher", "bundesautobahn", "deutschsprachlich", "auslandsdeutsche", "deutschlandlied", "deutsch-schweizerisch", "judenstern", "fichtelberg", "regionalexpress", "nationalsozialismus", "deutschlandsender", "dax", "deutsch-türkisch", "pizza", "außerdeutsch", "bundesministerium", "trakehner", "bremen", "deutschstämmig", "deutschrock", "fra", "geniezeit", "oberlandesgericht", "din-norm", "reichsadler", "reichsregierung", "hermesbürgschaft", "bundesgerichtshof", "tagesschau", "landesstraße", "pecorino", "baron", "hitlerdeutschland", "kanzleisprache", "novecento", "reichsacht", "beitrittsgebiet", "bundesstraße", "bundeskanzler", "italoamerikaner", "kleindeutsch", "ostmark", "standarddeutsch", "zentralbankrat", "italienische", "deutschlandweit", "lufthansa", "ehrenspielführer", "quent", "reichsdeutsch", "vereinigungskriminalität", "bundesdeutsch", "ländle", "mitteldeutschland", "pfalz", "westmark", "adfc", "bure", "bundeshaus", "adac", "ichlaut", "lingua franca", "narrativum", "balsamessig", "ciabatta", "ß", "mikrozensus", "reichsautobahn", "ch-laut", "kurfürst", "mezzogiorno", "schoah", "villanell", "volksdeutsche", "signore", "verdeutschen", 
"apenninen-halbinsel", "hunderennen", "bundesrat", "rentenmark", "deutschstämmiger", "ostverträge", "preußen", "prädikatswein", "bundesbank", "bel paese", "bundesgartenschau", "fußballbundesliga", "sächlich", "schwarz-weiß-rot", "visconte", "reichspost", "briefmonopol", "bundesminister", "schulferien", "germanistik", "abc", "piva", "pannacotta", "hesperien", "lira", "pendolino", "reinheitsgebot", "lambrusco", "reichsdeutscher", "sütterlinschrift", "padre", "deutschkunde", "fräuleinwunder", "schwabenspiegel", "s-laut", "volksgenosse", "germania", "italowestern", "reichsinsignien", "deutsch-amerikanisch", "deutschlandchef", "frikadelle", "gorgonzola", "bundesausbildungsförderungsgesetz", "territorialverteidigung", "bundesarchiv", "jungdeutscher", "deutschherren", "regionalliga", "germanist", "reichsgründung", "deutschstämmige", "schwarz-rot-gold", "bundeshaushalt", "solidaritätszuschlag", "displaced person", "westgeld", "dlg-prämiert", "frühmittelhochdeutsch", "giro d'italia", "mozzarella", "aussiedler", "schweizerdeutsch", "italianisieren", "republikflucht", "kurrentschrift", "deutsch-französisch", "neuromantik", "reichsstadt", "parmesan", "unter", "elsass-lothringen", "pizzaservice", "amerikahaus", "commedia dell'arte", "deutscher", "deutschlandtour", "deutschritterorden", "auslandsdeutsch", "feldgrau", "scudo", "auslandsdeutscher", "deutschlehrer", "achlaut", "altdeutsch", "neuklassizismus", "trentino-südtirol", "deutsch-jüdisch", "ns-staat", "novemberrevolution", "lastenausgleichsgesetz", "reichspräsident", "deutschfeindlich", "eisenbahnerwohnung", "donna", "gulden", "reichsritter", "fdj", "sudetenland", "bundeskanzleramt", "deutsch-deutsch", "misereor", "reichskammergericht", "hanswurst", "teutonengrill", "bundesverwaltungsgericht", "sprachgesellschaft", "deutschenhass", "bundesanleihe", "landeskirche", "germanismus", "futurismus", "achtundvierziger", "deutschfreundlich", "gerundium", "schluss-s", "vergangenheitsbewältigung", "berlin", "deutscher", "deutsche", 
"deutschen", "deutschland", "deutschlands", "deutsch", "deutsches", "deutschsprachig", "italienisch", "italiener", "italienerin", "italien", "italiens", "italienisches", "italienische", "italienischer", "italienischen", "bundesgartenschau", "italienurlaub", "italienreise", "rom", "mailand", "neapel", "palermo", "catania", "florenz", "genua", "bologna", "apulien", "toskana", "verona", "messina", "venedig", "padua", "triest", "brescia", "arancino", "aranzini", "antipasto", "focaccia", "granita", "vivaldi", "galileo", "merkel", "bratensoße", "bratwurst", "stulle", "maultauschen", "allgäu", "ostsee", "nordsee", "tiramisu", "straciatella", "spaghetti", "risotto", "pesto", "mortadella", "gnocchi", "espresso", "ciabatta", "calzone", "bruschetta", "toskana", "apulien"]
\ No newline at end of file
diff --git a/debiaswe-master/data/italian/definitional_pairs.json b/debiaswe-master/data/italian/definitional_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..692c6234b2b25a018c98a69ef6ad62dbbd4bcde0
--- /dev/null
+++ b/debiaswe-master/data/italian/definitional_pairs.json
@@ -0,0 +1,14 @@
+[
+    ["deutscher", "italiener"],
+    ["deutsche", "italienerin"],
+    ["deutsche", "italienerinnen"],
+    ["deutschen", "italienern"],
+    ["deutschland", "italien"],
+    ["deutschlands", "italiens"],
+    ["deutsch", "italienisch"],
+    ["deutsches", "italienisches"],
+    ["deutsche", "italienische"],
+    ["deutscher", "italienischer"],
+    ["deutschen", "italienischen"],
+    ["deutschsprachig", "italienischsprachig"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/italian/equalize_pairs.json b/debiaswe-master/data/italian/equalize_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a0a7b322ccfe2c6417536a6d63034b967ece4a7
--- /dev/null
+++ b/debiaswe-master/data/italian/equalize_pairs.json
@@ -0,0 +1,31 @@
+[
+    ["deutscher", "italiener"],
+    ["deutsche", "italienerin"],
+    ["deutsche", "italienerinnen"],
+    ["deutschen", "italienern"],
+    ["deutschland", "italien"],
+    ["deutschlands", "italiens"],
+    ["deutsch", "italienisch"],
+    ["deutsches", "italienisches"],
+    ["deutsche", "italienische"],
+    ["deutscher", "italienischer"],
+    ["deutschen", "italienischen"],
+    ["deutschsprachig", "italienischsprachig"],
+    ["berlin", "rom"],
+    ["hamburg", "mailand"],
+    ["münchen", "neapel"],
+    ["köln", "turin"],
+    ["frankfurt", "palermo"],
+    ["stuttgart", "genua"],
+    ["düsseldorf", "bologna"],
+    ["leipzig", "florenz"],
+    ["dortmund", "bari"],
+    ["allgäu", "toskana"],
+    ["ostseeküste", "apulien"],
+    ["bratensoße", "balsamico"],
+    ["jägermeister", "amaretto"],
+    ["kloß", "arancino"],
+    ["bratwurst", "antipasto"],
+    ["stulle", "focaccia"],
+    ["maultaschen", "tortellini"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/multi_attribute/bias_specific_full.json b/debiaswe-master/data/multi_attribute/bias_specific_full.json
new file mode 100644
index 0000000000000000000000000000000000000000..21d96db29b05499cc36ff495716a8e4d22f32f53
--- /dev/null
+++ b/debiaswe-master/data/multi_attribute/bias_specific_full.json
@@ -0,0 +1 @@
+["germane", "ostgeld", "focaccia", "ostalgie", "volksgenosse", "hetman", "auslandsdeutscher", "sinto", "lech", "auslandsgeschäft", "bambino", "reichsbahn", "engadin", "schilling", "grundgesetz", "prosecco", "kleindeutsch", "aventiure", "europide", "flüchtlingsausweis", "weser", "völkerwanderung", "azzurri", "landammann", "trecento", "deutschfeindlichkeit", "polnisch", "baron", "mitteldeutsch", "bundesminister", "germanisch", "itaker", "groschen", "quempaslied", "flüchtlingshilfe", "baden-württemberg",  "thai", "zuwanderin", "edeling", "italienisch", "ausländerhass", "confoederatio helvetica", "germanisieren", "vaudeville", "italowestern", "mittelhochdeutsch", "schwarz-rot-gold", "westmitteldeutsch", "tamtam", "janitscharenmusik", "öterreichisch-ungarisch", "weichsel",  "germanentum", "jungdeutscher", "plattdeutsch", "grappa", "exarch",  "abate", "carabiniere", "bairisch", "alldeutsch", "quart", "sultan", "ramasan", "liechtenstein", "sachsen-anhalt", "settecento", "greyerzer", "reichsdeutsch", "urdeutsch", "bundesrepublikanisch", "thurgau", "germanist", "labiovelar", "kampanile", "ostmitteldeutsch", "frühneuhochdeutsch", "ostverträge", "geschäftsträger", "hochlautung", "reinheitsgebot", "wallis", "signore",  "brandenburg", "nazarener", "sbrinz", "carnotzet", "verhochdeutschen", "cinquecento", "großglockner", "aargau", "einwanderungsstrom", "beg", "kastell", "asylsuchende", "panje",  "spartakiade", "veltliner", "pizzaservice", "wittum", "helvetien", "nibelungen", "papagallo", "amerikahaus", "romand", "ausländisch", "boatpeople", "neudeutsch", "zibetkatze", "rom", "bundesstadt", "schweizweit", "mora", "signor", "chianti", "bundesstraße", "asylsuchender", "indogermane", "kenning", "sejm", "stadtammann", "wessi", "normanne", "ostdeutschland", "volksdeutscher", "rugier", "sütterlinschrift", "secondo", "gambir", "hochdeutsch", "bel paese", "frühneuhochdeutsch", "bundesliga", "devisenbewirtschaftung", "signorina", "zweigelt", "toskana", "auswärtig", "bundesadler", 
"deutschtum", "khan",  "hodscha", "hinterindien", "franken", "deutschlandchef", "helvetier", "afroasiatisch", "bundesbürger", "eindeutschung", "oberdeutsch", "deutschtümelei", "deutschsprachlich", "radicchio", "missingsch", "ostblock", "himalaja", "puzzolan", "taverne", "sundainseln", "außerdeutsch", "signoria", "cinzano", "wirtschaftsflüchtling", "markomanne", "austrofaschismus", "swissair", "ddr-bürger", "deutsche", "migrationshintergrund", "asylbewerberin", "deutschamerikanisch", "levante", "piefke", "padre", "theatiner", "westdeutschland", "zimber", "stasimitarbeiter", "deutschnational", "karawane", "norddeutsch", "russland", "spumante", "kosovokrieg", "italienisch", "italienerin", "mittelhochdeutsch", "bundeskanzlei", "gastarbeiter", "freiheitlich", "deutsch-schweizerisch", "italienreise", "flüchtlingselend", "ostler", "landesstraße", "sachsen", "verfassungsgerichtshof", "trentino-südtirol", "freisinn", "sprachgesellschaft", "ausländerfeindlich", "kurfürst", "deutschtürke", "schwäbeln", "kirchenstaat", "kebab", "staatsgerichtshof", "oberlandesgericht", "deutschkunde", "ostmitteldeutsch", "bergama", "cajunmusic", "spieloper", "rheinfall", "auffangen", "auslieferungsantrag", "spatha", "ausführen", "magnat", "polonistik", "undeutsch", "westfernsehen", "uri", "indoeuropäisch", "donnerer", "odal", "indogermanisch", "schufa", "dw", "po", "schleswig-holstein", "unter", "humanismus", "piazza", "ausländerkind", "westen", "kentumsprache", "reisefreiheit", "tessin", "greencard", "stasiakte", "ard", "einwandererkind", "pirogge", "lambrusco", "zimbal", "cavaliere", "mark", "raki", "aussiedler", "flüchtlingslager", "ufa", "deutschschweizerisch", "jot", "gorgonzola", "kanake", "u-häkchen", "fichtelberg", "südtirol", "asti", "burgunde", "ost-west-dialog", "centime", "tiber", "jul", "hartz", "quattrocento", "alphorn", "bundesverdienstkreuz", "pasta", "madrigal", "kelte", "deutschschweizer", "displaced person", "zentralasien", "handballbundesliga", "rentenmark", 
"althochdeutsch", "fremdenpolizei", "tarantella", "frühmittelhochdeutsch", "sibirien", "urgermanisch", "migrantenkind", "schwyzertütsch", "arte povera", "gastarbeiterin", "weißherbst", "bundespräsident", "öterreichweit", "ural", "pidginenglisch", "uckermark", "immigrantin", "ger", "scudo", "spätaussiedler", "lastenausgleichsgesetz", "kulturinstitut", "welschschweizer", "menschenhandel", "austriazismus", "bundeshaus", "italer", "mitternachtssonne", "deutschlandfunk", "welsche", "latsche", "weißbuch", "hilfswillige", "schweizerdeutsch", "kufe", "kolonialherrschaft", "bundesdeutsche", "mitteldeutschland", "stracciatella", "frühmittelhochdeutsch", "vielfraß", "bundesdeutscher", "kaspisches meer", "kanton", "deutschtürkisch", "pfingstochse", "teutonin", "dihk", "amerikadeutsche", "stasiunterlagen", "deutsch", "ausländerpolitik", "niedersachsen", "ausfuhrgarantie", "harz", "lateinisch", "palazzo", "futhark", "schriftdeutsch", "wien", "pan", "weißwurstäquator", "pazifischer ozean", "basso", "welschland", "stabreim", "bundesministerium", "quarta", "schwyzerdütsch", "lasagne", "aga", "karelien", "polnisch", "russlanddeutsch", "kurrentschrift", "canzone", "oberdeutsch", "härtefallkommission", "nachkriegsschweiz", "levantiner", "faschismus", "pole", "angelsachse", "ararat", "reichstag", "verismo", "börde", "paying guest", "balsamessig", "schwabenspiegel", "westmitteldeutsch", "landesversicherungsanstalt", "illyrer", "pagode", "treuhandanstalt", "doktorhut", "fußballbundesliga", "italoamerikaner", "kalabrien", "arbeitsemigrant", "deutschlehrer", "arier", "bajazzo", "kabinett", "lufthansa", "mikrozensus", "verrechnungseinheit", "hanswurst", "sezession", "schlepper", "aufenthaltsgenehmigung", "deutschjüdisch", "einwandererstrom", "außenwert", "misereor", "bundesgartenschau", "bezirkstag", "alpenrepublik", "zwangsumtausch", "auslandsdeutsch", "teehaus", "panasiatisch", "einwanderin", "westdeutscher", "duce", "konsularkorps", "italianisieren", "siamkatze", "auslese", 
"isolationismus", "expedition", "zav", "einreisen", "turkisieren", "ostöterreich", "anwerbestopp", "waadt", "ausländerin", "franzöisch-deutsch", "vorderasien", "administrator", "stradivari", "welschschweizerisch", "ostgermane", "kolonie", "bundesbetreuung", "gefolgschaft", "bundeshaushalt", "sizilien", "vendetta", "botschafter", "hermesbürgschaft", "nidwalden", "zahlungsbilanz", "apo", "generaloberst", "altnordisch", "jura", "ostasien", "pandschabi", "volksdeutsche", "einwanderer", "saarland", "effendi", "deutschlandlied", "intershop", "eisheiligen", "ch-laut", "bundschuh", "landeshauptmann", "cherusker", "migrantin", "deutsch gesinnt", "dolma", "pecorino", "nordrhein-westfalen", "inländerin", "obwalden", "schrumpfgermane", "osten", "eindeutschen", "amerikadeutscher", "thing", "ciabatta", "hamburg", "schweizergarde", "welscher", "parmesan", "altdeutsch", "mazurka", "böhmerwald", "sowjetzone", "westdeutsche", "berlin", "deutsch", "rhododendron", "fra", "hispano", "deutschsprachig", "osmane", "immigrant", "bundespolitiker", "ubier", "hilfswilliger", "wechselkurs", "marchese", "apulien", "reisescheck", "bergamotte", "defa", "sonata", "zentralschweiz", "apennin", "dax", "ostdeutsche", "bremen", "konsistorium", "deutschfreundlichkeit", "honved", "padrone", "schweizer", "kawass", "departement", "frikadelle", "großdeutsch",  "verdeutschung", "jiddisch", "neubürger", "trattoria",  "panettone", "austromarxismus", "metamusik", "ddr-bürgerin", "boreal", "nordgermane", "notaufnahme", "antipasto", "drk", "catenaccio", "hesperien", "pannacotta", "schweizerin", "moxibustion", "allgäu", "schriftdeutsch", "welschschweiz", "bundesgebiet", "auslandsdeutsche", "eurasier", "schakal", "jass", "bundesrat", "warenumsatzsteuer", "deutscher", "swiss", "westschweiz", "trakehner", "gote", "fürstentag", "autarkie", "flühtlingsstrom", "landesgartenschau", "futurismus", "ligurien", "bundesautobahn", "ku-klux-klan", "standarddeutsch", "kappadozien", "westdeutsch", "westlich", "innerschweiz", 
"steppenhuhn", "ösi", "orient", "achtundvierziger", "entsendegesetz", "hethiter", "deutsch-türkisch", "romanismus", "schweizerbürgerin", "daus", "franke", "senat", "bundesnachrichtendienst", "bundesbahn", "beamtendeutsch", "zuwandrer", "lombardei", "rittmeister", "lori", "alta moda", "standarddeutsch", "buntnessel", "belcanto", "deutschkenntnis", "piccolo", "tschibuk", "auffanglager", "elba", "arlecchino", "lira", "exilliteratur", "niederdeutsch", "bundesausbildungsförderungsgesetz", "ehrenspielführer", "durchgangslager", "apenninen-halbinsel", "cassata", "schwarz-weiß-rot", "deutschlandsender", "autark", "erzherzog", "eurokommunismus", "europider", "hennastrauch", "öterreichisch", "brd", "plateresk", "prignitz", "treck", "buch", "iberer", "pancetta", "lüneburger heide", "ostig", "fdp", "couvert", "asylbewerberheim", "quintal", "heldenlied", "asiatisch", "kandidat", "notlager", "ems", "bundestag", "hindukusch", "beitrittsgebiet", "türkisch", "güteraustausch", "importe", "mittelniederdeutsch", "mauerschütze", "bundeskanzleramt", "ß", "tagliatelle", "büffel", "ossi", "seconda", "zaubernuss", "ziehungsrecht", "brandgans", "katamaran", "feldgrau", "pizza", "afrodeutsch", "importhandel", "zloty", "italienische", "ostdeutsch", "anopheles", "betäubungsmittelgesetz", "kreuzer", "resident", "bundesdeutsch", "italianismus", "ötlich", "türkischstämmig", "welsch", "valuta", "schleichkatze", "fernamt", "südasien", "deutschlandpolitik", "germanin", "muchtar", "ostpolitik", "thüringen", "flüchtlingsrat", "brillenschlange", "met", "schabzieger", "piva", "krevette", "devise", "ausländerfeindlichkeit", "boccia", "konak", "alpenjäger", "prädikatswein", "preislied", "studienkolleg", "sudetenland", "chassidismus", "hemlocktanne", "baba", "novecento", "großdeutschland", "rheinland-pfalz", "lizenziat", "nachkriegsöterreich", "binnendeutsch", "geest", "billigflagge", "bundeswehr", "amischer", "getto", "kanzleideutsch", "moschustier", "neudeutsch", "polentum", "italienischsprachig", 
"kamtschatka", "vacherin", "fantasia", "volksgericht", "nationalratspräsident", "kontor", "scampi", "teutonisch", "plattdeutsch", "germanistik", "biedermeier", "certosa", "eurocityzug", "ausländer", "seele", "staatsrat", "bundeskabinett", "alitalia", "italien", "migrationspolitik", "verfassungsinitiative", "diplomatie", "neuhochdeutsch", "zwergkiefer", "marktamt", "dienstpragmatik", "deutschschweiz", "frascati", "kurrent", "türkisch", "fpö", "eurasien", "kemalismus", "landeskirche", "mittelmeerländer", "eidgenosse", "friedensfahrt", "renaissance", "rotwelsch", "hyäne", "italianist", "prälat", "pfalz", "fremdarbeiter", "quent", "spruch", "wandervogel", "hortensie", "türbe", "bundesgesetzblatt", "schwarzwald", "ausländeranteil", "hafenzoll", "integrationsbeauftragte", "mecklenburg-vorpommern", "ostdeutscher", "satemsprache", "mittelniederdeutsch", "botschaft", "maggiore", "schutztruppe", "ländle", "kreole", "hamam", "conte", "incoming", "ripuarisch", "lingua franca", "aare", "bundesversammlung", "bootsflühtling", "mitteldeutsch", "unteritalien", "althochdeutsch", "bigos", "ingwäonen", "schwarzes meer", "bundesanleihe", "fremde", "ober", "ausländeramt", "qualitätswein", "sardinien", "westler", "einigungsvertrag", "asean", "visconte", "don", "halbesel", "bundesbank", "gesandtschaft", "indogermanistik", "behördendeutsch", "notaufnahmelager", "ausländerbehörde", "josephinismus", "schwaben", "flühtlingspolitik", "rote-armee-fraktion", "schutzzoll", "katzelmacher", "deutschstämmig", "reichsdeutscher", "deutsch sprechend", "staatsminister", "präfekt", "deutschamerikaner", "asylgerichtshof", "glosse", "italianistisch", "alemanne", "legionär", "sammellager", "reichsdeutsche", "kapitalflucht", "ostschweiz", "germanien", "orientteppich", "landeshauptfrau", "romandie", "ultra", "oder-neiße-linie", "platt", "neuhochdeutsch", "staatssicherheitsdienst", "südeuropäisch", "deutschstämmige", "umweltflühtling", "ostzone", "mezzogiorno", "villanell", "frisör", "oberitalien", 
"süddeutsch", "treudeutsch", "bundesverfassungsgericht", "ischia", "mozzarella", "sudetendeutsch", "tramontana", "bayern", "einwandererfamilie", "sprachführer", "durchgangsverkehr", "arno", "rütlischwur", "volkskammer", "mad", "ns-staat", "volksmarine", "dienstleistungsverkehr", "expatriate", "gemeindeutsch", "österreicherin", "zonenrandgebiet", "amtssprache", "tifoso", "schweizerisch", "studienaufenthalt", "hansestadt", "hessen", "bure", "ostflüchtling", "flüchtlingstreck", "ristorante", "osteria", "teutonengrill", "assisen", "riviera", "kolonialherr", "wendezeit", "flüchtlingsheim", "bundesverwaltungsgericht", "diwan", "exequatur", "krautrock", "deutschstämmiger", "woiwod", "geniezeit", "anatolien", "bundessozialgericht", "freiburg"]
\ No newline at end of file
diff --git a/debiaswe-master/data/multi_attribute/bias_specific_seed.json b/debiaswe-master/data/multi_attribute/bias_specific_seed.json
new file mode 100644
index 0000000000000000000000000000000000000000..c4142ab2bc4a58bd1731a27cb08c53c38fd567d7
--- /dev/null
+++ b/debiaswe-master/data/multi_attribute/bias_specific_seed.json
@@ -0,0 +1 @@
+["germane", "ostgeld", "focaccia", "ostalgie", "volksgenosse", "hetman", "auslandsdeutscher", "sinto", "lech", "auslandsgeschäft", "bambino", "reichsbahn", "engadin", "schilling", "grundgesetz", "prosecco", "kleindeutsch", "aventiure", "europide", "flüchtlingsausweis", "weser", "völkerwanderung", "azzurri", "landammann", "trecento", "deutschfeindlichkeit", "polnisch", "baron", "mitteldeutsch", "bundesminister", "germanisch", "itaker", "groschen", "quempaslied", "flüchtlingshilfe", "baden-württemberg",  "thai", "zuwanderin", "edeling", "italienisch", "ausländerhass", "confoederatio helvetica", "germanisieren", "vaudeville", "italowestern", "mittelhochdeutsch", "schwarz-rot-gold", "westmitteldeutsch", "tamtam", "janitscharenmusik", "öterreichisch-ungarisch", "weichsel",  "germanentum", "jungdeutscher", "plattdeutsch", "grappa", "exarch",  "abate", "carabiniere", "bairisch", "alldeutsch", "quart", "sultan", "ramasan", "liechtenstein", "sachsen-anhalt", "settecento", "greyerzer", "reichsdeutsch", "urdeutsch", "bundesrepublikanisch", "thurgau", "germanist", "labiovelar", "kampanile", "ostmitteldeutsch", "frühneuhochdeutsch", "ostverträge", "geschäftsträger", "hochlautung", "reinheitsgebot", "wallis", "signore",  "brandenburg", "nazarener", "sbrinz", "carnotzet", "verhochdeutschen", "cinquecento", "großglockner", "aargau", "einwanderungsstrom", "beg", "kastell", "asylsuchende", "panje",  "spartakiade", "veltliner", "pizzaservice", "wittum", "helvetien", "nibelungen", "papagallo", "amerikahaus", "romand", "ausländisch", "boatpeople", "neudeutsch", "zibetkatze", "rom", "bundesstadt", "schweizweit", "mora", "signor", "chianti", "bundesstraße", "asylsuchender", "indogermane", "kenning", "sejm", "stadtammann", "wessi", "normanne", "ostdeutschland", "volksdeutscher", "rugier", "sütterlinschrift", "secondo", "gambir", "hochdeutsch", "bel paese", "frühneuhochdeutsch", "bundesliga", "devisenbewirtschaftung", "signorina", "zweigelt", "toskana", "auswärtig", "bundesadler", 
"deutschtum", "khan",  "hodscha", "hinterindien", "franken", "deutschlandchef", "helvetier", "afroasiatisch", "bundesbürger", "eindeutschung", "oberdeutsch", "deutschtümelei", "deutschsprachlich", "radicchio", "missingsch", "ostblock", "himalaja", "puzzolan", "taverne", "sundainseln", "außerdeutsch", "signoria", "cinzano", "wirtschaftsflüchtling", "markomanne", "austrofaschismus", "swissair", "ddr-bürger", "deutsche", "migrationshintergrund", "asylbewerberin", "deutschamerikanisch", "levante", "piefke", "padre", "theatiner", "westdeutschland", "zimber", "stasimitarbeiter", "deutschnational", "karawane", "norddeutsch", "russland", "spumante", "kosovokrieg", "italienisch", "italienerin", "mittelhochdeutsch", "bundeskanzlei", "gastarbeiter", "freiheitlich", "deutsch-schweizerisch", "italienreise", "flüchtlingselend", "ostler", "landesstraße", "sachsen", "verfassungsgerichtshof", "trentino-südtirol", "freisinn", "sprachgesellschaft", "ausländerfeindlich", "kurfürst", "deutschtürke", "schwäbeln", "kirchenstaat", "kebab", "staatsgerichtshof", "oberlandesgericht", "deutschkunde", "ostmitteldeutsch", "bergama", "cajunmusic", "spieloper", "rheinfall", "auffangen", "auslieferungsantrag", "spatha", "ausführen", "magnat", "polonistik", "undeutsch", "westfernsehen", "uri", "indoeuropäisch", "donnerer", "odal", "indogermanisch", "schufa", "dw", "po", "schleswig-holstein", "unter", "humanismus", "piazza", "ausländerkind", "westen", "kentumsprache", "reisefreiheit", "tessin", "greencard", "stasiakte", "ard", "einwandererkind", "pirogge", "lambrusco", "zimbal", "cavaliere", "mark", "raki", "aussiedler", "flüchtlingslager", "ufa", "deutschschweizerisch", "jot", "gorgonzola", "kanake", "u-häkchen", "fichtelberg", "südtirol", "asti", "burgunde", "ost-west-dialog", "centime", "tiber", "jul", "hartz", "quattrocento", "alphorn", "bundesverdienstkreuz", "pasta", "madrigal", "kelte", "deutschschweizer", "displaced person", "zentralasien", "handballbundesliga", "rentenmark", 
"althochdeutsch", "fremdenpolizei", "tarantella", "frühmittelhochdeutsch", "sibirien", "urgermanisch", "migrantenkind", "schwyzertütsch", "arte povera", "gastarbeiterin", "weißherbst", "bundespräsident", "öterreichweit", "ural", "pidginenglisch", "uckermark", "immigrantin", "ger", "scudo", "spätaussiedler", "lastenausgleichsgesetz", "kulturinstitut", "welschschweizer", "menschenhandel", "austriazismus", "bundeshaus", "italer", "mitternachtssonne", "deutschlandfunk", "welsche", "latsche", "weißbuch", "hilfswillige", "schweizerdeutsch", "kufe", "kolonialherrschaft", "bundesdeutsche", "mitteldeutschland", "stracciatella", "frühmittelhochdeutsch", "vielfraß", "bundesdeutscher", "kaspisches meer", "kanton", "deutschtürkisch", "pfingstochse", "teutonin", "dihk", "amerikadeutsche", "stasiunterlagen", "deutsch", "ausländerpolitik", "niedersachsen", "ausfuhrgarantie", "harz", "lateinisch", "palazzo", "futhark", "schriftdeutsch", "wien", "pan", "weißwurstäquator", "pazifischer ozean", "basso", "welschland", "stabreim", "bundesministerium", "quarta", "schwyzerdütsch", "lasagne", "aga", "karelien", "polnisch", "russlanddeutsch", "kurrentschrift", "canzone", "oberdeutsch", "härtefallkommission", "nachkriegsschweiz", "levantiner", "faschismus", "pole", "angelsachse", "ararat", "reichstag", "verismo", "börde", "paying guest", "balsamessig", "schwabenspiegel", "westmitteldeutsch", "landesversicherungsanstalt", "illyrer", "pagode", "treuhandanstalt", "doktorhut", "fußballbundesliga", "italoamerikaner", "kalabrien", "arbeitsemigrant", "deutschlehrer", "arier", "bajazzo", "kabinett", "lufthansa", "mikrozensus", "verrechnungseinheit", "hanswurst", "sezession", "schlepper", "aufenthaltsgenehmigung", "deutschjüdisch", "einwandererstrom", "außenwert", "misereor", "bundesgartenschau", "bezirkstag", "alpenrepublik", "zwangsumtausch", "auslandsdeutsch", "teehaus", "panasiatisch", "einwanderin", "westdeutscher", "duce", "konsularkorps", "italianisieren", "siamkatze", "auslese", 
"isolationismus", "expedition", "zav", "einreisen", "turkisieren", "ostöterreich", "anwerbestopp", "waadt", "ausländerin", "franzöisch-deutsch", "vorderasien", "administrator", "stradivari", "welschschweizerisch", "ostgermane", "kolonie", "bundesbetreuung", "gefolgschaft", "bundeshaushalt", "sizilien", "vendetta", "botschafter", "hermesbürgschaft", "nidwalden", "zahlungsbilanz", "apo", "generaloberst", "altnordisch", "jura", "ostasien", "pandschabi", "volksdeutsche", "einwanderer", "saarland", "effendi", "deutschlandlied", "intershop", "eisheiligen", "ch-laut", "bundschuh", "landeshauptmann", "cherusker", "migrantin", "deutsch gesinnt", "dolma", "pecorino", "nordrhein-westfalen", "inländerin", "obwalden", "schrumpfgermane", "osten", "eindeutschen", "amerikadeutscher", "thing", "ciabatta", "hamburg", "schweizergarde", "welscher", "parmesan", "altdeutsch", "mazurka", "böhmerwald", "sowjetzone", "westdeutsche", "berlin", "deutsch", "rhododendron", "fra", "hispano", "deutschsprachig", "osmane", "immigrant", "bundespolitiker", "ubier", "hilfswilliger", "wechselkurs", "marchese", "apulien", "reisescheck", "bergamotte", "defa", "sonata", "zentralschweiz", "apennin", "dax", "ostdeutsche", "bremen", "konsistorium", "deutschfreundlichkeit", "honved", "padrone", "schweizer", "kawass", "departement", "frikadelle", "großdeutsch",  "verdeutschung", "jiddisch", "neubürger", "trattoria",  "panettone", "austromarxismus", "metamusik", "ddr-bürgerin", "boreal", "nordgermane", "notaufnahme", "antipasto", "drk", "catenaccio", "hesperien", "pannacotta", "schweizerin", "moxibustion", "allgäu", "schriftdeutsch", "welschschweiz", "bundesgebiet", "auslandsdeutsche", "eurasier", "schakal", "jass", "bundesrat", "warenumsatzsteuer", "deutscher", "swiss", "westschweiz", "trakehner", "gote", "fürstentag", "autarkie", "flühtlingsstrom", "landesgartenschau", "futurismus", "ligurien", "bundesautobahn", "ku-klux-klan", "standarddeutsch", "kappadozien", "westdeutsch", "westlich", "innerschweiz", 
"steppenhuhn", "ösi", "orient", "achtundvierziger", "entsendegesetz", "hethiter", "deutsch-türkisch", "romanismus", "schweizerbürgerin", "daus", "franke", "senat", "bundesnachrichtendienst", "bundesbahn", "beamtendeutsch", "zuwandrer", "lombardei", "rittmeister", "lori", "alta moda", "standarddeutsch", "buntnessel", "belcanto", "deutschkenntnis", "piccolo", "tschibuk", "auffanglager", "elba", "arlecchino", "lira", "exilliteratur", "niederdeutsch", "bundesausbildungsförderungsgesetz", "ehrenspielführer", "durchgangslager", "apenninen-halbinsel", "cassata", "schwarz-weiß-rot", "deutschlandsender", "autark", "erzherzog", "eurokommunismus", "europider", "hennastrauch", "öterreichisch", "brd", "plateresk", "prignitz", "treck", "buch", "iberer", "pancetta", "lüneburger heide", "ostig", "fdp", "couvert", "asylbewerberheim", "quintal", "heldenlied", "asiatisch", "kandidat", "notlager", "ems", "bundestag", "hindukusch", "beitrittsgebiet", "türkisch", "güteraustausch", "importe", "mittelniederdeutsch", "mauerschütze", "bundeskanzleramt", "ß", "tagliatelle", "büffel", "ossi", "seconda", "zaubernuss", "ziehungsrecht", "brandgans", "katamaran", "feldgrau", "pizza", "afrodeutsch", "importhandel", "zloty", "italienische", "ostdeutsch", "anopheles", "betäubungsmittelgesetz", "kreuzer", "resident", "bundesdeutsch", "italianismus", "ötlich", "türkischstämmig", "welsch", "valuta", "schleichkatze", "fernamt", "südasien", "deutschlandpolitik", "germanin", "muchtar", "ostpolitik", "thüringen", "flüchtlingsrat", "brillenschlange", "met", "schabzieger", "piva", "krevette", "devise", "ausländerfeindlichkeit", "boccia", "konak", "alpenjäger", "prädikatswein", "preislied", "studienkolleg", "sudetenland", "chassidismus", "hemlocktanne", "baba", "novecento", "großdeutschland", "rheinland-pfalz", "lizenziat", "nachkriegsöterreich", "binnendeutsch", "geest", "billigflagge", "bundeswehr", "amischer", "getto", "kanzleideutsch", "moschustier", "neudeutsch", "polentum", "italienischsprachig", 
"kamtschatka", "vacherin", "fantasia", "volksgericht", "nationalratspräsident", "kontor", "scampi", "teutonisch", "plattdeutsch", "germanistik", "biedermeier", "certosa", "eurocityzug", "ausländer", "seele", "staatsrat", "bundeskabinett", "alitalia", "italien", "migrationspolitik", "verfassungsinitiative", "diplomatie", "neuhochdeutsch", "zwergkiefer", "marktamt", "dienstpragmatik", "deutschschweiz", "frascati", "kurrent", "türkisch", "fpö", "eurasien", "kemalismus", "landeskirche", "mittelmeerländer", "eidgenosse", "friedensfahrt", "renaissance", "rotwelsch", "hyäne", "italianist", "prälat", "pfalz", "fremdarbeiter", "quent", "spruch", "wandervogel", "hortensie", "türbe", "bundesgesetzblatt", "schwarzwald", "ausländeranteil", "hafenzoll", "integrationsbeauftragte", "mecklenburg-vorpommern", "ostdeutscher", "satemsprache", "mittelniederdeutsch", "botschaft", "maggiore", "schutztruppe", "ländle", "kreole", "hamam", "conte", "incoming", "ripuarisch", "lingua franca", "aare", "bundesversammlung", "bootsflühtling", "mitteldeutsch", "unteritalien", "althochdeutsch", "bigos", "ingwäonen", "schwarzes meer", "bundesanleihe", "fremde", "ober", "ausländeramt", "qualitätswein", "sardinien", "westler", "einigungsvertrag", "asean", "visconte", "don", "halbesel", "bundesbank", "gesandtschaft", "indogermanistik", "behördendeutsch", "notaufnahmelager", "ausländerbehörde", "josephinismus", "schwaben", "flühtlingspolitik", "rote-armee-fraktion", "schutzzoll", "katzelmacher", "deutschstämmig", "reichsdeutscher", "deutsch sprechend", "staatsminister", "präfekt", "deutschamerikaner", "asylgerichtshof", "glosse", "italianistisch", "alemanne", "legionär", "sammellager", "reichsdeutsche", "kapitalflucht", "ostschweiz", "germanien", "orientteppich", "landeshauptfrau", "romandie", "ultra", "oder-neiße-linie", "platt", "neuhochdeutsch", "staatssicherheitsdienst", "südeuropäisch", "deutschstämmige", "umweltflüchtling", "ostzone", "mezzogiorno", "villanell", "frisör", "oberitalien", 
"süddeutsch", "treudeutsch", "bundesverfassungsgericht", "ischia", "mozzarella", "sudetendeutsch", "tramontana", "bayern", "einwandererfamilie", "sprachführer", "durchgangsverkehr", "arno", "rütlischwur", "volkskammer", "mad", "ns-staat", "volksmarine", "dienstleistungsverkehr", "expatriate", "gemeindeutsch", "österreicherin", "zonenrandgebiet", "amtssprache", "tifoso", "schweizerisch", "studienaufenthalt", "hansestadt", "hessen", "bure", "ostflüchtling", "flüchtlingstreck", "ristorante", "osteria", "teutonengrill", "assisen", "riviera", "kolonialherr", "wendezeit", "flüchtlingsheim", "bundesverwaltungsgericht", "diwan", "exequatur", "krautrock", "deutschstämmiger", "woiwod", "geniezeit", "anatolien", "bundessozialgericht", "freiburg"]
\ No newline at end of file
diff --git a/debiaswe-master/data/multi_attribute/definitional_pairs.json b/debiaswe-master/data/multi_attribute/definitional_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..c8698d77869dbd681dca9a19d3e7d06baeff0b20
--- /dev/null
+++ b/debiaswe-master/data/multi_attribute/definitional_pairs.json
@@ -0,0 +1,10 @@
+[
+    ["deutscher", "türke", "pole", "italiener"],
+    ["deutsche", "türkin", "polin", "italienerin"],
+    ["deutschland", "türkei", "polen", "italien"],
+    ["deutsch", "türkisch", "polnisch", "italienisch"],
+    ["sabine", "elif", "wiktoria", "giulia"],
+    ["klaus", "mehmet", "jakub", "francesco"],
+    ["deutschstämmig", "türkischstämmig", "polnischstämmig", "italienischstämmig"],
+    ["deutschsprachig", "türkischsprachig", "polnischsprachig", "italienischsprachig"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/multi_attribute/equalize_pairs.json b/debiaswe-master/data/multi_attribute/equalize_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..ee91ce038287206c3493e00007cc8e51f6124274
--- /dev/null
+++ b/debiaswe-master/data/multi_attribute/equalize_pairs.json
@@ -0,0 +1,14 @@
+[
+    ["deutscher", "türke", "pole", "italiener"],
+    ["deutsche", "türkin", "polin", "italienerin"],
+    ["deutschland", "türkei", "polen", "italien"],
+    ["deutsch", "türkisch", "polnisch", "italienisch"],
+    ["sabine", "elif", "wiktoria", "giulia"],
+    ["klaus", "mehmet", "jakub", "francesco"],
+    ["deutschstämmig", "türkischstämmig", "polnischstämmig", "italienischstämmig"],
+    ["deutschsprachig", "türkischsprachig", "polnischsprachig", "italienischsprachig"],
+    ["euro", "lira", "złoty", "euro"],
+    ["christlich", "muslimisch", "katholisch", "evangelisch"],
+    ["berlin", "istanbul", "warschau", "rom"],
+    ["hamburg", "ankara", "krakau", "mailand"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/polish/bias_specific_full.json b/debiaswe-master/data/polish/bias_specific_full.json
new file mode 100644
index 0000000000000000000000000000000000000000..e3d4a2cd62e6556bbad472ca94adda1cf860d5a5
--- /dev/null
+++ b/debiaswe-master/data/polish/bias_specific_full.json
@@ -0,0 +1 @@
+["fichtelberg", "displaced person", "bundesliga", "kurrentschrift", "reichsgründung", "volksdeutscher", "germania", "deutschstämmige", "polarität", "solidaritätszuschlag", "bundesrepublikanisch", "auslandsdeutsche", "bundesbank", "bundesanleihe", "dipol", "außerdeutsch", "bahncard", "reichsstadt", "sudetenland", "auslandsdeutsch", "ostverträge", "reichsgericht", "schufa", "frühmittelhochdeutsch", "unter", "vereinigungskriminalität", "friedensfahrt", "territorialverteidigung", "kanzleisprache", "reichspräsident", "briefmonopol", "schweizerdeutsch", "ländle", "hetman", "schwarz-weiß-rot", "deutschsprachlich", "deutschlandtour", "schwabenspiegel", "dlg-prämiert", "spieloper", "deutscher", "jot", "trentino-südtirol", "misereor", "ichlaut", "bundeshaus", "bundesrat", "landesstraße", "deutschkunde", "reichsgebiet", "regionalbahn", "bundesminister", "reichskanzler", "gulden", "bure", "deutschkenntnis", "deutschfreundlich", "deutschherren", "mazurka", "ß", "reichsritter", "republikflucht", "reichsinsignien", "wehrmacht", "adfc", "deutschtürkisch", "hitlerdeutschland", "oberlandesgericht", "pfalz", "deutschlehrer", "eisenbahnerwohnung", "kurfürst", "landeskirche", "quempaslied", "narrativum", "bundesarchiv", "reichspost", "elsass-lothringen", "bundesministerium", "zweikanalton", "germanist", "reichsadler", "polnisch", "reichsstände", "verteidigungsausschuss", "germanistik", "germanismus", "reichsacht", "önorm", "hamburg", "bundeshaushalt", "bundessozialgericht", "bigos", "regionalliga", "schluss-s", "teutonengrill", "schoah", "normblatt", "bundeskanzleramt", "westmark", "hartz", "preußen", "volksdeutsche", "nationalsozialismus", "mitteldeutschland", "fdj", "ehrenspielführer", "adac", "deutschfeindlich", "schulferien", "bundesdeutsch", "deutsch-schweizerisch", "deutschstämmiger", "berlin", "handballbundesliga", "mehrpolig", "deutschlandweit", "binnendeutsch", "innenausschuss", "feldgrau", "achtundvierziger", "din-norm", "ostmark", "sächlich", "deutsch-amerikanisch", 
"sprachgesellschaft", "landesversicherungsanstalt", "tagesschau", "reichsmark", "deutsch gesinnt", "bremen", "volksgenosse", "bundesgerichtshof", "jungdeutscher", "reichsgrenze", "reichsregierung", "erzgebirge", "deutschritterorden", "amerikahaus", "pendolino", "bundesausbildungsförderungsgesetz", "bundeskanzler", "zentralbankrat", "abc", "fürstentag", "bundesstraße", "regionalexpress", "deutsch-türkisch", "schwarz-rot-gold", "vormärz", "umpolen", "westgeld", "trakehner", "mikrozensus", "sütterlinschrift", "deutschstämmig", "bundesautobahn", "bundesverwaltungsgericht", "vergangenheitsbewältigung", "neuklassizismus", "fräuleinwunder", "polnisch", "deutsch-jüdisch", "ch-laut", "meridian", "lastenausgleichsgesetz", "novemberrevolution", "beitrittsgebiet", "auslandsdeutscher", "fdgb", "deutschlandchef", "quart", "austriazismus", "gerundium", "verfassungsgerichtshof", "s-laut", "deutsch-deutsch", "niederdeutsch", "reichsdeutsch", "reichsdeutscher", "altdeutsch", "bundesversammlung", "judenstern", "quent", "kleindeutsch", "ns-staat", "bipolar", "deutsch-französisch", "ostpreußen", "deutsche", "deutschlandsender", "hochmeister", "geniezeit", "bundesgartenschau", "deutschlandlied", "achlaut", "aussiedler", "französisch-deutsch", "mark", "reinheitsgebot", "spätaussiedler", "rentenmark", "dax", "verdeutschen", "deutschenhass", "deutschrock", "lufthansa", "neuromantik", "frikadelle", "fußballbundesliga", "prädikatswein", "bundespräsident", "reichskammergericht", "standarddeutsch", "reichsautobahn", "baron", "hermesbürgschaft", "reichsdeutsche", "hanswurst", "polen", "pole", "polin", "polinnen", "polens", "polnisch", "polnische", "polnisches", "polnischer", "polnischen", "krakau", "kraków", "danzig", "gdańsk", "breslau", "wrocław", "białystok", "katowice", "kattowitz", "lodz", "lublin", "stettin", "warschau", "warszawa", "thorn", "bigos", "borschtsch", "eisbein", "grützwurst", "häckerle", "heringssalat", "kohlroulade", "kolatsche", "krakauer", "mazurek", "mohnkuchen", 
"pirogge", "polonaise", "weißwurst", "bigosch", "złoty", "euro", "krakowiak", "mazurka", "polka", "masuren", "tatra", "auschwitz", "wollin", "chopin", "sienkiewicz"]
\ No newline at end of file
diff --git a/debiaswe-master/data/polish/bias_specific_seed.json b/debiaswe-master/data/polish/bias_specific_seed.json
new file mode 100644
index 0000000000000000000000000000000000000000..e3d4a2cd62e6556bbad472ca94adda1cf860d5a5
--- /dev/null
+++ b/debiaswe-master/data/polish/bias_specific_seed.json
@@ -0,0 +1 @@
+["fichtelberg", "displaced person", "bundesliga", "kurrentschrift", "reichsgründung", "volksdeutscher", "germania", "deutschstämmige", "polarität", "solidaritätszuschlag", "bundesrepublikanisch", "auslandsdeutsche", "bundesbank", "bundesanleihe", "dipol", "außerdeutsch", "bahncard", "reichsstadt", "sudetenland", "auslandsdeutsch", "ostverträge", "reichsgericht", "schufa", "frühmittelhochdeutsch", "unter", "vereinigungskriminalität", "friedensfahrt", "territorialverteidigung", "kanzleisprache", "reichspräsident", "briefmonopol", "schweizerdeutsch", "ländle", "hetman", "schwarz-weiß-rot", "deutschsprachlich", "deutschlandtour", "schwabenspiegel", "dlg-prämiert", "spieloper", "deutscher", "jot", "trentino-südtirol", "misereor", "ichlaut", "bundeshaus", "bundesrat", "landesstraße", "deutschkunde", "reichsgebiet", "regionalbahn", "bundesminister", "reichskanzler", "gulden", "bure", "deutschkenntnis", "deutschfreundlich", "deutschherren", "mazurka", "ß", "reichsritter", "republikflucht", "reichsinsignien", "wehrmacht", "adfc", "deutschtürkisch", "hitlerdeutschland", "oberlandesgericht", "pfalz", "deutschlehrer", "eisenbahnerwohnung", "kurfürst", "landeskirche", "quempaslied", "narrativum", "bundesarchiv", "reichspost", "elsass-lothringen", "bundesministerium", "zweikanalton", "germanist", "reichsadler", "polnisch", "reichsstände", "verteidigungsausschuss", "germanistik", "germanismus", "reichsacht", "önorm", "hamburg", "bundeshaushalt", "bundessozialgericht", "bigos", "regionalliga", "schluss-s", "teutonengrill", "schoah", "normblatt", "bundeskanzleramt", "westmark", "hartz", "preußen", "volksdeutsche", "nationalsozialismus", "mitteldeutschland", "fdj", "ehrenspielführer", "adac", "deutschfeindlich", "schulferien", "bundesdeutsch", "deutsch-schweizerisch", "deutschstämmiger", "berlin", "handballbundesliga", "mehrpolig", "deutschlandweit", "binnendeutsch", "innenausschuss", "feldgrau", "achtundvierziger", "din-norm", "ostmark", "sächlich", "deutsch-amerikanisch", 
"sprachgesellschaft", "landesversicherungsanstalt", "tagesschau", "reichsmark", "deutsch gesinnt", "bremen", "volksgenosse", "bundesgerichtshof", "jungdeutscher", "reichsgrenze", "reichsregierung", "erzgebirge", "deutschritterorden", "amerikahaus", "pendolino", "bundesausbildungsförderungsgesetz", "bundeskanzler", "zentralbankrat", "abc", "fürstentag", "bundesstraße", "regionalexpress", "deutsch-türkisch", "schwarz-rot-gold", "vormärz", "umpolen", "westgeld", "trakehner", "mikrozensus", "sütterlinschrift", "deutschstämmig", "bundesautobahn", "bundesverwaltungsgericht", "vergangenheitsbewältigung", "neuklassizismus", "fräuleinwunder", "polnisch", "deutsch-jüdisch", "ch-laut", "meridian", "lastenausgleichsgesetz", "novemberrevolution", "beitrittsgebiet", "auslandsdeutscher", "fdgb", "deutschlandchef", "quart", "austriazismus", "gerundium", "verfassungsgerichtshof", "s-laut", "deutsch-deutsch", "niederdeutsch", "reichsdeutsch", "reichsdeutscher", "altdeutsch", "bundesversammlung", "judenstern", "quent", "kleindeutsch", "ns-staat", "bipolar", "deutsch-französisch", "ostpreußen", "deutsche", "deutschlandsender", "hochmeister", "geniezeit", "bundesgartenschau", "deutschlandlied", "achlaut", "aussiedler", "französisch-deutsch", "mark", "reinheitsgebot", "spätaussiedler", "rentenmark", "dax", "verdeutschen", "deutschenhass", "deutschrock", "lufthansa", "neuromantik", "frikadelle", "fußballbundesliga", "prädikatswein", "bundespräsident", "reichskammergericht", "standarddeutsch", "reichsautobahn", "baron", "hermesbürgschaft", "reichsdeutsche", "hanswurst", "polen", "pole", "polin", "polinnen", "polens", "polnisch", "polnische", "polnisches", "polnischer", "polnischen", "krakau", "kraków", "danzig", "gdańsk", "breslau", "wrocław", "białystok", "katowice", "kattowitz", "lodz", "lublin", "stettin", "warschau", "warszawa", "thorn", "bigos", "borschtsch", "eisbein", "grützwurst", "häckerle", "heringssalat", "kohlroulade", "kolatsche", "krakauer", "mazurek", "mohnkuchen", 
"pirogge", "polonaise", "weißwurst", "bigosch", "złoty", "euro", "krakowiak", "mazurka", "polka", "masuren", "tatra", "auschwitz", "wollin", "chopin", "sienkiewicz"]
\ No newline at end of file
diff --git a/debiaswe-master/data/polish/definitional_pairs.json b/debiaswe-master/data/polish/definitional_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..60bd50978725f654b7c8ca87df259323fd2d0cbc
--- /dev/null
+++ b/debiaswe-master/data/polish/definitional_pairs.json
@@ -0,0 +1,13 @@
+[
+    ["deutscher", "pole"],
+    ["deutsche", "polin"],
+    ["deutschen", "polen"],
+    ["deutschen", "polinnen"],
+    ["deutschlands", "polens"],
+    ["deutschland", "polen"],
+    ["deutsch", "polnisch"],
+    ["deutsches", "polnisches"],
+    ["deutscher", "polnischer"],
+    ["deutsche", "polnische"],
+    ["deutschsprachig", "polnischsprachig"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/polish/equalize_pairs.json b/debiaswe-master/data/polish/equalize_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5bf5d9774a7eb17556d642d96bd0f340730ad0a
--- /dev/null
+++ b/debiaswe-master/data/polish/equalize_pairs.json
@@ -0,0 +1,30 @@
+[
+    ["deutscher", "pole"],
+    ["deutsche", "polin"],
+    ["deutsche", "polinnen"],
+    ["deutschen", "polen"],
+    ["deutschland", "polen"],
+    ["deutschlands", "polens"],
+    ["deutsch", "polnisch"],
+    ["deutschsprachig", "polnischsprachig"],
+    ["euro", "złoty"],
+    ["berlin", "warschau"],
+    ["hamburg", "krakau"],
+    ["münchen", "lodz"],
+    ["köln", "breslau"],
+    ["frankfurt", "posen"],
+    ["stuttgart", "danzig"],
+    ["düsseldorf", "stettin"],
+    ["leipzig", "bromberg"],
+    ["dortmund", "lublin"],
+    ["currywurst", "bigos"],
+    ["grießsuppe", "borschtsch"],
+    ["mohnkuchen", "mazurek"],
+    ["maultasche", "pirogge"],
+    ["walzer", "mazurka"],
+    ["gardetanz", "krakowiak"],
+    ["siebenschritt", "polka"],
+    ["emsland", "masuren"],
+    ["zugspitze", "tatra"],
+    ["rügen", "wollin"]
+] 
\ No newline at end of file
diff --git a/debiaswe-master/data/turkish/bias_specific_full.json b/debiaswe-master/data/turkish/bias_specific_full.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa25abb3c98cf26662e86486e9ba2cba00672635
--- /dev/null
+++ b/debiaswe-master/data/turkish/bias_specific_full.json
@@ -0,0 +1 @@
+["sächlich", "deutschkenntnis", "reichsregierung", "rentenmark", "deutschtürke", "displaced person", "hanswurst", "hochmeister", "schufa", "ß", "din-norm", "solidaritätszuschlag", "schulferien", "schwabenspiegel", "deutschstämmiger", "sudetenland", "bundesminister", "deutschlandtour", "außerdeutsch", "misereor", "territorialverteidigung", "pendolino", "oberlandesgericht", "zweikanalton", "wehrmacht", "deutsch-deutsch", "deutsch gesinnt", "bundeskanzleramt", "bundessozialgericht", "deutschlandweit", "reichspost", "bundesautobahn", "kleindeutsch", "germanist", "hermesbürgschaft", "deutsch-türkisch", "schweizerdeutsch", "schwarz-rot-gold", "teutonengrill", "standarddeutsch", "volksdeutsche", "türkischstämmig", "reichsinsignien", "reichsgründung", "bremen", "judenstern", "deutsche", "auslandsdeutsche", "volksdeutscher", "altdeutsch", "beg", "reichsstadt", "deutschlandchef", "narrativum", "frikadelle", "ländle", "verdeutschen", "tagesschau", "westgeld", "feldgrau", "deutschstämmig", "bundesanleihe", "hartz", "bundesgartenschau", "deutsch-schweizerisch", "germanistik", "deutschenhass", "handballbundesliga", "westmark", "deutschrock", "fürstentag", "deutschfeindlich", "reichsgrenze", "reichsgericht", "briefmonopol", "prädikatswein", "bure", "deutsch-französisch", "frühmittelhochdeutsch", "regionalliga", "reichsacht", "ichlaut", "spieloper", "gerundium", "verfassungsgerichtshof", "deutschlandlied", "kanzleisprache", "turkisieren", "deutschstämmige", "kurrentschrift", "fdj", "berlin", "deutsch-amerikanisch", "reichsgebiet", "reichspräsident", "bundesstraße", "schwarz-weiß-rot", "bundesbank", "bundespräsident", "binnendeutsch", "deutschlehrer", "bundesgerichtshof", "jot", "pfalz", "bundesrat", "elsass-lothringen", "germania", "reichsdeutsche", "reichsstände", "ostverträge", "reichsmark", "deutschtürkisch", "heißluftbad", "beitrittsgebiet", "kurfürst", "nationalsozialismus", "adfc", "sprachgesellschaft", "ch-laut", "mitteldeutschland", "deutschlandsender", "mark", "schoah", 
"ostmark", "bundesdeutsch", "reichskanzler", "austriazismus", "normblatt", "abc", "spätaussiedler", "deutschfreundlich", "landesstraße", "fräuleinwunder", "reichsritter", "preußen", "erzgebirge", "auslandsdeutsch", "regionalexpress", "achlaut", "landesversicherungsanstalt", "\u00d6norm", "unter", "trakehner", "achtundvierziger", "volksgenosse", "germanismus", "schluss-s", "fichtelberg", "quart", "dax", "aussiedler", "auslandsdeutscher", "niederdeutsch", "gulden", "neuromantik", "lufthansa", "vereinigungskriminalität", "bundesarchiv", "bundeskanzler", "bundesrepublikanisch", "quent", "regionalbahn", "s-laut", "deutsch-jüdisch", "türkisch", "innenausschuss", "bundeshaushalt", "bundeshaus", "quempaslied", "reichsdeutscher", "jungdeutscher", "reinheitsgebot", "reichsdeutsch", "reichsadler", "vergangenheitsbewältigung", "fußballbundesliga", "reichsautobahn", "deutschritterorden", "adac", "französisch-deutsch", "deutschsprachlich", "bahncard", "deutschherren", "hamburg", "baron", "bundesverwaltungsgericht", "bundesversammlung", "landeskirche", "ehrenspielführer", "tschibuk", "sütterlinschrift", "verteidigungsausschuss", "fdgb", "bundesausbildungsförderungsgesetz", "amerikahaus", "geniezeit", "neuklassizismus", "zentralbankrat", "hitlerdeutschland", "lastenausgleichsgesetz", "reichskammergericht", "bundesliga", "deutschkunde", "deutscher", "novemberrevolution", "ns-staat", "trentino-südtirol", "dlg-prämiert", "vormärz", "bundesministerium", "republikflucht", "ostpreußen", "eisenbahnerwohnung", "mikrozensus", "deutscher", "deutsche", "deutschland", "deutschlands", "deutsches", "deutschen", "türkei", "türkisch", "türke", "türkin", "türkinnen", "türken", "türkisch", "türkisches", "türkischsprachig", "osmanisch", "osmanisches", "osmane", "istanbul", "ankara", "izmir", "bursa", "konya", "antalya", "kayseri", "börek", "kefir", "lahmacun", "dolma", "kebab", "köfte", "pide", "lira", "hora", "bosporus", "ararat", "taurus", "ägäis", "atatürk", "erdoğan"]
\ No newline at end of file
diff --git a/debiaswe-master/data/turkish/bias_specific_seed.json b/debiaswe-master/data/turkish/bias_specific_seed.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa25abb3c98cf26662e86486e9ba2cba00672635
--- /dev/null
+++ b/debiaswe-master/data/turkish/bias_specific_seed.json
@@ -0,0 +1 @@
+["sächlich", "deutschkenntnis", "reichsregierung", "rentenmark", "deutschtürke", "displaced person", "hanswurst", "hochmeister", "schufa", "ß", "din-norm", "solidaritätszuschlag", "schulferien", "schwabenspiegel", "deutschstämmiger", "sudetenland", "bundesminister", "deutschlandtour", "außerdeutsch", "misereor", "territorialverteidigung", "pendolino", "oberlandesgericht", "zweikanalton", "wehrmacht", "deutsch-deutsch", "deutsch gesinnt", "bundeskanzleramt", "bundessozialgericht", "deutschlandweit", "reichspost", "bundesautobahn", "kleindeutsch", "germanist", "hermesbürgschaft", "deutsch-türkisch", "schweizerdeutsch", "schwarz-rot-gold", "teutonengrill", "standarddeutsch", "volksdeutsche", "türkischstämmig", "reichsinsignien", "reichsgründung", "bremen", "judenstern", "deutsche", "auslandsdeutsche", "volksdeutscher", "altdeutsch", "beg", "reichsstadt", "deutschlandchef", "narrativum", "frikadelle", "ländle", "verdeutschen", "tagesschau", "westgeld", "feldgrau", "deutschstämmig", "bundesanleihe", "hartz", "bundesgartenschau", "deutsch-schweizerisch", "germanistik", "deutschenhass", "handballbundesliga", "westmark", "deutschrock", "fürstentag", "deutschfeindlich", "reichsgrenze", "reichsgericht", "briefmonopol", "prädikatswein", "bure", "deutsch-französisch", "frühmittelhochdeutsch", "regionalliga", "reichsacht", "ichlaut", "spieloper", "gerundium", "verfassungsgerichtshof", "deutschlandlied", "kanzleisprache", "turkisieren", "deutschstämmige", "kurrentschrift", "fdj", "berlin", "deutsch-amerikanisch", "reichsgebiet", "reichspräsident", "bundesstraße", "schwarz-weiß-rot", "bundesbank", "bundespräsident", "binnendeutsch", "deutschlehrer", "bundesgerichtshof", "jot", "pfalz", "bundesrat", "elsass-lothringen", "germania", "reichsdeutsche", "reichsstände", "ostverträge", "reichsmark", "deutschtürkisch", "heißluftbad", "beitrittsgebiet", "kurfürst", "nationalsozialismus", "adfc", "sprachgesellschaft", "ch-laut", "mitteldeutschland", "deutschlandsender", "mark", "schoah", 
"ostmark", "bundesdeutsch", "reichskanzler", "austriazismus", "normblatt", "abc", "spätaussiedler", "deutschfreundlich", "landesstraße", "fräuleinwunder", "reichsritter", "preußen", "erzgebirge", "auslandsdeutsch", "regionalexpress", "achlaut", "landesversicherungsanstalt", "\u00d6norm", "unter", "trakehner", "achtundvierziger", "volksgenosse", "germanismus", "schluss-s", "fichtelberg", "quart", "dax", "aussiedler", "auslandsdeutscher", "niederdeutsch", "gulden", "neuromantik", "lufthansa", "vereinigungskriminalität", "bundesarchiv", "bundeskanzler", "bundesrepublikanisch", "quent", "regionalbahn", "s-laut", "deutsch-jüdisch", "türkisch", "innenausschuss", "bundeshaushalt", "bundeshaus", "quempaslied", "reichsdeutscher", "jungdeutscher", "reinheitsgebot", "reichsdeutsch", "reichsadler", "vergangenheitsbewältigung", "fußballbundesliga", "reichsautobahn", "deutschritterorden", "adac", "französisch-deutsch", "deutschsprachlich", "bahncard", "deutschherren", "hamburg", "baron", "bundesverwaltungsgericht", "bundesversammlung", "landeskirche", "ehrenspielführer", "tschibuk", "sütterlinschrift", "verteidigungsausschuss", "fdgb", "bundesausbildungsförderungsgesetz", "amerikahaus", "geniezeit", "neuklassizismus", "zentralbankrat", "hitlerdeutschland", "lastenausgleichsgesetz", "reichskammergericht", "bundesliga", "deutschkunde", "deutscher", "novemberrevolution", "ns-staat", "trentino-südtirol", "dlg-prämiert", "vormärz", "bundesministerium", "republikflucht", "ostpreußen", "eisenbahnerwohnung", "mikrozensus", "deutscher", "deutsche", "deutschland", "deutschlands", "deutsches", "deutschen", "türkei", "türkisch", "türke", "türkin", "türkinnen", "türken", "türkisch", "türkisches", "türkischsprachig", "osmanisch", "osmanisches", "osmane", "istanbul", "ankara", "izmir", "bursa", "konya", "antalya", "kayseri", "börek", "kefir", "lahmacun", "dolma", "kebab", "köfte", "pide", "lira", "hora", "bosporus", "ararat", "taurus", "ägäis", "atatürk", "erdoğan"]
\ No newline at end of file
diff --git a/debiaswe-master/data/turkish/definitional_pairs.json b/debiaswe-master/data/turkish/definitional_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..f06b3d86d9ac8d6cdef18877a22684835c47336a
--- /dev/null
+++ b/debiaswe-master/data/turkish/definitional_pairs.json
@@ -0,0 +1,16 @@
+[
+    ["deutscher", "türke"],
+    ["deutsche", "türkin"],
+    ["deutschen", "türken"],
+    ["deutschen", "türkinnen"],
+    ["deutschland", "türkei"],
+    ["deutschlands", "türkeis"],
+    ["deutsch", "türkisch"],
+    ["deutsches", "türkisches"],
+    ["deutscher", "türkischer"],
+    ["deutsche", "türkische"],
+    ["deutschen", "türkischen"],
+    ["germanisch", "osmanisch"],
+    ["germane", "osmane"],
+    ["deutschsprachig", "türkischsprachig"]
+]
\ No newline at end of file
diff --git a/debiaswe-master/data/turkish/equalize_pairs.json b/debiaswe-master/data/turkish/equalize_pairs.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc389e912814001213275e0b566ac2dd373f72f2
--- /dev/null
+++ b/debiaswe-master/data/turkish/equalize_pairs.json
@@ -0,0 +1,44 @@
+[
+    ["deutscher", "türke"],
+    ["deutsche", "türkin"],
+    ["deutschen", "türken"],
+    ["deutsche", "türken"],
+    ["deutsche", "türkinnen"],
+    ["deutschland", "türkei"],
+    ["deutschlands", "türkei"],
+    ["deutsch", "türkisch"],
+    ["deutsches", "türkisches"],
+    ["deutschen", "türkischen"],
+    ["deutsche", "türkische"],
+    ["deutscher", "türkischer"],
+    ["germanisch", "osmanisch"],
+    ["germane", "osmane"],
+    ["deutschsprachig", "türkischsprachig"],
+    ["berlin", "istanbul"],
+    ["hamburg", "ankara"],
+    ["münchen", "izmir"],
+    ["köln", "bursa"],
+    ["frankfurt", "adana"],
+    ["stuttgart", "gaziantep"],
+    ["düsseldorf", "konya"],
+    ["leipzig", "antalya"],
+    ["dortmund", "kayseri"],
+    ["christlich", "muslimisch"],
+    ["euro", "lira"],
+    ["schnitzel", "köfte"],
+    ["strudel", "börek"],
+    ["sauermilch", "kefir"],
+    ["flammkuchen", "lahmacun"],
+    ["kohlrouladen", "dolma"],
+    ["hackbällchen", "köfte"],
+    ["brötchen", "pide"],
+    ["stulle", "kebab"],
+    ["walzer", "hora"],
+    ["merkel", "erdoğan"],
+    ["europäisch", "asiatisch"],
+    ["rhein", "euphrat"],
+    ["elbe", "bosporus"],
+    ["alpen", "ararat"],
+    ["ostseeküste", "ägäis"],
+    ["bismarck", "atatürk"]
+] 
\ No newline at end of file
diff --git a/debiaswe-master/debiaswe/__init__.py b/debiaswe-master/debiaswe/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/debiaswe-master/debiaswe/data.py b/debiaswe-master/debiaswe/data.py
new file mode 100644
index 0000000000000000000000000000000000000000..94be7bbc9b824014f6e42b068d4b126579e2f0a4
--- /dev/null
+++ b/debiaswe-master/debiaswe/data.py
@@ -0,0 +1,23 @@
+import json
+import os
+
+"""
+Tools for data operations
+
+Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings
+Tolga Bolukbasi, Kai-Wei Chang, James Zou, Venkatesh Saligrama, and Adam Kalai
+2016
+"""
+PKG_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def load_professions():
+    """Load ../data/professions.json (relative to this package) and print a legend for its entries."""
+    professions_file = os.path.join(PKG_DIR, '../data', 'professions.json')
+    # JSON is UTF-8 by specification; do not fall back to the platform's locale encoding.
+    with open(professions_file, 'r', encoding='utf-8') as f:
+        professions = json.load(f)
+    print('Loaded professions\n' 'Format:\n' 'word,\n'
+          'definitional female -1.0 -> definitional male 1.0\n'
+          'stereotypical female -1.0 -> stereotypical male 1.0')
+    return professions
diff --git a/debiaswe-master/debiaswe/debias.py b/debiaswe-master/debiaswe/debias.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b0d4a5aff4991103ff259958e036a9bc9b11338
--- /dev/null
+++ b/debiaswe-master/debiaswe/debias.py
@@ -0,0 +1,103 @@
+"""
+Hard-debias embedding
+
+Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings
+Tolga Bolukbasi, Kai-Wei Chang, James Zou, Venkatesh Saligrama, and Adam Kalai
+2016
+"""
+from __future__ import print_function, division
+import argparse
+import json
+import numpy as np
+# we = __import__("debiaswe-master.debiaswe.we")
+import we  # linter isn't happy but it works, unlike attempt above
+import gensim
+import fasttext
+
+def debias(E, gender_specific_words, definitional, equalize):
+    """Hard-debias E in place: neutralize words outside gender_specific_words, then equalize pairs."""
+    gender_direction = we.doPCA(definitional, E).components_[0]
+    specific_set = set(gender_specific_words)
+    for i, w in enumerate(E.words):
+        if w not in specific_set:
+            E.vecs[i] = we.drop(E.vecs[i], gender_direction)
+    E.normalize()
+    candidates = {(recase(e1), recase(e2)) for e1, e2 in equalize  # each pair in 3 casings
+                  for recase in (str.lower, str.title, str.upper)}
+    print(candidates)
+    for (a, b) in candidates:
+        if (a in E.index and b in E.index):
+            y = we.drop((E.v(a) + E.v(b)) / 2, gender_direction)
+            z = np.sqrt(max(0.0, 1 - np.linalg.norm(y)**2))  # clamp: rounding can give |y| > 1 -> NaN
+            if (E.v(a) - E.v(b)).dot(gender_direction) < 0:
+                z = -z
+            E.vecs[E.index[a]] = z * gender_direction + y
+            E.vecs[E.index[b]] = -z * gender_direction + y
+    E.normalize()
+
+def remove_oov(word_list: list, vocab: list) -> list:
+    """Drop out-of-vocabulary entries from a definitional, equalisation or bias-specific word list.
+
+    An entry is either a single word or a list of words (a pair or a longer tuple); a list
+    is kept only if it is non-empty and *every* word in it is in `vocab`. Each removed entry
+    is reported on stdout. Relevant especially for the GloVe model.
+
+    :param word_list: list of words, or list of word lists
+    :param vocab: the model's vocabulary (any iterable of words)
+    :return: new list with the in-vocabulary entries, in their original order
+    """
+    known = set(vocab)  # constant-time membership instead of scanning a list per lookup
+    cleaned_list = []
+    for element in word_list:
+        words = element if isinstance(element, list) else [element]
+        if words and all(word in known for word in words):
+            cleaned_list.append(element)
+        else:
+            print(f"Removed element {element}")
+    return cleaned_list
+
+
+if __name__ == "__main__":
+    # Command-line entry point: read the word lists, load the embedding, hard-debias it
+    # and save the result.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("embedding_filename", help="The name of the embedding")
+    parser.add_argument("definitional_filename", help="JSON of definitional pairs")
+    parser.add_argument("gendered_words_filename", help="JSON list of words not to neutralize")
+    parser.add_argument("equalize_filename", help="JSON of pairs to equalize")
+    parser.add_argument("debiased_filename", help="Where to save the debiased embedding")
+
+    args = parser.parse_args()
+    print(args)
+
+    # The word lists contain non-ASCII words, so decode them explicitly as UTF-8
+    # instead of relying on the platform's locale encoding.
+    with open(args.definitional_filename, "r", encoding="utf-8") as f:
+        defs = json.load(f)
+
+    with open(args.equalize_filename, "r", encoding="utf-8") as f:
+        equalize_pairs = json.load(f)
+
+    with open(args.gendered_words_filename, "r", encoding="utf-8") as f:
+        gender_specific_words = json.load(f)
+
+    # Load the embedding once and take the vocabulary from it, so that every file
+    # format WordEmbedding accepts is supported here as well. E.index is a dict,
+    # which gives constant-time membership tests in remove_oov.
+    E = we.WordEmbedding(args.embedding_filename)
+    words = E.index
+
+    defs = remove_oov(defs, words)
+    equalize_pairs = remove_oov(equalize_pairs, words)
+    gender_specific_words = remove_oov(gender_specific_words, words)
+
+    print("Debiasing...")
+    debias(E, gender_specific_words, defs, equalize_pairs)
+
+    print("Saving to file...")
+    if args.embedding_filename[-4:] == args.debiased_filename[-4:] == ".bin":
+        E.save_w2v(args.debiased_filename)
+    else:
+        E.save(args.debiased_filename)
+
+    print("\n\nDone!\n")
diff --git a/debiaswe-master/debiaswe/learn_gender_specific.py b/debiaswe-master/debiaswe/learn_gender_specific.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b91dfca410cee4d5e951df5a51692a16722d32d
--- /dev/null
+++ b/debiaswe-master/debiaswe/learn_gender_specific.py
@@ -0,0 +1,68 @@
+from __future__ import print_function, division
+import sys
+import argparse
+from we import *
+from sklearn.svm import LinearSVC
+import json
+if sys.version_info[0] < 3:
+    import io
+    open = io.open
+"""
+Learn gender specific words
+
+Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings
+Tolga Bolukbasi, Kai-Wei Chang, James Zou, Venkatesh Saligrama, and Adam Kalai
+2016
+"""
+
+parser = argparse.ArgumentParser()
+parser.add_argument("embedding_filename", help="The name of the embedding")
+parser.add_argument("NUM_TRAINING", type=int)
+parser.add_argument("GENDER_SPECIFIC_SEED_WORDS")
+parser.add_argument("outfile")
+
+args = parser.parse_args()
+
+embedding_filename = args.embedding_filename
+NUM_TRAINING = args.NUM_TRAINING
+GENDER_SPECIFIC_SEED_WORDS = args.GENDER_SPECIFIC_SEED_WORDS
+OUTFILE = args.outfile
+
+# The seed list holds non-ASCII words; read it as UTF-8 regardless of the locale.
+with open(GENDER_SPECIFIC_SEED_WORDS, "r", encoding="utf-8") as f:
+    gender_seed = json.load(f)
+seed_lookup = set(gender_seed)  # constant-time membership for the per-word tests below
+
+print("Loading embedding...")
+E = WordEmbedding(embedding_filename)
+
+print("Embedding has {} words.".format(len(E.words)))
+print("{} seed words from '{}' out of which {} are in the embedding.".format(
+    len(gender_seed),
+    GENDER_SPECIFIC_SEED_WORDS,
+    sum(1 for w in gender_seed if w in E.index))  # E.index is a dict: no scan of E.words
+)
+
+# Positive examples: seed words present in the embedding, plus words among the first
+# NUM_TRAINING whose lower-cased form is a seed word. The other first NUM_TRAINING are negative.
+gender_seed = set(w for i, w in enumerate(E.words)
+                  if w in seed_lookup or (i < NUM_TRAINING and w.lower() in seed_lookup))
+labeled_train = [(i, 1 if w in gender_seed else 0) for i, w in enumerate(E.words) if (i < NUM_TRAINING or w in gender_seed)]
+train_indices, train_labels = zip(*labeled_train)
+y = np.array(train_labels)
+X = np.array([E.vecs[i] for i in train_indices])
+C = 1.0
+clf = LinearSVC(C=C, tol=0.0001)
+clf.fit(X, y)
+weights = 1.0 / len(y)  # uniform weights, so `score` is the plain training accuracy
+score = sum((clf.predict(X) == y) * weights)
+print(1 - score, sum(y) * 1.0 / len(y))  # training error rate, fraction of positive examples
+
+is_gender_specific = (E.vecs.dot(clf.coef_.T) > -clf.intercept_)
+
+full_gender_specific = list(set([w for label, w in zip(is_gender_specific, E.words)
+                            if label]).union(gender_seed))
+full_gender_specific.sort(key=lambda w: E.index[w])
+
+with open(OUTFILE, "w") as f:
+    json.dump(full_gender_specific, f)
diff --git a/debiaswe-master/debiaswe/we.py b/debiaswe-master/debiaswe/we.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fefad937d3281d1d2f8e0c03c81873ca3235c2c
--- /dev/null
+++ b/debiaswe-master/debiaswe/we.py
@@ -0,0 +1,252 @@
+"""
+Tools for debiasing word embeddings
+
+Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings
+Tolga Bolukbasi, Kai-Wei Chang, James Zou, Venkatesh Saligrama, and Adam Kalai
+2016
+"""
+from __future__ import print_function, division
+import re
+import numpy as np
+import scipy.sparse
+import gensim
+import fasttext
+from sklearn.decomposition import PCA
+
+unicode = str  # Python 3 alias: the Python 2 name `unicode` refers to the built-in str type here
+
+# DEFAULT_NUM_WORDS = 27000
+# FILENAMES = {"g_wiki": "glove.6B.300d.small.txt",
+#              "g_twitter": "glove.twitter.27B.200d.small.txt",
+#              "g_crawl": "glove.840B.300d.small.txt",
+#              "w2v": "GoogleNews-word2vec.small.txt",
+#              "w2v_large": "GoogleNews-word2vec.txt"}
+
+
+def dedup(seq):
+    """Return the items of seq as a list with later duplicates dropped, keeping first-seen order."""
+    return list(dict.fromkeys(seq))
+
+
+def safe_word(w):
+    # Truthy only for words shorter than 20 characters built from a-z and "_" that do not consist
+    # solely of underscores (digits, capitals, umlauts etc. fail); [a-zA-Z\.'_\- :;\(\)\]] for emoticons
+    return (re.match(r"^[a-z_]*$", w) and len(w) < 20 and not re.match(r"^_*$", w))
+
+
+def to_utf8(text, errors='strict', encoding='utf8'):
+    """Return `text` as UTF-8 bytes; a bytestring is first decoded using `encoding` and `errors`."""
+    if not isinstance(text, unicode):
+        # bytestring -> unicode -> utf8 round trip, to ensure the result is valid utf8
+        text = unicode(text, encoding, errors=errors)
+    return text.encode('utf8')
+
+
+class WordEmbedding:
+    def __init__(self, fname):
+        """Read the embedding stored in `fname`; the loader is picked from the file name.
+
+        "wiki.de.bin" is loaded with fasttext, "vectors_no_debiasing.txt" (text) and other ".bin"
+        files (binary) with gensim's word2vec loader, anything else as "word x1 x2 ..." text lines.
+        Rows are normalized afterwards if their norms differ by more than 0.0001.
+        """
+        self.thresh = None
+        self.max_words = None
+        self.desc = fname
+        print("*** Reading data from " + fname)
+        if fname.endswith("wiki.de.bin"):
+            ft = fasttext.load_model(fname)
+            words = ft.words
+            vecs = [ft[word] for word in words]
+        elif fname.endswith(("vectors_no_debiasing.txt", ".bin")):
+            model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=fname.endswith(".bin"))
+            words = model.index_to_key
+            vecs = [model[word] for word in words]
+        else:
+            vecs, words = [], []
+            with open(fname, "r", encoding='utf8') as f:
+                for line in f:
+                    fields = line.split()
+                    vec = np.array([float(x) for x in fields[1:]])
+                    if vecs and vecs[-1].shape != vec.shape:
+                        print("Got weird line", line)  # dimension differs from the last kept row
+                        continue
+                    words.append(fields[0])
+                    vecs.append(vec)
+        self.vecs = np.array(vecs, dtype='float32')
+        print(self.vecs.shape)
+        self.words = words
+        self.reindex()
+        # Normalize only when the row norms are not already equal up to the tolerance.
+        norms = np.linalg.norm(self.vecs, axis=1)
+        if max(norms)-min(norms) > 0.0001:
+            self.normalize()
+
+    def reindex(self):
+        self.index = {w: i for i, w in enumerate(self.words)}
+        self.n, self.d = self.vecs.shape
+        assert self.n == len(self.words) == len(self.index)
+        self._neighbors = None
+        print(self.n, "words of dimension", self.d, ":", ", ".join(self.words[:4] + ["..."] + self.words[-4:]))
+
+    def v(self, word):
+        return self.vecs[self.index[word]]
+
+    def diff(self, word1, word2):
+        v = self.vecs[self.index[word1]] - self.vecs[self.index[word2]]
+        return v/np.linalg.norm(v)
+
+    def normalize(self):
+        self.desc += ", normalize"
+        self.vecs /= np.linalg.norm(self.vecs, axis=1)[:, np.newaxis]
+        self.reindex()
+
+    def shrink(self, numwords):
+        self.desc += ", shrink " + str(numwords)
+        self.filter_words(lambda w: self.index[w]<numwords)
+
+    def filter_words(self, test):
+        """
+        Keep some words based on test, e.g. lambda x: x.lower()==x
+        """
+        self.desc += ", filter"
+        kept_indices, words = zip(*[[i, w] for i, w in enumerate(self.words) if test(w)])
+        self.words = list(words)
+        self.vecs = self.vecs[kept_indices, :]
+        self.reindex()
+
+    def save(self, filename):
+        with open(filename, "w") as f:
+            f.write("\n".join([w+" " + " ".join([str(x) for x in v]) for w, v in zip(self.words, self.vecs)]))
+        print("Wrote", self.n, "words to", filename)
+
+    def save_w2v(self, filename, binary=True):
+        with open(filename, 'wb') as fout:
+            fout.write(to_utf8("%s %s\n" % self.vecs.shape))
+            # store in sorted order: most frequent words at the top
+            for i, word in enumerate(self.words):
+                row = self.vecs[i]
+                if binary:
+                    fout.write(to_utf8(word) + b" " + row.tostring())
+                else:
+                    fout.write(to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row))))
+
+    def remove_directions(self, directions): #directions better be orthogonal
+        self.desc += ", removed"
+        for direction in directions:
+            self.desc += " "
+            if type(direction) is np.ndarray:
+                v = direction / np.linalg.norm(direction)
+                self.desc += "vector "
+            else:
+                w1, w2 = direction
+                v = self.diff(w1, w2)
+                self.desc += w1 + "-" + w2
+            self.vecs = self.vecs - self.vecs.dot(v)[:, np.newaxis].dot(v[np.newaxis, :])
+        self.normalize()
+
+    def compute_neighbors_if_necessary(self, thresh, max_words):
+        thresh = float(thresh) # dang python 2.7!
+        if self._neighbors is not None and self.thresh == thresh and self.max_words == max_words:
+            return
+        print("Computing neighbors")
+        self.thresh = thresh
+        self.max_words = max_words
+        vecs = self.vecs[:max_words]
+        dots = vecs.dot(vecs.T)
+        dots = scipy.sparse.csr_matrix(dots * (dots >= 1-thresh/2))
+        from collections import Counter
+        rows, cols = dots.nonzero()
+        nums = list(Counter(rows).values())
+        print("Mean:", np.mean(nums)-1)
+        print("Median:", np.median(nums)-1)
+        rows, cols, vecs = zip(*[(i, j, vecs[i]-vecs[j]) for i, j, x in zip(rows, cols, dots.data) if i<j])
+        self._neighbors = rows, cols, np.array([v/np.linalg.norm(v) for v in vecs])
+
+    def neighbors(self, word, thresh=1):
+        dots = self.vecs.dot(self.v(word))
+        return [self.words[i] for i, dot in enumerate(dots) if dot >= 1-thresh/2]
+
+    def more_words_like_these(self, words, topn=50, max_freq=100000):
+        v = sum(self.v(w) for w in words)
+        dots = self.vecs[:max_freq].dot(v)
+        thresh = sorted(dots)[-topn]
+        words = [w for w, dot in zip(self.words, dots) if dot>=thresh]
+        return sorted(words, key=lambda w: self.v(w).dot(v))[-topn:][::-1]
+
+    def best_analogies_dist_thresh(self, v, thresh=1, topn=500, max_words=50000):
+        """Metric is cos(a-c, b-d) if |b-d|^2 < thresh, otherwise 0
+        """
+        vecs, vocab = self.vecs[:max_words], self.words[:max_words]
+        self.compute_neighbors_if_necessary(thresh, max_words)
+        rows, cols, vecs = self._neighbors
+        scores = vecs.dot(v/np.linalg.norm(v))
+        pi = np.argsort(-abs(scores))
+
+        ans = []
+        usedL = set()
+        usedR = set()
+        for i in pi:
+            if abs(scores[i])<0.001:
+                break
+            row = rows[i] if scores[i] > 0 else cols[i]
+            col = cols[i] if scores[i] > 0 else rows[i]
+            if row in usedL or col in usedR:
+                continue
+            usedL.add(row)
+            usedR.add(col)
+            ans.append((vocab[row], vocab[col], abs(scores[i])))
+            if len(ans)==topn:
+                break
+
+        return ans
+
+
+def viz(analogies):  # print one row per (left, right, score) triple: index, both words padded to 29 columns, score cut to 4 chars
+    print("\n".join(str(i).rjust(4)+a[0].rjust(29) + " | " + a[1].ljust(29) + (str(a[2]))[:4] for i, a in enumerate(analogies)))
+
+
+def text_plot_words(xs, ys, words, width = 90, height = 40, filename=None):
+    """Draw `words` at their rescaled (xs, ys) positions on a width x height character grid; print it or write it to `filename`."""
+    PADDING = 10 # num chars on left and right in case words spill over
+    res = [[' ' for i in range(width)] for j in range(height)]
+    def rescale(nums):
+        a, b = min(nums), max(nums)
+        return [(x-a)/(b-a) if b > a else 0.5 for x in nums]  # constant series: mid-axis instead of dividing by zero
+    print("x:", (min(xs), max(xs)), "y:",(min(ys),max(ys)))
+    xs = rescale(xs)
+    ys = rescale(ys)
+    for (x, y, word) in zip(xs, ys, words):
+        i = int(x*(width - 1 - PADDING))
+        j = int(y*(height-1))
+        row = res[j]
+        # skip the word if any cell it would cover (plus one cell of margin on each side) is already taken
+        if any(row[i2] != ' ' for i2 in range(max(i-1, 0), min(width, i + len(word) + 1))):
+            continue
+        for k in range(len(word)):
+            if i+k>=width:
+                break
+            row[i+k] = word[k]
+    string = "\n".join("".join(r) for r in res)
+    if filename:
+        with open(filename, "w", encoding="utf8") as f:
+            f.write(string)
+        print("Wrote to", filename)
+    else:
+        print(string)
+
+
+def doPCA(pairs, embedding, num_components = 10):
+    """Fit and return a PCA over the vectors of each (a, b) pair, both taken relative to the pair's midpoint."""
+    rows = []
+    for a, b in pairs:
+        va, vb = embedding.v(a), embedding.v(b)
+        center = (va + vb)/2
+        rows.extend([va - center, vb - center])
+    pca = PCA(n_components = num_components)
+    pca.fit(np.array(rows))
+    return pca
+
+
+def drop(u, v):  # u minus its projection onto v, i.e. the part of u orthogonal to v (v need not be unit length)
+    return u - v * u.dot(v) / v.dot(v)
diff --git a/debiaswe-master/get_bias_specific_words.py b/debiaswe-master/get_bias_specific_words.py
new file mode 100644
index 0000000000000000000000000000000000000000..77096a386516b4f49b78911673886996819c9018
--- /dev/null
+++ b/debiaswe-master/get_bias_specific_words.py
@@ -0,0 +1,39 @@
+"""Get origin specific words by looking up
+origin-definitional words in dictionary entries."""
+
+import json
+import typing
+import pandas as pd
+
+
+def check_definitions(word_list: typing.List[str], dduw_path: str = "/home/students/reichelt/ba/bias-mitigation-ba/data/dduw.csv") -> typing.List[str]:
+    """Return the unique lemmas, in file order, of dduw entries with Term-Label "1" whose definition contains a word from `word_list`."""
+    dduw = pd.read_csv(dduw_path, sep=";", encoding="utf-8")
+    # whole-word alternation: \b also matches next to punctuation and at the start/end of a definition;
+    # the words are inserted unescaped, so they must not contain regex metacharacters
+    pattern = r"\b(?:" + "|".join(word_list) + r")\b"
+
+    # str.contains() filters any rows where >= 1 definitional word is present
+    relevant_entries = dduw[dduw["Definition/Erklärung"].notna() &
+                            dduw["Term-Label"].notna() &
+                            dduw["Definition/Erklärung"].str.contains(
+                            pattern, case=True, regex=True)]
+
+    # Term-Label == "1" keeps only the first sense/definition of each lemma (the label is compared as a string)
+    relevant_entries = relevant_entries[relevant_entries["Term-Label"] == "1"]
+    # dict.fromkeys drops repeated lemmas while keeping a reproducible first-occurrence order
+    return list(dict.fromkeys(relevant_entries['Lemma'].tolist()))
+
+if __name__ == "__main__":
+    definitional_words = ["polnisch", "polnisches", "polnische", "polnischer",
+                          "polnischsprachig", "polnischstämmig", "Pole", "Polen",
+                          "Polin", "Polens", "Polinnen",
+                          "deutsch", "deutsches", "deutsche", "deutschen",
+                          "deutschsprachig", "deutschstämmig", "Deutschlands",
+                          "Deutschland", "Deutscher", "Deutsche", "Deutschen"]
+    # collect the dictionary lemmas whose definitions mention any of the words above
+    origin_specific_words = check_definitions(definitional_words)
+    lowercased_words = [w.lower() for w in origin_specific_words]
+    # store the lowercased lemmas as a JSON list in bias_specific_seed.json
+    with open("/home/students/reichelt/ba/bias-mitigation-ba/debiaswe-master/data/polish/bias_specific_seed.json", mode="w", encoding="utf-8") as f:
+        json.dump(lowercased_words, f)
diff --git a/debiaswe-master/run_debias.sh b/debiaswe-master/run_debias.sh
new file mode 100644
index 0000000000000000000000000000000000000000..29d60d1d0d3377aee948cbabc453828c1385d39e
--- /dev/null
+++ b/debiaswe-master/run_debias.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Slurm batch job: runs debiaswe/debias.py on the dd-glove vectors with the Italian word lists (all paths are absolute).
+#SBATCH --job-name=hd_glove_italian
+#SBATCH --output=hd_glove_italian_output.txt
+#SBATCH --mem=128G
+#SBATCH --partition=compute
+#SBATCH --cpus-per-task=8
+#SBATCH --mail-user=reichelt@cl.uni-heidelberg.de
+#SBATCH --mail-type=ALL
+#SBATCH --time=2-00:00:00
+
+# JOB STEPS: activate the project virtualenv, then launch the debiasing run through srun
+source /home/students/reichelt/ba/bias-mitigation-ba/bias-venv/bin/activate
+srun python /home/students/reichelt/ba/bias-mitigation-ba/debiaswe-master/debiaswe/debias.py /home/students/reichelt/ba/bias-mitigation-ba/data/embeddings/glove/dd-glove/vectors_no_debiasing.txt /home/students/reichelt/ba/bias-mitigation-ba/debiaswe-master/data/italian/definitional_pairs.json /home/students/reichelt/ba/bias-mitigation-ba/debiaswe-master/data/italian/bias_specific_full.json /home/students/reichelt/ba/bias-mitigation-ba/debiaswe-master/data/italian/equalize_pairs.json /home/students/reichelt/ba/bias-mitigation-ba/data/embeddings/glove/dd-glove/glove_hard_debiased_italian.txt