diff --git "a/DD-GloVe/seed_word_tests/dd-glove-deutscher-ausl\303\244nder-1iter_job_output.txt" "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-ausl\303\244nder-1iter_job_output.txt" new file mode 100644 index 0000000000000000000000000000000000000000..a5f14dc731db91761a60818cbc6c62955d73d569 --- /dev/null +++ "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-ausl\303\244nder-1iter_job_output.txt" @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc 
build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_deutscher_ausländer_1iter -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.05 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. +vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.050000000 +MALE index: 60 +FEMALE index: 9226 +10 girls words (id): 9226, 23315, 48057, 74166, 74750, 100067, 115724, 118421, 158142, 182498, +10 boys words (id): 60, 130, 51815, 68091, 106323, 155515, 171755, 213204, 292604, 358249, +08/25/23 - 01:32.15AM, iter: 001, glove_cost: 0.024489, def_cost: 8.742632, ortho_cost: 0.313535, context_ortho_cost: 0.326759, proj_cost: 0.307762 + + diff --git "a/DD-GloVe/seed_word_tests/dd-glove-deutscher-ausl\303\244nder-top10_job_output.txt" "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-ausl\303\244nder-top10_job_output.txt" new file mode 100644 index 0000000000000000000000000000000000000000..85bd30c5e403bf12a501de8011b7c49e1d0d4fa0 --- /dev/null +++ "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-ausl\303\244nder-top10_job_output.txt" @@ -0,0 +1,144 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result 
[-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_deutscher_ausländer_top10 -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 20 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.05 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... 
+Initializing parameters...Using random seed 42 +done. +vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.050000000 +MALE index: 60 +FEMALE index: 9226 +10 girls words (id): 9226, 23315, 48057, 74166, 74750, 100067, 115724, 118421, 158142, 182498, +10 boys words (id): 60, 130, 51815, 68091, 106323, 155515, 171755, 213204, 292604, 358249, +08/25/23 - 02:11.08AM, iter: 001, glove_cost: 0.024486, def_cost: 8.889090, ortho_cost: 0.357260, context_ortho_cost: 0.366417, proj_cost: 0.298285 + + +10 girls words (id): 21549, 47617, 55702, 98100, 141072, 198233, 209099, 260702, 289223, 325380, +10 boys words (id): 5723, 16144, 16942, 22969, 26190, 63715, 131803, 183075, 213204, 230527, +08/25/23 - 02:27.10AM, iter: 002, glove_cost: 0.016187, def_cost: 11.336639, ortho_cost: 1.240580, context_ortho_cost: 1.265384, proj_cost: 0.271909 + + +10 girls words (id): 9226, 21812, 27929, 58593, 152203, 225017, 234950, 236126, 258027, 299567, +10 boys words (id): 16942, 22969, 26190, 30741, 63715, 131803, 183075, 213204, 230527, 239184, +08/25/23 - 02:43.01AM, iter: 003, glove_cost: 0.013208, def_cost: 12.797412, ortho_cost: 2.190056, context_ortho_cost: 2.218666, proj_cost: 0.195842 + + +10 girls words (id): 9226, 14344, 21812, 73739, 152203, 197165, 234950, 236126, 258027, 393758, +10 boys words (id): 60, 16942, 22969, 26190, 30741, 63715, 131803, 183075, 213204, 230527, +08/25/23 - 02:58.50AM, iter: 004, glove_cost: 0.011636, def_cost: 13.628941, ortho_cost: 2.907994, context_ortho_cost: 2.952259, proj_cost: 0.191755 + + +10 girls words (id): 9226, 21812, 86588, 136831, 152203, 197165, 234950, 258027, 374813, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 63715, 131803, 183075, 230527, 239184, +08/25/23 - 03:14.39AM, iter: 005, glove_cost: 0.010679, def_cost: 14.182186, ortho_cost: 3.475672, context_ortho_cost: 3.523808, proj_cost: 0.188521 + + 
+10 girls words (id): 9226, 73739, 93217, 152203, 158142, 197165, 225017, 234950, 258027, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 63715, 131803, 183075, 213204, 230527, +08/25/23 - 03:30.34AM, iter: 006, glove_cost: 0.010047, def_cost: 14.603496, ortho_cost: 3.942534, context_ortho_cost: 3.998224, proj_cost: 0.190407 + + +10 girls words (id): 9226, 14344, 73739, 152203, 158142, 197165, 234950, 247759, 258027, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 63715, 131803, 183075, 213204, 230527, +08/25/23 - 03:46.28AM, iter: 007, glove_cost: 0.009588, def_cost: 14.915628, ortho_cost: 4.314709, context_ortho_cost: 4.371996, proj_cost: 0.163701 + + +10 girls words (id): 9226, 50364, 93217, 152203, 158142, 197165, 234950, 247759, 258027, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 04:02.32AM, iter: 008, glove_cost: 0.009277, def_cost: 15.161369, ortho_cost: 4.640566, context_ortho_cost: 4.717416, proj_cost: 0.191363 + + +10 girls words (id): 9226, 14344, 158142, 176484, 197165, 234950, 247759, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 51815, 63715, 183075, 213204, 230527, +08/25/23 - 04:18.19AM, iter: 009, glove_cost: 0.009048, def_cost: 15.367541, ortho_cost: 4.918548, context_ortho_cost: 4.998279, proj_cost: 0.167393 + + +10 girls words (id): 9226, 14344, 123910, 158142, 197165, 225452, 234950, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 04:34.18AM, iter: 010, glove_cost: 0.008836, def_cost: 15.552681, ortho_cost: 5.171641, context_ortho_cost: 5.251921, proj_cost: 0.169931 + + +10 girls words (id): 9226, 158142, 197165, 202483, 225452, 234950, 247759, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 51815, 63715, 183075, 213204, 230527, +08/25/23 - 04:50.09AM, iter: 011, glove_cost: 0.008693, def_cost: 15.719580, ortho_cost: 
5.397158, context_ortho_cost: 5.494118, proj_cost: 0.191012 + + +10 girls words (id): 9226, 158142, 197165, 202483, 225452, 234950, 247759, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 51815, 63715, 183075, 213204, 230527, +08/25/23 - 05:06.03AM, iter: 012, glove_cost: 0.008563, def_cost: 15.868202, ortho_cost: 5.598580, context_ortho_cost: 5.712181, proj_cost: 0.176035 + + +10 girls words (id): 9226, 158142, 197165, 202483, 225452, 234950, 247759, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 51815, 63715, 183075, 213204, 230527, +08/25/23 - 05:22.06AM, iter: 013, glove_cost: 0.008463, def_cost: 16.009734, ortho_cost: 5.791415, context_ortho_cost: 5.912694, proj_cost: 0.171906 + + +10 girls words (id): 9226, 158142, 197165, 202483, 225452, 234950, 247759, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 51815, 63715, 183075, 213204, 230527, +08/25/23 - 05:38.03AM, iter: 014, glove_cost: 0.008386, def_cost: 16.108001, ortho_cost: 5.954953, context_ortho_cost: 6.078050, proj_cost: 0.162920 + + +10 girls words (id): 9226, 158142, 197165, 202483, 225452, 234950, 247759, 258027, 268334, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 51815, 63715, 183075, 213204, 230527, +08/25/23 - 05:53.50AM, iter: 015, glove_cost: 0.008304, def_cost: 16.217364, ortho_cost: 6.109559, context_ortho_cost: 6.236578, proj_cost: 0.167809 + + +10 girls words (id): 9226, 14344, 158142, 202483, 225017, 258027, 268334, 302122, 340752, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 06:09.49AM, iter: 016, glove_cost: 0.008241, def_cost: 16.289017, ortho_cost: 6.243312, context_ortho_cost: 6.343895, proj_cost: 0.156228 + + +10 girls words (id): 9226, 14344, 158142, 202483, 225017, 258027, 268334, 302122, 340752, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 06:25.36AM, 
iter: 017, glove_cost: 0.008174, def_cost: 16.359045, ortho_cost: 6.360470, context_ortho_cost: 6.467474, proj_cost: 0.178326 + + +10 girls words (id): 9226, 14344, 158142, 202483, 225017, 258027, 268334, 302122, 340752, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 06:41.37AM, iter: 018, glove_cost: 0.008111, def_cost: 16.435525, ortho_cost: 6.483256, context_ortho_cost: 6.596933, proj_cost: 0.159693 + + +10 girls words (id): 9226, 14344, 158142, 202483, 225017, 258027, 268334, 302122, 340752, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 06:57.22AM, iter: 019, glove_cost: 0.008064, def_cost: 16.503791, ortho_cost: 6.587645, context_ortho_cost: 6.698036, proj_cost: 0.160435 + + +10 girls words (id): 9226, 14344, 158142, 202483, 225017, 258027, 268334, 302122, 340752, 393758, +10 boys words (id): 60, 5723, 16942, 26190, 30741, 44699, 63715, 183075, 213204, 230527, +08/25/23 - 07:13.14AM, iter: 020, glove_cost: 0.008038, def_cost: 16.559257, ortho_cost: 6.689269, context_ortho_cost: 6.809974, proj_cost: 0.153703 + + diff --git "a/DD-GloVe/seed_word_tests/dd-glove-deutscher-t\303\274rke-gloss5_job_output.txt" "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-t\303\274rke-gloss5_job_output.txt" new file mode 100644 index 0000000000000000000000000000000000000000..5c0e8eb0b065eb6c9a4a5e2ff9da31a02c970c20 --- /dev/null +++ "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-t\303\274rke-gloss5_job_output.txt" @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: 
+src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_deutscher_türke_gloss5 -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.05 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.050000000 +MALE index: 60 +FEMALE index: 12204 +10 girls words (id): 12204, 12958, 60351, 96136, 135245, 144544, 180666, 284292, 364724, 372648, +10 boys words (id): 60, 130, 1566, 30741, 63715, 68091, 91200, 232876, 303124, 358249, +08/25/23 - 01:20.43AM, iter: 001, glove_cost: 0.024491, def_cost: 8.883560, ortho_cost: 0.358425, context_ortho_cost: 0.377477, proj_cost: 0.527199 + + diff --git "a/DD-GloVe/seed_word_tests/dd-glove-deutscher-t\303\274rke-top10_job_output.txt" "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-t\303\274rke-top10_job_output.txt" new file mode 100644 index 0000000000000000000000000000000000000000..a8c1016d11114b2a4cc5b82c615a70a9f8b445a6 --- /dev/null +++ "b/DD-GloVe/seed_word_tests/dd-glove-deutscher-t\303\274rke-top10_job_output.txt" @@ -0,0 +1,144 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | 
fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_deutscher_türke_top10 -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 20 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.05 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.050000000 +MALE index: 60 +FEMALE index: 12204 +10 girls words (id): 12204, 12958, 60351, 96136, 135245, 144544, 180666, 284292, 364724, 372648, +10 boys words (id): 60, 130, 1566, 30741, 63715, 68091, 91200, 232876, 303124, 358249, +08/25/23 - 02:12.40AM, iter: 001, glove_cost: 0.024491, def_cost: 8.856998, ortho_cost: 0.355882, context_ortho_cost: 0.376121, proj_cost: 0.562321 + + +10 girls words (id): 9403, 17269, 28163, 93642, 118555, 125351, 228760, 247875, 286882, 289770, +10 boys words (id): 60, 16144, 24688, 24996, 52507, 118179, 322712, 347504, 363631, 384818, +08/25/23 - 02:28.37AM, iter: 002, glove_cost: 0.016195, def_cost: 11.311446, ortho_cost: 1.252456, context_ortho_cost: 1.286748, proj_cost: 0.331975 + + +10 girls words (id): 17085, 28423, 53579, 85901, 125657, 135245, 197165, 306875, 371056, 373283, +9 boys words (id): 60, 130, 9471, 22106, 63715, 115894, 214333, 322712, 363631, +08/25/23 - 02:44.25AM, iter: 003, glove_cost: 0.013451, def_cost: 12.805616, ortho_cost: 2.219702, context_ortho_cost: 2.274251, proj_cost: 0.236318 + + +10 girls words (id): 20746, 28423, 53579, 85901, 125657, 135245, 197165, 334656, 371056, 373605, +10 boys words (id): 60, 130, 24996, 26190, 63715, 162128, 183075, 285645, 322712, 363631, +08/25/23 - 03:00.48AM, iter: 004, glove_cost: 0.011759, def_cost: 13.660814, ortho_cost: 2.951984, context_ortho_cost: 3.013063, proj_cost: 0.170428 + + +10 girls words (id): 17085, 28423, 37749, 53579, 125657, 135245, 197165, 218876, 250291, 371056, +9 boys words (id): 60, 130, 22106, 63715, 115894, 162128, 285645, 322712, 363631, +08/25/23 - 03:16.45AM, iter: 005, glove_cost: 0.010760, def_cost: 14.203024, ortho_cost: 3.514058, context_ortho_cost: 3.588900, proj_cost: 0.166661 + + +10 girls words (id): 20746, 28423, 37749, 85901, 125657, 197165, 198978, 
234950, 250291, 373605, +10 boys words (id): 60, 130, 22106, 24996, 63715, 162128, 213204, 285645, 322712, 363631, +08/25/23 - 03:32.34AM, iter: 006, glove_cost: 0.010104, def_cost: 14.588806, ortho_cost: 3.956758, context_ortho_cost: 4.043040, proj_cost: 0.163097 + + +10 girls words (id): 12204, 53579, 85901, 125657, 197165, 198978, 234950, 250291, 371056, 373605, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 322712, 363631, +08/25/23 - 03:48.32AM, iter: 007, glove_cost: 0.009645, def_cost: 14.902840, ortho_cost: 4.327842, context_ortho_cost: 4.419956, proj_cost: 0.168574 + + +10 girls words (id): 12204, 20746, 21885, 37749, 53579, 125657, 197165, 198978, 250291, 371056, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 285645, 363631, +08/25/23 - 04:04.18AM, iter: 008, glove_cost: 0.009342, def_cost: 15.121135, ortho_cost: 4.640926, context_ortho_cost: 4.728810, proj_cost: 0.146937 + + +10 girls words (id): 12204, 20746, 23272, 53579, 85901, 125657, 197165, 198978, 250291, 371703, +10 boys words (id): 60, 130, 1216, 22106, 26190, 63715, 162128, 183075, 213204, 363631, +08/25/23 - 04:20.15AM, iter: 009, glove_cost: 0.009058, def_cost: 15.340890, ortho_cost: 4.919165, context_ortho_cost: 5.033283, proj_cost: 0.183933 + + +10 girls words (id): 12204, 21885, 53579, 63057, 125657, 150097, 197165, 198978, 250291, 371056, +10 boys words (id): 60, 130, 16942, 22106, 26190, 63715, 162128, 183075, 213204, 363631, +08/25/23 - 04:36.01AM, iter: 010, glove_cost: 0.008877, def_cost: 15.515099, ortho_cost: 5.165199, context_ortho_cost: 5.280660, proj_cost: 0.147773 + + +10 girls words (id): 12204, 20746, 23272, 53579, 85901, 125657, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 358249, 363631, +08/25/23 - 04:51.55AM, iter: 011, glove_cost: 0.008721, def_cost: 15.637385, ortho_cost: 5.378206, context_ortho_cost: 5.476506, proj_cost: 0.142646 + + +10 girls words 
(id): 12204, 20746, 23272, 53579, 85901, 125657, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 358249, 363631, +08/25/23 - 05:07.50AM, iter: 012, glove_cost: 0.008564, def_cost: 15.762064, ortho_cost: 5.573569, context_ortho_cost: 5.669101, proj_cost: 0.144326 + + +10 girls words (id): 12204, 20746, 23272, 53579, 85901, 125657, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 358249, 363631, +08/25/23 - 05:23.41AM, iter: 013, glove_cost: 0.008462, def_cost: 15.872049, ortho_cost: 5.749739, context_ortho_cost: 5.841379, proj_cost: 0.147216 + + +10 girls words (id): 12204, 20746, 23272, 53579, 85901, 125657, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 358249, 363631, +08/25/23 - 05:39.45AM, iter: 014, glove_cost: 0.008355, def_cost: 15.973991, ortho_cost: 5.905923, context_ortho_cost: 6.000210, proj_cost: 0.142029 + + +10 girls words (id): 12204, 20746, 23272, 53579, 85901, 125657, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 22106, 26190, 63715, 162128, 183075, 213204, 358249, 363631, +08/25/23 - 05:55.36AM, iter: 015, glove_cost: 0.008277, def_cost: 16.057654, ortho_cost: 6.055811, context_ortho_cost: 6.144939, proj_cost: 0.139400 + + +10 girls words (id): 12204, 21266, 21885, 43619, 125657, 150097, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 12175, 22106, 34578, 63715, 162128, 213204, 358249, 363631, +08/25/23 - 06:11.28AM, iter: 016, glove_cost: 0.008201, def_cost: 16.137112, ortho_cost: 6.172060, context_ortho_cost: 6.273944, proj_cost: 0.167582 + + +10 girls words (id): 12204, 21266, 21885, 43619, 125657, 150097, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 12175, 22106, 34578, 63715, 162128, 213204, 358249, 363631, +08/25/23 - 06:26.57AM, iter: 017, glove_cost: 0.008164, def_cost: 16.225286, ortho_cost: 6.304372, 
context_ortho_cost: 6.416719, proj_cost: 0.159468 + + +10 girls words (id): 12204, 21266, 21885, 43619, 125657, 150097, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 12175, 22106, 34578, 63715, 162128, 213204, 358249, 363631, +08/25/23 - 06:42.43AM, iter: 018, glove_cost: 0.008105, def_cost: 16.297032, ortho_cost: 6.419616, context_ortho_cost: 6.537435, proj_cost: 0.152865 + + +10 girls words (id): 12204, 21266, 21885, 43619, 125657, 150097, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 12175, 22106, 34578, 63715, 162128, 213204, 358249, 363631, +08/25/23 - 06:58.32AM, iter: 019, glove_cost: 0.008050, def_cost: 16.359567, ortho_cost: 6.526110, context_ortho_cost: 6.645736, proj_cost: 0.156093 + + +10 girls words (id): 12204, 21266, 21885, 43619, 125657, 150097, 197165, 198978, 228760, 250291, +10 boys words (id): 60, 130, 12175, 22106, 34578, 63715, 162128, 213204, 358249, 363631, +08/25/23 - 07:14.30AM, iter: 020, glove_cost: 0.007990, def_cost: 16.415100, ortho_cost: 6.623221, context_ortho_cost: 6.741158, proj_cost: 0.151385 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-cap30000-top10_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-cap30000-top10_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb234cf659234f8b4dcf8df827a593bea0c2d3c7 --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-cap30000-top10_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result 
[-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_cap30000_top10 -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 88 +FEMALE index: 211 +10 girls words (id): 28, 211, 1196, 1254, 1486, 1701, 5237, 10296, 10304, 19108, +10 boys words (id): 10, 88, 573, 1165, 1949, 2241, 7281, 7407, 23959, 26589, +08/16/23 - 07:30.33PM, iter: 001, glove_cost: 0.024502, def_cost: 8.352455, ortho_cost: 0.344849, context_ortho_cost: 0.367556, proj_cost: 0.843610 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-cap30000_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-cap30000_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..efc436f83848355f7bdeb4a11572057d9de6596d --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-cap30000_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:755:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 755 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:766:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 766 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:772:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 772 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_cap30000 -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 88 +FEMALE index: 211 +30 girls words (id): 28, 211, 1196, 1254, 1256, 1486, 1701, 3829, 5237, 6031, 7064, 7439, 7660, 9310, 9918, 10296, 10304, 11360, 14293, 16010, 18120, 18503, 19108, 19773, 19803, 25174, 27240, 27746, 27907, 28009, +30 boys words (id): 10, 88, 573, 1165, 1949, 2157, 2241, 2303, 3775, 7281, 7361, 7407, 7450, 7482, 7907, 9410, 11882, 11939, 15593, 16705, 17573, 22259, 22438, 23959, 24481, 25441, 26589, 29199, 29328, 29590, +08/16/23 - 06:42.53PM, iter: 001, glove_cost: 0.024501, def_cost: 8.379469, ortho_cost: 0.348447, context_ortho_cost: 0.360655, proj_cost: 0.798561 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-custom-def_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-custom-def_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..e11cb6c589cffc2f6b69ff134292be4d00f1a2a2 --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-custom-def_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 
1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_custom_def -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 88 +FEMALE index: 211 +30 girls words (id): 211, 6892, 7774, 16554, 22194, 26673, 30605, 39488, 40510, 40815, 44543, 60446, 70242, 83002, 86538, 93319, 96524, 103124, 104741, 113405, 115633, 132604, 151764, 212171, 225986, 230987, 235360, 302454, 354449, 360914, +30 boys words (id): 88, 3361, 18422, 21635, 30338, 44916, 45105, 58273, 59948, 62058, 67401, 82470, 92823, 102943, 103959, 115088, 118675, 119377, 140318, 173664, 183686, 184080, 186510, 194589, 201095, 237830, 266771, 341260, 342111, 354354, +08/24/23 - 10:33.09PM, iter: 001, glove_cost: 0.024495, def_cost: 8.770339, ortho_cost: 0.362764, context_ortho_cost: 0.365069, proj_cost: 0.647450 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-full-def_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-full-def_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..f1b7320f577dd37bf540973326f620553a545bdf --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-full-def_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result 
[-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_full_def -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 88 +FEMALE index: 211 +30 girls words (id): 28, 211, 1486, 1701, 1949, 6892, 10526, 32417, 33890, 39250, 42184, 43576, 53557, 54665, 57406, 72566, 78180, 90126, 108625, 119641, 166331, 170172, 172417, 236714, 239701, 283166, 323073, 327339, 329266, 362413, +30 boys words (id): 88, 2303, 3361, 10655, 13768, 32648, 39921, 74612, 74812, 75404, 86098, 87610, 96950, 100228, 118914, 132137, 141065, 174967, 176994, 179168, 200593, 213239, 213504, 226296, 283777, 298746, 335620, 347492, 361247, 380012, +08/16/23 - 08:14.22PM, iter: 001, glove_cost: 0.024523, def_cost: 8.637469, ortho_cost: 0.374078, context_ortho_cost: 0.424632, proj_cost: 0.806380 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-less-stopwords_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-less-stopwords_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b77bdc677a4b4cf36b224dcba5e4fba9b30bbc1 --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-less-stopwords_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute 
warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_less_stopwords -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 88 +FEMALE index: 211 +10 girls words (id): 211, 1486, 6892, 32417, 33890, 53557, 78180, 170172, 249338, 362413, +9 boys words (id): 88, 2303, 3361, 74612, 86098, 92243, 132137, 279331, 347492, +08/25/23 - 01:35.14AM, iter: 001, glove_cost: 0.024513, def_cost: 8.715401, ortho_cost: 0.314692, context_ortho_cost: 0.322874, proj_cost: 0.599305 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-swapped_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-swapped_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..1edda1da8cfac81ca8ff67ab69c7bc929fd4e46f --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-swapped_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:755:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 755 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:766:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 766 | fread(&curr_id, sizeof(int), 1, fdef); + | 
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:772:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 772 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_order_swapped -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 211 +FEMALE index: 88 +30 girls words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +30 boys words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +08/09/23 - 09:07.49PM, iter: 001, glove_cost: 0.024502, def_cost: 8.616560, ortho_cost: 0.365397, context_ortho_cost: 0.390244, proj_cost: 0.721238 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender-top10_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender-top10_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..c2d1035716bf5bdae07c6b9d52c512ee4cdbff1b --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender-top10_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:755:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 
755 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:766:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 766 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:772:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 772 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_top10 -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 211 +FEMALE index: 88 +10 girls words (id): 10, 44249, 99256, 170308, 213504, 225577, 252320, 282380, 290371, 304509, +10 boys words (id): 1486, 40555, 74518, 78180, 144221, 161618, 172417, 236714, 278774, 301693, +08/09/23 - 09:49.34PM, iter: 001, glove_cost: 0.024496, def_cost: 8.699146, ortho_cost: 0.363419, context_ortho_cost: 0.374847, proj_cost: 0.541516 + + diff --git a/DD-GloVe/seed_word_tests/dd-glove-gender_job_output.txt b/DD-GloVe/seed_word_tests/dd-glove-gender_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..6295a75adb4548643603231e6be23ecedd2d14bb --- /dev/null +++ b/DD-GloVe/seed_word_tests/dd-glove-gender_job_output.txt @@ -0,0 +1,244 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:755:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 755 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:766:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 766 | fread(&curr_id, sizeof(int), 1, fdef); + | 
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:772:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 772 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ /home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_gender_mann_frau_glosslimit -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 40 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 1 -lambda 0.001 -use-ortho-loss 1 -beta 0.001 -use-proj-loss 1 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. 
+vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 1 +use_ortho_loss: 1 +use_proj_loss: 1 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 88 +FEMALE index: 211 +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 08:19.52PM, iter: 001, glove_cost: 0.024508, def_cost: 8.599910, ortho_cost: 0.362973, context_ortho_cost: 0.385264, proj_cost: 0.743773 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 08:35.49PM, iter: 002, glove_cost: 0.016561, def_cost: 11.343991, ortho_cost: 1.320829, context_ortho_cost: 1.422297, proj_cost: 0.589744 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 08:51.43PM, iter: 
003, glove_cost: 0.013556, def_cost: 12.934350, ortho_cost: 2.355101, context_ortho_cost: 2.489955, proj_cost: 0.599920 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 09:07.32PM, iter: 004, glove_cost: 0.011882, def_cost: 13.901731, ortho_cost: 3.121596, context_ortho_cost: 3.345502, proj_cost: 0.505931 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 09:23.18PM, iter: 005, glove_cost: 0.010860, def_cost: 14.483919, ortho_cost: 3.724617, context_ortho_cost: 3.957530, proj_cost: 0.520379 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 09:39.26PM, iter: 006, glove_cost: 0.010212, def_cost: 14.892465, ortho_cost: 4.238442, context_ortho_cost: 
4.449541, proj_cost: 0.623507 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 09:55.19PM, iter: 007, glove_cost: 0.009848, def_cost: 15.223503, ortho_cost: 4.695566, context_ortho_cost: 4.938107, proj_cost: 0.651656 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 10:11.20PM, iter: 008, glove_cost: 0.009508, def_cost: 15.565508, ortho_cost: 5.100838, context_ortho_cost: 5.423738, proj_cost: 0.593890 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 10:27.25PM, iter: 009, glove_cost: 0.009261, def_cost: 15.791383, ortho_cost: 5.489141, context_ortho_cost: 5.869295, proj_cost: 0.594095 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 
43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 10:43.19PM, iter: 010, glove_cost: 0.009130, def_cost: 16.063648, ortho_cost: 5.828555, context_ortho_cost: 6.357533, proj_cost: 0.594019 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 10:59.19PM, iter: 011, glove_cost: 0.008912, def_cost: 16.228962, ortho_cost: 6.146638, context_ortho_cost: 6.690904, proj_cost: 0.651612 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 11:15.12PM, iter: 012, glove_cost: 0.008845, def_cost: 16.402334, ortho_cost: 6.447113, context_ortho_cost: 7.140005, proj_cost: 0.626761 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 
172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 11:31.00PM, iter: 013, glove_cost: 0.008718, def_cost: 16.598837, ortho_cost: 6.742481, context_ortho_cost: 7.748021, proj_cost: 0.682935 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/30/23 - 11:46.57PM, iter: 014, glove_cost: 0.008679, def_cost: 16.752654, ortho_cost: 7.085257, context_ortho_cost: 8.220202, proj_cost: 0.757641 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 12:02.58AM, iter: 015, glove_cost: 0.008705, def_cost: 16.902698, ortho_cost: 7.396091, context_ortho_cost: 8.626924, proj_cost: 0.614267 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words 
(id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 12:19.02AM, iter: 016, glove_cost: 0.008567, def_cost: 17.033904, ortho_cost: 7.628203, context_ortho_cost: 8.951228, proj_cost: 0.617746 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 12:34.49AM, iter: 017, glove_cost: 0.008458, def_cost: 17.098335, ortho_cost: 7.767718, context_ortho_cost: 9.183129, proj_cost: 0.611214 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 12:50.41AM, iter: 018, glove_cost: 0.008375, def_cost: 17.188865, ortho_cost: 7.928848, context_ortho_cost: 9.351497, proj_cost: 0.590445 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 
107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 01:06.36AM, iter: 019, glove_cost: 0.008308, def_cost: 17.279277, ortho_cost: 8.107746, context_ortho_cost: 9.475195, proj_cost: 0.570287 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 01:22.26AM, iter: 020, glove_cost: 0.008299, def_cost: 17.295448, ortho_cost: 8.257997, context_ortho_cost: 9.610889, proj_cost: 0.693953 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 01:38.22AM, iter: 021, glove_cost: 0.008302, def_cost: 17.404245, ortho_cost: 8.453667, context_ortho_cost: 9.846960, proj_cost: 0.633765 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 
282380, 290371, 304509, 341767, +07/31/23 - 01:54.15AM, iter: 022, glove_cost: 0.008202, def_cost: 17.411190, ortho_cost: 8.538109, context_ortho_cost: 10.001082, proj_cost: 0.640205 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 02:10.08AM, iter: 023, glove_cost: 0.008162, def_cost: 17.496799, ortho_cost: 8.648274, context_ortho_cost: 10.147272, proj_cost: 0.580383 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 02:26.06AM, iter: 024, glove_cost: 0.008103, def_cost: 17.511213, ortho_cost: 8.709227, context_ortho_cost: 10.149979, proj_cost: 0.561422 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 02:41.42AM, iter: 025, glove_cost: 0.008057, 
def_cost: 17.526840, ortho_cost: 8.756351, context_ortho_cost: 10.145406, proj_cost: 0.547741 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 02:57.43AM, iter: 026, glove_cost: 0.008031, def_cost: 17.519970, ortho_cost: 8.786677, context_ortho_cost: 10.168812, proj_cost: 0.533350 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 03:13.32AM, iter: 027, glove_cost: 0.008001, def_cost: 17.547757, ortho_cost: 8.812504, context_ortho_cost: 10.228374, proj_cost: 0.509270 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 03:29.32AM, iter: 028, glove_cost: 0.007946, def_cost: 17.564121, ortho_cost: 8.852304, context_ortho_cost: 10.174522, proj_cost: 0.536259 
+ + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 03:45.18AM, iter: 029, glove_cost: 0.007922, def_cost: 17.527114, ortho_cost: 8.871338, context_ortho_cost: 10.101682, proj_cost: 0.541773 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 04:01.06AM, iter: 030, glove_cost: 0.007906, def_cost: 17.587799, ortho_cost: 8.944607, context_ortho_cost: 10.115477, proj_cost: 0.522215 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 04:17.07AM, iter: 031, glove_cost: 0.007871, def_cost: 17.569988, ortho_cost: 8.945342, context_ortho_cost: 10.123500, proj_cost: 0.500709 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 
85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 04:33.04AM, iter: 032, glove_cost: 0.007852, def_cost: 17.565745, ortho_cost: 8.931514, context_ortho_cost: 10.165391, proj_cost: 0.519233 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 04:49.02AM, iter: 033, glove_cost: 0.007845, def_cost: 17.581133, ortho_cost: 8.934835, context_ortho_cost: 10.204487, proj_cost: 0.489462 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 05:04.48AM, iter: 034, glove_cost: 0.007819, def_cost: 17.575296, ortho_cost: 8.940916, context_ortho_cost: 10.179373, proj_cost: 0.487809 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 
278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 05:20.35AM, iter: 035, glove_cost: 0.007791, def_cost: 17.575419, ortho_cost: 8.944391, context_ortho_cost: 10.140888, proj_cost: 0.465001 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 05:36.36AM, iter: 036, glove_cost: 0.007761, def_cost: 17.566569, ortho_cost: 8.925891, context_ortho_cost: 10.090097, proj_cost: 0.450684 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 05:52.28AM, iter: 037, glove_cost: 0.007751, def_cost: 17.539588, ortho_cost: 8.938650, context_ortho_cost: 10.065664, proj_cost: 0.544252 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 
23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 06:08.14AM, iter: 038, glove_cost: 0.007766, def_cost: 17.547917, ortho_cost: 8.942601, context_ortho_cost: 10.064383, proj_cost: 0.487221 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 06:24.12AM, iter: 039, glove_cost: 0.007728, def_cost: 17.522602, ortho_cost: 8.928248, context_ortho_cost: 10.044246, proj_cost: 0.457408 + + +30 girls words (id): 28, 211, 1254, 1486, 1701, 40555, 42123, 43576, 70007, 74518, 78180, 85133, 89839, 102195, 132123, 140077, 144221, 161618, 163598, 167993, 172417, 209682, 236714, 278774, 301693, 312064, 359555, 366392, 383944, 393788, +30 boys words (id): 10, 88, 573, 23959, 26589, 30318, 44249, 47851, 50827, 59433, 63545, 78210, 80647, 99256, 107658, 123240, 149138, 154585, 170308, 176755, 212716, 213504, 225577, 242790, 252320, 262857, 282380, 290371, 304509, 341767, +07/31/23 - 06:40.26AM, iter: 040, glove_cost: 0.007721, def_cost: 17.507245, ortho_cost: 8.903341, context_ortho_cost: 10.098027, proj_cost: 0.502304 + + diff --git a/DD-GloVe/seed_word_tests/deutscher_seedwords.txt b/DD-GloVe/seed_word_tests/deutscher_seedwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..928631cdd05d58317cd34bcd2f7380e91e82fd30 --- /dev/null +++ b/DD-GloVe/seed_word_tests/deutscher_seedwords.txt @@ -0,0 +1,133 @@ +DEUTSCHTUM +Def.: Gesamtheit der für die Deutschen 
typischen Lebensäußerungen; deutsche Wesensart +Def.: Zugehörigkeit zum deutschen Volk +Def.: Gesamtheit der deutschen Volksgruppen im Ausland +Definitional bias: 0.0068 + +DEUTSCHSTÄMMIG +Def.: von deutschen Vorfahren abstammend +Definitional bias: 0.0036 + +DEUTSCH-FRANZÖSISCH +Def.: zwischen Deutschland und Frankreich, aus Deutschen und Franzosen bestehend +Definitional bias: -0.0152 + +ACHTUNDVIERZIGER +Def.: Person, die an der deutschen Revolution von 1848 teilgenommen oder mit ihr sympathisiert hat +Definitional bias: -0.0195 + +WESTMARK +Def.: Deutsche Mark (im Unterschied zur Mark der Deutschen Demokratischen Republik) +Definitional bias: -0.0291 + +DEUTSCHKUNDE +Def.: Lehre von der deutschen Sprache und Kultur +Definitional bias: -0.0292 + +SCHWABENSPIEGEL +Def.: Rechtssammlung des deutschen Mittelalters +Definitional bias: -0.0306 + +DEUTSCHLANDLIED +Def.: Nationalhymne des Deutschen Reiches (seit 1922), deren dritte Strophe die offizielle Hymne der Bundesrepublik Deutschland ist +Definitional bias: -0.0339 + +NOVEMBERREVOLUTION +Def.: Revolution im Deutschen Reich und in Österreich im November 1918 +Definitional bias: -0.0383 + +LANDESVERSICHERUNGSANSTALT +Def.: (in der Bundesrepublik Deutschland bis 2005) öffentlich-rechtliche Versicherungsgesellschaft, die durch die Deutsche Rentenversicherung abgelöst wurde +Definitional bias: -0.0404 + +DEUTSCHSPRACHIG +Def.: die deutsche Sprache sprechend +Def.: in deutscher Sprache ablaufend, verfasst +Definitional bias: -0.0418 + +HITLERDEUTSCHLAND +Def.: das Deutschland der Hitlerzeit +Definitional bias: -0.0439 + +DEUTSCHLANDWEIT +Def.: ganz Deutschland umfassend, einschließend; in ganz Deutschland +Definitional bias: -0.0468 + +NACHKRIEGSDEUTSCHLAND +Def.: Deutschland nach dem Zweiten Weltkrieg +Definitional bias: -0.0470 + +BIEDERMEIER +Def.: deutsche Kunst- und Kulturepoche (etwa 1815 bis 1848) +Def.: Biedermeierstil +Definitional bias: -0.0489 + +OSTPREUSSEN +Def.: ehemalige Provinz des Deutschen Reiches 
+Definitional bias: -0.0500 + +GESAMTDEUTSCHLAND +Def.: Deutschland mit allen seinen Fürstentümern, Ländern +Def.: Deutschland mit seinen beiden Staaten nach dem 2. Weltkrieg +Def.: ganz Deutschland +Definitional bias: -0.0505 + +DEUTSCH +Def.: die Deutschen, Deutschland betreffend +Def.: in der Sprache der Bevölkerung besonders Deutschlands, Österreichs und in Teilen der Schweiz +Def.: in deutscher Schreibschrift [verfasst] +Def.: die deutsche Sprache [eines Einzelnen oder einer Gruppe]; die näher gekennzeichnete deutsche Sprache +Def.: Unterrichtsfach, in dem deutsche Sprache und Literatur gelehrt wird +Definitional bias: -0.0556 + +BUNDESPOST +Def.: Deutsche Bundespost (früheres staatliches Postunternehmen der Bundesrepublik Deutschland) +Definitional bias: -0.0613 + +DEUTSCHER +Def.: Angehöriger des deutschen Volkes, aus Deutschland stammende Person +Def.: das deutsche Volk +Definitional bias: -0.0630 + +BUNDESBANK +Def.: Kurzform von Deutsche Bundesbank (zentrale Notenbank der Bundesrepublik Deutschland) +Definitional bias: -0.0632 + +GERMANIA +Def.: Frauengestalt, die das ehemalige Deutsche Reich symbolisiert +Definitional bias: -0.0720 + +DEUTSCHLEHRER +Def.: Lehrer, der deutsche Sprache und Literatur unterrichtet +Definitional bias: -0.0742 + +DEUTSCHE +Def.: Angehörige des deutschen Volkes, aus Deutschland stammende weibliche Person +Def.: die deutsche Sprache im Allgemeinen +Definitional bias: -0.0786 + +KURRENTSCHRIFT +Def.: (früher benutzte) deutsche Schreibschrift +Definitional bias: -0.0890 + +HOCHDEUTSCHE +Def.: Deutsche +Definitional bias: -0.0956 + +HOLLÄNDISCHE +Def.: vgl. Deutsche +Definitional bias: -0.0966 + +SCHWEIZERDEUTSCHE +Def.: vgl. 
Deutsche +Definitional bias: -0.0966 + +ROTWELSCH +Def.: in der Gaunersprache Rotwelsch, zu ihr gehörend +Def.: deutsche Gaunersprache +Definitional bias: -0.1080 + +LUFTHANSA +Def.: Kurzform von: Deutsche Lufthansa AG (eine deutsche Luftverkehrsgesellschaft) +Definitional bias: -0.1316 + diff --git a/DD-GloVe/seed_word_tests/deutscher_seedwords_1iter.txt b/DD-GloVe/seed_word_tests/deutscher_seedwords_1iter.txt new file mode 100644 index 0000000000000000000000000000000000000000..673ebdd4f5f013fe6ea316415d79ee8f3e79314f --- /dev/null +++ b/DD-GloVe/seed_word_tests/deutscher_seedwords_1iter.txt @@ -0,0 +1,49 @@ +DEUTSCHNATIONAL +Def.: zu einer liberalen Bewegung in Österreich gehörend, die die Anlehnung des deutschen Österreichs an Deutschland fordert +Def.: zu einer monarchistischen und betont nationalistischen Partei der Weimarer Republik gehörend, sie betreffend +Def.: eine extrem nationalistische, [angebliche] deutsche Interessen in den Vordergrund stellende, politische Einstellung betreffend, aufweisend +Definitional bias: 0.0767 + +GROSSDEUTSCHLAND +Def.: (in der expansionistischen Vorstellung der Nationalsozialisten) durch den Zusammenschluss aller geschlossen siedelnden Deutschen zu schaffendes Deutschland +Def.: Deutschland nach dem sogenannten Anschluss Österreichs (im Jahre 1938) +Def.: Deutschland als (besonders seit der Wiedervereinigung bedrohlich empfundene) wirtschaftliche Großmacht +Definitional bias: 0.0370 + +DEUTSCHLANDTOUR +Def.: durch Deutschland führende Tournee oder Konzerttour +Def.: Deutschland-Rundfahrt +Definitional bias: 0.0180 + +DEUTSCHTUM +Def.: Gesamtheit der für die Deutschen typischen Lebensäußerungen; deutsche Wesensart +Def.: Zugehörigkeit zum deutschen Volk +Def.: Gesamtheit der deutschen Volksgruppen im Ausland +Definitional bias: 0.0151 + +DEUTSCH-FRANZÖSISCH +Def.: zwischen Deutschland und Frankreich, aus Deutschen und Franzosen bestehend +Definitional bias: -0.0022 + +HITLERDEUTSCHLAND +Def.: das Deutschland der Hitlerzeit 
+Definitional bias: -0.0299 + +DEUTSCHE +Def.: Angehörige des deutschen Volkes, aus Deutschland stammende weibliche Person +Def.: die deutsche Sprache im Allgemeinen +Definitional bias: -0.0518 + +DEUTSCHROCK +Def.: aus Deutschland stammende Rockmusik +Definitional bias: -0.0525 + +DEUTSCHER +Def.: Angehöriger des deutschen Volkes, aus Deutschland stammende Person +Def.: das deutsche Volk +Definitional bias: -0.0631 + +SUDETENDEUTSCHE +Def.: aus dem Sudetenland stammende ethnische Deutsche +Definitional bias: -0.0871 + diff --git a/DD-GloVe/seed_word_tests/deutscher_seedwords_gloss5.txt b/DD-GloVe/seed_word_tests/deutscher_seedwords_gloss5.txt new file mode 100644 index 0000000000000000000000000000000000000000..976b932ce61bfa5cc10f90bb7d769e3a561fca47 --- /dev/null +++ b/DD-GloVe/seed_word_tests/deutscher_seedwords_gloss5.txt @@ -0,0 +1,45 @@ +NATIONALSOZIALISMUS +Def.: nach dem Ersten Weltkrieg in Deutschland aufgekommene, extrem nationalistische, imperialistische und rassistische politische Bewegung +Def.: auf der Ideologie des Nationalsozialismus basierende faschistische Herrschaft von A. 
Hitler in Deutschland von 1933 bis 1945 +Definitional bias: 0.0585 + +VOLKSHERRSCHAFT +Def.: Herrschaft durch das Volk +Definitional bias: 0.0543 + +LANDESVERSICHERUNGSANSTALT +Def.: (in der Bundesrepublik Deutschland bis 2005) öffentlich-rechtliche Versicherungsgesellschaft, die durch die Deutsche Rentenversicherung abgelöst wurde +Definitional bias: 0.0434 + +TAMILE +Def.: Angehöriger eines vorderindischen Volkes +Definitional bias: 0.0418 + +DEUTSCHTUM +Def.: Gesamtheit der für die Deutschen typischen Lebensäußerungen; deutsche Wesensart +Def.: Zugehörigkeit zum deutschen Volk +Def.: Gesamtheit der deutschen Volksgruppen im Ausland +Definitional bias: 0.0228 + +DEUTSCH-FRANZÖSISCH +Def.: zwischen Deutschland und Frankreich, aus Deutschen und Franzosen bestehend +Definitional bias: 0.0053 + +DEUTSCHE +Def.: Angehörige des deutschen Volkes, aus Deutschland stammende weibliche Person +Def.: die deutsche Sprache im Allgemeinen +Definitional bias: -0.0390 + +DEUTSCHER +Def.: Angehöriger des deutschen Volkes, aus Deutschland stammende Person +Def.: das deutsche Volk +Definitional bias: -0.0423 + +BUNDESBANK +Def.: Kurzform von Deutsche Bundesbank (zentrale Notenbank der Bundesrepublik Deutschland) +Definitional bias: -0.0458 + +HOCHDEUTSCHE +Def.: Deutsche +Definitional bias: -0.0784 + diff --git a/DD-GloVe/seed_word_tests/deutscher_seedwords_top10.txt b/DD-GloVe/seed_word_tests/deutscher_seedwords_top10.txt new file mode 100644 index 0000000000000000000000000000000000000000..d862bd17435b08dc16617804e0c41320fb324f49 --- /dev/null +++ b/DD-GloVe/seed_word_tests/deutscher_seedwords_top10.txt @@ -0,0 +1,43 @@ +BIEDERMEIER +Def.: deutsche Kunst- und Kulturepoche (etwa 1815 bis 1848) +Def.: Biedermeierstil +Definitional bias: -0.0082 + +HITLERDEUTSCHLAND +Def.: das Deutschland der Hitlerzeit +Definitional bias: -0.0157 + +BUNDESBANK +Def.: Kurzform von Deutsche Bundesbank (zentrale Notenbank der Bundesrepublik Deutschland) +Definitional bias: -0.0259 + +DEUTSCHER +Def.: 
Angehöriger des deutschen Volkes, aus Deutschland stammende Person +Def.: das deutsche Volk +Definitional bias: -0.0290 + +GERMANIA +Def.: Frauengestalt, die das ehemalige Deutsche Reich symbolisiert +Definitional bias: -0.0309 + +HOCHDEUTSCHE +Def.: Deutsche +Definitional bias: -0.0426 + +HOLLÄNDISCHE +Def.: vgl. Deutsche +Definitional bias: -0.0465 + +SCHWEIZERDEUTSCHE +Def.: vgl. Deutsche +Definitional bias: -0.0465 + +ROTWELSCH +Def.: in der Gaunersprache Rotwelsch, zu ihr gehörend +Def.: deutsche Gaunersprache +Definitional bias: -0.0885 + +LUFTHANSA +Def.: Kurzform von: Deutsche Lufthansa AG (eine deutsche Luftverkehrsgesellschaft) +Definitional bias: -0.0925 + diff --git "a/DD-GloVe/seed_word_tests/deutscher_seedwords_top10_vs_t\303\274rke.txt" "b/DD-GloVe/seed_word_tests/deutscher_seedwords_top10_vs_t\303\274rke.txt" new file mode 100644 index 0000000000000000000000000000000000000000..392908d50e1630cb6a2967898771f7870e3d863c --- /dev/null +++ "b/DD-GloVe/seed_word_tests/deutscher_seedwords_top10_vs_t\303\274rke.txt" @@ -0,0 +1,43 @@ +DEUTSCH-FRANZÖSISCH +Def.: zwischen Deutschland und Frankreich, aus Deutschen und Franzosen bestehend +Definitional bias: 0.0120 + +BUNDESPRÄSIDENT +Def.: Staatsoberhaupt in der Bundesrepublik Deutschland und in Österreich +Def.: Vorsitzender des Bundesrates in der Schweiz +Definitional bias: 0.0036 + +ACHTUNDVIERZIGER +Def.: Person, die an der deutschen Revolution von 1848 teilgenommen oder mit ihr sympathisiert hat +Definitional bias: -0.0052 + +NOVEMBERREVOLUTION +Def.: Revolution im Deutschen Reich und in Österreich im November 1918 +Definitional bias: -0.0114 + +HITLERDEUTSCHLAND +Def.: das Deutschland der Hitlerzeit +Definitional bias: -0.0314 + +DEUTSCHER +Def.: Angehöriger des deutschen Volkes, aus Deutschland stammende Person +Def.: das deutsche Volk +Definitional bias: -0.0497 + +DEUTSCHE +Def.: Angehörige des deutschen Volkes, aus Deutschland stammende weibliche Person +Def.: die deutsche Sprache im Allgemeinen 
+Definitional bias: -0.0514 + +HOCHDEUTSCHE +Def.: Deutsche +Definitional bias: -0.0586 + +WITWER +Def.: Mann, dessen Ehefrau gestorben ist +Definitional bias: -0.0702 + +AUSLANDSDEUTSCHE +Def.: im Ausland lebende deutsche weibliche Person +Definitional bias: -0.0845 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords.txt b/DD-GloVe/seed_word_tests/frau_seedwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d09a8cddf1a460ed17dacf3b40b85149ce9136a --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords.txt @@ -0,0 +1,145 @@ +FRAU +Def.: erwachsene Person weiblichen Geschlechts +Def.: Ehefrau +Def.: Hausherrin, Dame +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person weiblichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Definitional bias: -0.1405 + +MILCHDRÜSE +Def.: Milch absondernde Drüse bei der Frau und den weiblichen Säugetieren +Definitional bias: -0.1070 + +SIE +Def.: Person oder Tier weiblichen Geschlechts +Definitional bias: -0.0965 + +ZITZE +Def.: Milch bildendes, paarig angeordnetes Organ bei weiblichen Säugetieren +Def.: [weibliche] Brust[warze] +Definitional bias: -0.0963 + +DOMINIKANERIN +Def.: Angehörige des weiblichen Zweiges des Dominikanerordens +Def.: weibliche Form zu Dominikaner +Definitional bias: -0.0841 + +BRUSTKREBS +Def.: Krebs besonders der weiblichen Brustdrüsen +Definitional bias: -0.0817 + +MÄDCHEN +Def.: Kind weiblichen Geschlechts +Def.: junge, jüngere weibliche Person +Def.: Freundin (eines jungen Mannes) +Def.: Hausmädchen, Hausangestellte, Hausgehilfin +Definitional bias: -0.0765 + +KARMELITIN +Def.: Angehörige des weiblichen Zweiges des Karmelitenordens +Definitional bias: -0.0679 + +ZISTERZIENSERIN +Def.: Angehörige des weiblichen Zweiges der Zisterzienser +Definitional bias: -0.0659 + +EIERSTOCK +Def.: paarig angelegtes Geschlechtsorgan, das die weiblichen Keimzellen bildet +Definitional bias: -0.0655 + +GEBÄRMUTTER +Def.: (beim Menschen 
und Säugetier) Hohlorgan des weiblichen Körpers, in dem sich das befruchtete Ei fortentwickelt; Uterus +Definitional bias: -0.0625 + +SCHEIDE +Def.: schmale, längliche, der Form der jeweiligen Klinge angepasste Hülse aus festem Material, in die eine Hieb- oder Stichwaffe bis zum Knauf hineingesteckt wird +Def.: von der Gebärmutter nach außen führender, mit Schleimhaut ausgekleideter, schlauchartiger Teil der weiblichen Geschlechtsorgane; Vagina +Def.: Grenze +Definitional bias: -0.0510 + +STERBLICHE +Def.: sterblicher Mensch weiblichen Geschlechts +Definitional bias: -0.0499 + +BRUSTVERGRÖSSERUNG +Def.: Vergrößerung der weiblichen Brust +Definitional bias: -0.0486 + +FRAUSCHAFT +Def.: aus weiblichen Mitgliedern bestehendes Team +Definitional bias: -0.0485 + +SCHWESTER +Def.: Person weiblichen Geschlechts im Verwandtschaftsverhältnis zu einer anderen Person, die von denselben Eltern abstammt +Def.: Mitmensch weiblichen Geschlechts, mit dem man sich verbunden fühlt +Def.: Nonne, Ordensschwester +Def.: Homosexueller +Definitional bias: -0.0426 + +MUTTERMILCH +Def.: nach der Geburt eines Kindes in den Drüsen der weiblichen Brust gebildete Milch +Definitional bias: -0.0418 + +GEBURTSKANAL +Def.: Kanal im weiblichen Körper, den das Kind bei der Geburt durchwandert +Definitional bias: -0.0364 + +INNEWOHNEN +Def.: als Eigentümlichkeit, Besonderheit, als etwas Charakteristisches in etwas mit enthalten sein, zu jemandem, etwas gehören +Definitional bias: -0.0352 + +EUTER +Def.: in der Leistengegend bei bestimmten weiblichen Säugetieren (z. B. 
Kühen, Ziegen, Schafen, Kamelen) sack- oder beutelartig herabhängendes Organ mit zwei oder mehr Zitzen, in dem sich die Milchdrüsen befinden +Definitional bias: -0.0332 + +WOCHENBETT +Def.: Zeitraum von 6 bis 8 Wochen nach der Entbindung, in dem es zur Rückbildung der durch Schwangerschaft und Geburt am weiblichen Körper hervorgerufenen Veränderungen kommt +Definitional bias: -0.0226 + +VULVA +Def.: Gesamtheit der äußeren weiblichen Geschlechtsorgane +Definitional bias: -0.0210 + +PEEPSHOW +Def.: auf sexuelle Stimulation zielendes Sich-zur-Schau-Stellen einer nackten, besonders einer weiblichen Person, die gegen Geldeinwurf durch das Guckfenster einer Kabine betrachtet werden kann +Def.: Einrichtung für Peepshows +Definitional bias: -0.0184 + +RAUMAUFTEILUNG +Def.: Aufteilung des gegebenen Raumes in Gebäuden +Def.: Nutzen des zur Verfügung stehenden Raumes auf dem Spielfeld durch die angreifende oder verteidigende Mannschaft +Definitional bias: -0.0167 + +BÜSTENHALTER +Def.: Teil der Unterkleidung, der der weiblichen Brust Form und Halt geben soll +Definitional bias: -0.0092 + +TRÄCHTIGKEIT +Def.: das Trächtigsein; Zustand eines weiblichen Säugetiers von der Befruchtung bis zur Geburt des oder der Jungen +Def.: das Trächtigsein +Definitional bias: -0.0024 + +FREIEN +Def.: heiraten, mit jemandem eine Ehe schließen +Def.: [für einen andern] einer weiblichen Person einen Heiratsantrag machen, um sie werben, um ihre Hand bitten +Definitional bias: 0.0254 + +BRENNEND +Def.: sehr wichtig, akut +Def.: sehr +Definitional bias: 0.0262 + +HITZIG +Def.: von leicht erregbarem Temperament und dabei heftig, jähzornig in seinen Reaktionen +Def.: [in ungezügelter Weise] leidenschaftlich +Def.: erregt, mit Leidenschaft [geführt] +Def.: heiß, fiebrig +Def.: (von weiblichen Hunden und Katzen) läufig, brünstig +Def.: (vom Boden) gut durchlüftet und dadurch Humus und Nährstoffe rasch abbauend +Definitional bias: 0.0452 + +BEFRUCHTEN +Def.: (bei der weiblichen Geschlechtszelle) die 
Befruchtung vollziehen, herbeiführen +Def.: bei jemandem, etwas geistig anregend wirken, jemandem, einer Sache wertvolle, wesentliche Anregungen geben +Definitional bias: 0.0723 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_cap30000.txt b/DD-GloVe/seed_word_tests/frau_seedwords_cap30000.txt new file mode 100644 index 0000000000000000000000000000000000000000..eaf24a20ad8779c779b783aa02ac1b9bfb1f9892 --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_cap30000.txt @@ -0,0 +1,149 @@ +FRAU +Def.: erwachsene Person weiblichen Geschlechts +Def.: Ehefrau +Def.: Hausherrin, Dame +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person weiblichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Definitional bias: -0.1571 + +SIE +Def.: Person oder Tier weiblichen Geschlechts +Definitional bias: -0.1015 + +MÄDCHEN +Def.: Kind weiblichen Geschlechts +Def.: junge, jüngere weibliche Person +Def.: Freundin (eines jungen Mannes) +Def.: Hausmädchen, Hausangestellte, Hausgehilfin +Definitional bias: -0.0881 + +OBERLAUSITZ +Def.: Gebiet um Bautzen und Görlitz +Definitional bias: -0.0700 + +BRUST +Def.: Brustschwimmen +Def.: vordere Seite des Rumpfes bei Mensch und Wirbeltieren +Def.: die im Brustkorb gelegenen Atmungsorgane +Def.: paariges, halbkugelförmiges Organ (an der Vorderseite des weiblichen Oberkörpers), das die Milchdrüsen enthält und das in der Stillzeit Milch bildet +Def.: Bruststück eines Schlachttiers +Definitional bias: -0.0692 + +SCHWESTER +Def.: Person weiblichen Geschlechts im Verwandtschaftsverhältnis zu einer anderen Person, die von denselben Eltern abstammt +Def.: Mitmensch weiblichen Geschlechts, mit dem man sich verbunden fühlt +Def.: Nonne, Ordensschwester +Def.: Homosexueller +Definitional bias: -0.0478 + +JURIST +Def.: jemand, der Rechtswissenschaften studiert hat bzw. 
auf diesem Gebiet arbeitet +Definitional bias: -0.0433 + +ENTDECKER +Def.: jemand, der etwas entdeckt hat +Definitional bias: -0.0367 + +GESCHICHTSFORSCHUNG +Def.: wissenschaftliche Erforschung der Geschichte +Definitional bias: -0.0351 + +QUARZ +Def.: in verschiedenen Arten vorkommendes, in reinem Zustand farbloses, hartes und sprödes kristallines Mineral +Def.: Quarzkristall, besonders als elektronisches Bauelement +Definitional bias: -0.0347 + +BAUGESCHICHTE +Def.: Geschichte der Entstehung eines Bauwerks +Definitional bias: -0.0322 + +GESCHICHTSWISSENSCHAFT +Def.: Wissenschaft von der Geschichte und ihrer Erforschung; Historie +Definitional bias: -0.0298 + +FREIBERUFLICH +Def.: in einem freien Beruf [tätig] +Definitional bias: -0.0276 + +THEOLOGE +Def.: jemand, der Theologie studiert, studiert hat und auf diesem Gebiet beruflich, wissenschaftlich tätig ist +Definitional bias: -0.0218 + +EISENBAHNNETZ +Def.: Netz der Eisenbahnlinien in einem Gebiet +Definitional bias: -0.0097 + +STADTPARK +Def.: öffentlicher Park in einer Stadt +Definitional bias: 0.0023 + +SCHACH +Def.: Brettspiel für zwei Personen, die mit je sechzehn schwarzen bzw. weißen Schachfiguren (von unterschiedlichem Wert und mit unterschiedlicher Funktion) abwechselnd ziehen mit dem Ziel, den gegnerischen König mattzusetzen +Def.: Stellung im Schach, bei der der König unmittelbar geschlagen werden könnte +Def.: Schachspiel +Def.: Partie Schach +Definitional bias: 0.0091 + +ABWASSER +Def.: durch häuslichen, gewerblichen oder industriellen Gebrauch verunreinigtes abfließendes Wasser +Definitional bias: 0.0108 + +ENDRUNDE +Def.: letzte, über den Gesamtsieg entscheidende Runde eines aus mehreren Runden bestehenden Wettbewerbs +Definitional bias: 0.0111 + +FERIEN +Def.: mehrere zusammenhängende Tage oder Wochen dauernde, der Erholung dienende, turnusmäßig wiederkehrende Arbeitspause einer Institution (z. B. 
der Schule, der Hochschule, des Gerichts oder des Parlaments) +Def.: Urlaub +Definitional bias: 0.0126 + +FREIEN +Def.: heiraten, mit jemandem eine Ehe schließen +Def.: [für einen andern] einer weiblichen Person einen Heiratsantrag machen, um sie werben, um ihre Hand bitten +Definitional bias: 0.0196 + +REICHSGERICHT +Def.: höchstes Gericht des Deutschen Reiches für Angelegenheiten des Zivil- und Strafrechts +Definitional bias: 0.0208 + +SYNTHESIZER +Def.: elektronisches Musikinstrument, das aus einer Kombination aufeinander abgestimmter elektronischer Bauelemente (zur Erzeugung von Klängen und Geräuschen) besteht +Definitional bias: 0.0372 + +FREIMAURER +Def.: Mitglied eines weltweit verbreiteten, in Logen gegliederten Männerbundes mit ethischen und kosmopolitischen Zielen und einem mystischen Ritual +Definitional bias: 0.0396 + +BAUSTIL +Def.: Stil eines Bauwerks +Definitional bias: 0.0481 + +BEDEUTEND +Def.: besonderes Gewicht, besondere Tragweite habend; wichtig +Def.: großes Ansehen genießend; berühmt, namhaft, sehr bekannt +Def.: eine hohe Qualität und daher einen großen Wert aufweisend; hervorragend, wertvoll +Def.: eine beachtliche Größe, Höhe aufweisend; von besonderem Ausmaß; groß, beachtlich +Def.: um vieles, sehr +Definitional bias: 0.0560 + +BEMERKENSWERT +Def.: beachtlich, bedeutend, ziemlich groß +Def.: Aufmerksamkeit, Beachtung verdienend +Def.: sehr, ungewöhnlich +Definitional bias: 0.0658 + +BOMBEN +Def.: bombardieren +Def.: Bomben legen +Def.: mit großer Wucht [aufs Tor] schießen +Definitional bias: 0.0699 + +HEILIGTUM +Def.: heilige Stätte zur Verehrung [eines] Gottes +Def.: heiliger, der Verehrung würdiger Gegenstand +Definitional bias: 0.0764 + +ENGAGIERT +Def.: entschieden für etwas eintretend, ein starkes persönliches Interesse an etwas habend +Definitional bias: 0.0920 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_cap30000_top10.txt b/DD-GloVe/seed_word_tests/frau_seedwords_cap30000_top10.txt new file mode 100644 index 
0000000000000000000000000000000000000000..d1481b53b900e423964e198867e691482f602d76 --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_cap30000_top10.txt @@ -0,0 +1,51 @@ +FRAU +Def.: erwachsene Person weiblichen Geschlechts +Def.: Ehefrau +Def.: Hausherrin, Dame +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person weiblichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Definitional bias: -0.0910 + +OBERLAUSITZ +Def.: Gebiet um Bautzen und Görlitz +Definitional bias: -0.0908 + +SIE +Def.: Person oder Tier weiblichen Geschlechts +Definitional bias: -0.0601 + +BAUGESCHICHTE +Def.: Geschichte der Entstehung eines Bauwerks +Definitional bias: -0.0086 + +MÄDCHEN +Def.: Kind weiblichen Geschlechts +Def.: junge, jüngere weibliche Person +Def.: Freundin (eines jungen Mannes) +Def.: Hausmädchen, Hausangestellte, Hausgehilfin +Definitional bias: -0.0014 + +SCHWESTER +Def.: Person weiblichen Geschlechts im Verwandtschaftsverhältnis zu einer anderen Person, die von denselben Eltern abstammt +Def.: Mitmensch weiblichen Geschlechts, mit dem man sich verbunden fühlt +Def.: Nonne, Ordensschwester +Def.: Homosexueller +Definitional bias: -0.0005 + +THEOLOGE +Def.: jemand, der Theologie studiert, studiert hat und auf diesem Gebiet beruflich, wissenschaftlich tätig ist +Definitional bias: 0.0134 + +STADTPARK +Def.: öffentlicher Park in einer Stadt +Definitional bias: 0.0142 + +FREIEN +Def.: heiraten, mit jemandem eine Ehe schließen +Def.: [für einen andern] einer weiblichen Person einen Heiratsantrag machen, um sie werben, um ihre Hand bitten +Definitional bias: 0.0462 + +ENDRUNDE +Def.: letzte, über den Gesamtsieg entscheidende Runde eines aus mehreren Runden bestehenden Wettbewerbs +Definitional bias: 0.0734 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_custom_def.txt b/DD-GloVe/seed_word_tests/frau_seedwords_custom_def.txt new file mode 100644 index 
0000000000000000000000000000000000000000..80a686913eb39f281c936bbdde38806613d6313f --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_custom_def.txt @@ -0,0 +1,138 @@ +FRAU +Def.: erwachsene Person weiblichen Geschlechts +Def.: Ehefrau +Def.: Hausherrin, Dame +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person weiblichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Definitional bias: -0.1054 + +ZINNOBER +Def.: [hell]rotes, schwarzes oder bleigraues, Quecksilber enthaltendes Mineral +Def.: leuchtend gelblich rote Farbe +Def.: leuchtend gelblich roter Farbton +Def.: wertloses Zeug +Def.: Unsinn, dummes Zeug +Definitional bias: -0.0977 + +ERPEL +Def.: männliche Ente; Enterich +Definitional bias: -0.0852 + +RUSSLANDDEUTSCHE +Def.: in Russland geborene [und dort lebende] ethnische Deutsche +Definitional bias: -0.0662 + +RUNDLICH +Def.: annähernd rund, mit einer Rundung versehen +Def.: ein wenig dick, füllig, mollig +Definitional bias: -0.0492 + +TAUBER +Def.: jemand, der taub ist +Def.: männliche Taube +Definitional bias: -0.0402 + +MÄDEL +Def.: Mädchen +Definitional bias: -0.0072 + +DEERN +Def.: Mädchen +Definitional bias: -0.0072 + +MADEL +Def.: Mädchen +Definitional bias: -0.0072 + +GATTIN +Def.: Ehefrau +Definitional bias: -0.0067 + +LOGGIA +Def.: nicht oder kaum vorspringender, nach der Außenseite hin offener, überdachter Raum im [Ober]geschoss eines Hauses +Def.: zu einer oder mehreren Seiten hin offene, von Säulen, Pfeilern getragene Halle als selbstständiger Bau oder als Teil des Erdgeschosses +Definitional bias: -0.0046 + +IKONOGRAFIE +Def.: Beschreibung, Form- und Inhaltsdeutung von [alten] Bildwerken +Def.: Ikonologie +Def.: wissenschaftliche Bestimmung von Bildnissen des griechischen und römischen Altertums +Definitional bias: 0.0034 + +GEMAHLIN +Def.: Ehefrau, Gattin +Definitional bias: 0.0036 + +HEXENMEISTER +Def.: (nach dem Volksglauben) männliche Person mit den dämonischen Fähigkeiten einer 
weiblichen Hexe; Zauberer +Definitional bias: 0.0152 + +KUCKUCK +Def.: besonders in Wäldern lebender Vogel mit braungrauem Gefieder, einem leicht gekrümmten Schnabel und langem Schwanz, der seine Eier zum Ausbrüten in Nester von Singvögeln legt +Def.: Siegel, das der Gerichtsvollzieher bei der Pfändung an Einrichtungsgegenstände klebt +Definitional bias: 0.0276 + +PERSONENKULT +Def.: starke Überbewertung, Überbetonung der Führungsrolle der Einzelpersönlichkeit in Politik, Gesellschaft, Geschichte +Definitional bias: 0.0334 + +BENUTZERFÜHRUNG +Def.: Hilfestellung durch [selbsterklärende oder intuitiv erschließbare] Elemente auf der Benutzeroberfläche, die eine Anwendung bedienbar machen +Definitional bias: 0.0446 + +MORALPHILOSOPH +Def.: Philosoph, der eine Moralphilosophie begründet, vertritt +Definitional bias: 0.0489 + +TESTOSTERON +Def.: männliches Keimdrüsenhormon +Definitional bias: 0.0579 + +SCHWÄGERIN +Def.: Ehefrau eines Bruders oder Schwester des Ehemanns, der Ehefrau +Definitional bias: 0.0589 + +PUCK +Def.: Kobold, schalkhafter Elf +Def.: Scheibe aus Hartgummi, die mit dem Schläger ins gegnerische Tor zu treiben ist +Definitional bias: 0.0709 + +GROSSTANTE +Def.: Schwester der Großmutter oder des Großvaters +Def.: Ehefrau des Großonkels +Definitional bias: 0.0730 + +BIRKHAHN +Def.: männliches Birkhuhn +Definitional bias: 0.0772 + +EINSPEISEVERGÜTUNG +Def.: Vergütung, die jemand für das Einspeisen von aus erneuerbaren Energien gewonnenem Strom in das öffentliche Stromnetz bekommt +Definitional bias: 0.0810 + +GEBETSRAUM +Def.: zur Verrichtung von Gebeten genutzter Raum +Definitional bias: 0.1090 + +TYPHUSERKRANKUNG +Def.: Erkrankung an Typhus +Definitional bias: 0.1230 + +ZARENTUM +Def.: monarchische Staatsform, bei der ein Zar Herrscher ist +Def.: das Zarsein +Definitional bias: 0.1326 + +EINSICHTIG +Def.: Einsicht habend; vernünftig, verständnisvoll +Def.: verständlich, [leicht] einzusehen +Definitional bias: 0.1614 + +AUSBILDUNGSVERGÜTUNG +Def.: 
Vergütung während der Ausbildungszeit +Definitional bias: 0.1647 + +HERVORRAGEND +Def.: durch Begabung, Können oder Qualität hervorstechend; sehr gut +Definitional bias: 0.1737 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_full_def.txt b/DD-GloVe/seed_word_tests/frau_seedwords_full_def.txt new file mode 100644 index 0000000000000000000000000000000000000000..f938e180833078119b0d0a9e194a922bca644760 --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_full_def.txt @@ -0,0 +1,156 @@ +FRAU +Def.: erwachsene Person weiblichen Geschlechts +Def.: Ehefrau +Def.: Hausherrin, Dame +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person weiblichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Definitional bias: -0.1028 + +ETIKETTENSCHWINDEL +Def.: irreführende Benennung [durch Verwendung einer bekannten Bezeichnung für eine minderwertige Sache] +Definitional bias: -0.0740 + +HERR +Def.: Mann (auch als übliche höfliche Bezeichnung für eine männliche Person im gesellschaftlichen Verkehr) +Def.: gebildeter, kultivierter, gepflegter Mann +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person männlichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Def.: jemand, der über andere oder über etwas herrscht; Gebieter; Besitzer +Def.: jemand, der jemanden, etwas unter Kontrolle hat, beherrscht +Def.: Gott +Definitional bias: -0.0630 + +YANKEE +Def.: US-Amerikaner +Definitional bias: -0.0569 + +SIE +Def.: Person oder Tier weiblichen Geschlechts +Definitional bias: -0.0548 + +FRAUSCHAFT +Def.: aus weiblichen Mitgliedern bestehendes Team +Definitional bias: -0.0257 + +WIRBELKASTEN +Def.: am Ende des Halses bestimmter Saiteninstrumente unterhalb der Schnecke befindliche Öffnung, durch die quer die Wirbel geführt sind +Definitional bias: -0.0138 + +SCHWESTER +Def.: Person weiblichen Geschlechts im Verwandtschaftsverhältnis zu einer anderen Person, die von denselben Eltern 
abstammt +Def.: Mitmensch weiblichen Geschlechts, mit dem man sich verbunden fühlt +Def.: Nonne, Ordensschwester +Def.: Homosexueller +Definitional bias: -0.0007 + +GATTIN +Def.: Ehefrau +Definitional bias: 0.0023 + +MÄDCHEN +Def.: Kind weiblichen Geschlechts +Def.: junge, jüngere weibliche Person +Def.: Freundin (eines jungen Mannes) +Def.: Hausmädchen, Hausangestellte, Hausgehilfin +Definitional bias: 0.0048 + +KARMELITIN +Def.: Angehörige des weiblichen Zweiges des Karmelitenordens +Definitional bias: 0.0099 + +FLUSSRICHTUNG +Def.: Richtung des Fließens +Definitional bias: 0.0100 + +OSTWESTRICHTUNG +Def.: ostwestliche Richtung +Definitional bias: 0.0100 + +FREIFRAU +Def.: Adelstitel der Ehefrau eines Freiherrn +Def.: Ehefrau eines Freiherrn +Definitional bias: 0.0123 + +FLUGBLATT +Def.: meist unentgeltlich verteiltes oder von Flugzeugen o. Ä. in größerer Menge abgeworfenes Blatt, das ein- oder zweiseitig bedruckt [und illustriert] ist und über ein aktuelles Ereignis informiert oder dazu Stellung nimmt +Definitional bias: 0.0183 + +STERBLICHE +Def.: sterblicher Mensch weiblichen Geschlechts +Definitional bias: 0.0231 + +PASSAT +Def.: in Richtung Äquator gleichmäßig wehender Ostwind in den Tropen +Definitional bias: 0.0247 + +HISTORIENMALEREI +Def.: Richtung der Malerei, die historische Ereignisse zum Bildgegenstand hat +Definitional bias: 0.0251 + +SCHUSSRICHTUNG +Def.: Richtung eines Schusses; Richtung, in die geschossen wird, werden soll +Definitional bias: 0.0283 + +GEBÄRMUTTER +Def.: (beim Menschen und Säugetier) Hohlorgan des weiblichen Körpers, in dem sich das befruchtete Ei fortentwickelt; Uterus +Definitional bias: 0.0316 + +GEGENRICHTUNG +Def.: entgegengesetzte Richtung +Definitional bias: 0.0520 + +BESCHREITEN +Def.: (einen Weg, eine Richtung) schreitend gehen, einschlagen +Definitional bias: 0.0532 + +ANDERSHERUM +Def.: in die andere, in die entgegengesetzte Richtung +Def.: in anderer, entgegengesetzter Richtung +Def.: von der anderen, 
entgegengesetzten Richtung +Def.: homosexuell +Definitional bias: 0.0563 + +NATURBELASSEN +Def.: in seiner natürlichen Substanz unverändert; ohne fremden Zusatz +Def.: in seinem natürlichen Zustand belassen +Definitional bias: 0.0584 + +FORTSCHREITEN +Def.: sich in derselben Richtung weiterentwickeln +Definitional bias: 0.0644 + +WENDEN +Def.: auf die andere Seite drehen, herumdrehen, umwenden +Def.: wälzen +Def.: in die entgegengesetzte Richtung bringen +Def.: drehen und die entgegengesetzte Richtung einschlagen; die Richtung um 180° ändern +Def.: in eine andere Richtung drehen +Def.: sich (zu etwas) anschicken +Def.: eine Frage, Bitte an jemanden richten +Def.: jemandem, einer Sache entgegentreten +Def.: (für jemanden, etwas) aufwenden, benötigen, verbrauchen +Definitional bias: 0.0800 + +SCHRÄGEN +Def.: in eine schräge Lage, Stellung bringen +Def.: abschrägen +Definitional bias: 0.0802 + +ABDREHEN +Def.: durch Drehen einer entsprechenden Vorrichtung ausschalten, abstellen +Def.: durch Drehen einer entsprechenden Vorrichtung die Zufuhr von etwas unterbinden +Def.: durch eine drehende Bewegung von etwas abtrennen, lösen +Def.: wegdrehen, abwenden +Def.: (einen Film, Filmszenen) [fertig] drehen +Def.: eine andere Richtung einschlagen, einen anderen Kurs nehmen +Def.: sich aufregen, die Beherrschung verlieren +Definitional bias: 0.1135 + +HAFTENTSCHÄDIGUNG +Def.: finanzielle Entschädigung, die jemandem für eine nicht gerechtfertigte Haft von staatlicher Seite zuteilwird +Definitional bias: 0.1436 + +ARBEITERAUFSTAND +Def.: Aufstand, Revolte von Arbeitern +Definitional bias: 0.1564 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_less_stopwords.txt b/DD-GloVe/seed_word_tests/frau_seedwords_less_stopwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..1dcbf31637c00ea56f05d778fb9a7e7c87c69b33 --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_less_stopwords.txt @@ -0,0 +1,48 @@ +FRAU +Def.: erwachsene Person weiblichen 
Geschlechts +Def.: Ehefrau +Def.: Hausherrin, Dame +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person weiblichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Definitional bias: -0.1028 + +BADESALZ +Def.: körniger, wohlriechender Zusatz für das Badewasser +Definitional bias: -0.0655 + +GATTIN +Def.: Ehefrau +Definitional bias: -0.0413 + +FREIFRAU +Def.: Adelstitel der Ehefrau eines Freiherrn +Def.: Ehefrau eines Freiherrn +Definitional bias: -0.0245 + +SCHWESTER +Def.: Person weiblichen Geschlechts im Verwandtschaftsverhältnis zu einer anderen Person, die von denselben Eltern abstammt +Def.: Mitmensch weiblichen Geschlechts, mit dem man sich verbunden fühlt +Def.: Nonne, Ordensschwester +Def.: Homosexueller +Definitional bias: -0.0243 + +FLUSSRICHTUNG +Def.: Richtung des Fließens +Definitional bias: 0.0312 + +OSTWESTRICHTUNG +Def.: ostwestliche Richtung +Definitional bias: 0.0312 + +STERBLICHE +Def.: sterblicher Mensch weiblichen Geschlechts +Definitional bias: 0.0389 + +GEGENRICHTUNG +Def.: entgegengesetzte Richtung +Definitional bias: 0.0699 + +FORTSCHREITEN +Def.: sich in derselben Richtung weiterentwickeln +Definitional bias: 0.0861 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_own_calc_projection.txt b/DD-GloVe/seed_word_tests/frau_seedwords_own_calc_projection.txt new file mode 100644 index 0000000000000000000000000000000000000000..35b4279faaebab9bfe1c2310f757e7c8de037ac3 --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_own_calc_projection.txt @@ -0,0 +1,38 @@ +BEZEICHNUNG +Def.: Kennzeichnung, Markierung +Def.: Wort, mit dem etwas bezeichnet wird; Benennung +Definitional bias: -0.1000 + +VARIANTE +Def.: leicht veränderte Art, Form von etwas; Abwandlung, Abart, Spielart +Def.: abweichende Lesart einer Textstelle bei mehreren Fassungen eines Textes +Def.: Wechsel von Moll nach Dur (und umgekehrt) durch Veränderung der großen Terz in eine kleine (und umgekehrt) in der Tonika 
+Definitional bias: -0.0192 + +VERWENDETE +Definitional bias: 0.0000 + +VARIANTEN +Definitional bias: 0.0000 + +GEBRÄUCHLICHE +Definitional bias: 0.0000 + +VERALTETE +Definitional bias: 0.0000 + +NAMENSTRÄGERINNEN +Definitional bias: 0.0000 + +ALAE +Definitional bias: 0.0000 + +WERKSINTERNE +Definitional bias: 0.0000 + +ABKÜRZUNG +Def.: das Abkürzen, Verkürzen +Def.: eine Entfernung, Wegstrecke abkürzender Weg +Def.: abgekürztes Wort +Definitional bias: 0.0245 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_own_calculations.txt b/DD-GloVe/seed_word_tests/frau_seedwords_own_calculations.txt new file mode 100644 index 0000000000000000000000000000000000000000..23dfd7bbb644b633ce7bcf03e9d5bd282d1b9c80 --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_own_calculations.txt @@ -0,0 +1,101 @@ +NAMENS +Def.: mit [dem] Namen +Definitional bias: -0.1006 + +BEZEICHNUNG +Def.: Kennzeichnung, Markierung +Def.: Wort, mit dem etwas bezeichnet wird; Benennung +Definitional bias: -0.1000 + +GEBRÄUCHLICH +Def.: allgemein üblich +Definitional bias: -0.0547 + +VORNAME +Def.: von den Eltern bestimmter [und amtlich eingetragener] Name, der die Individualität einer Person kennzeichnet +Definitional bias: -0.0330 + +VARIANTE +Def.: leicht veränderte Art, Form von etwas; Abwandlung, Abart, Spielart +Def.: abweichende Lesart einer Textstelle bei mehreren Fassungen eines Textes +Def.: Wechsel von Moll nach Dur (und umgekehrt) durch Veränderung der großen Terz in eine kleine (und umgekehrt) in der Tonika +Definitional bias: -0.0192 + +VERWENDETE +Definitional bias: 0.0000 + +GENANNTE +Definitional bias: 0.0000 + +VARIANTEN +Definitional bias: 0.0000 + +BEZEICHNUNGEN +Definitional bias: 0.0000 + +BLAUE +Definitional bias: 0.0000 + +MG +Definitional bias: 0.0000 + +GRAUE +Definitional bias: 0.0000 + +GEBRÄUCHLICHE +Definitional bias: 0.0000 + +MODIFIZIERTE +Definitional bias: 0.0000 + +MINERVA +Definitional bias: 0.0000 + +VERALTETE +Definitional bias: 0.0000 + 
+NAMENSTRÄGERINNEN +Definitional bias: 0.0000 + +D3 +Definitional bias: 0.0000 + +BAUGLEICHE +Definitional bias: 0.0000 + +MIDORI +Definitional bias: 0.0000 + +PARVIFLORA +Definitional bias: 0.0000 + +OBJEKT-IDENTIFIKATOR +Definitional bias: 0.0000 + +AHS +Definitional bias: 0.0000 + +ALAE +Definitional bias: 0.0000 + +NOTARZTEINSATZFAHRZEUG +Definitional bias: 0.0000 + +LEGALLY +Definitional bias: 0.0000 + +HELIANGELUS +Definitional bias: 0.0000 + +POMARANCIO +Definitional bias: 0.0000 + +WERKSINTERNE +Definitional bias: 0.0000 + +ABKÜRZUNG +Def.: das Abkürzen, Verkürzen +Def.: eine Entfernung, Wegstrecke abkürzender Weg +Def.: abgekürztes Wort +Definitional bias: 0.0245 + diff --git a/DD-GloVe/seed_word_tests/frau_seedwords_top10.txt b/DD-GloVe/seed_word_tests/frau_seedwords_top10.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b9fecf99978621104354a4e5265163f217c24fd --- /dev/null +++ b/DD-GloVe/seed_word_tests/frau_seedwords_top10.txt @@ -0,0 +1,44 @@ +SCHWESTER +Def.: Person weiblichen Geschlechts im Verwandtschaftsverhältnis zu einer anderen Person, die von denselben Eltern abstammt +Def.: Mitmensch weiblichen Geschlechts, mit dem man sich verbunden fühlt +Def.: Nonne, Ordensschwester +Def.: Homosexueller +Definitional bias: -0.0123 + +BRUSTVERGRÖSSERUNG +Def.: Vergrößerung der weiblichen Brust +Definitional bias: -0.0019 + +FRAUSCHAFT +Def.: aus weiblichen Mitgliedern bestehendes Team +Definitional bias: 0.0046 + +ZISTERZIENSERIN +Def.: Angehörige des weiblichen Zweiges der Zisterzienser +Definitional bias: 0.0136 + +KARMELITIN +Def.: Angehörige des weiblichen Zweiges des Karmelitenordens +Definitional bias: 0.0208 + +BÜSTENHALTER +Def.: Teil der Unterkleidung, der der weiblichen Brust Form und Halt geben soll +Definitional bias: 0.0248 + +STERBLICHE +Def.: sterblicher Mensch weiblichen Geschlechts +Definitional bias: 0.0286 + +BRUSTKREBS +Def.: Krebs besonders der weiblichen Brustdrüsen +Definitional bias: 0.0344 + +VULVA +Def.: 
Gesamtheit der äußeren weiblichen Geschlechtsorgane +Definitional bias: 0.0396 + +TRÄCHTIGKEIT +Def.: das Trächtigsein; Zustand eines weiblichen Säugetiers von der Befruchtung bis zur Geburt des oder der Jungen +Def.: das Trächtigsein +Definitional bias: 0.0728 + diff --git a/DD-GloVe/seed_word_tests/get_seed_word_definitions.py b/DD-GloVe/seed_word_tests/get_seed_word_definitions.py new file mode 100644 index 0000000000000000000000000000000000000000..93d2e29bca4101fecbd0081fc47574435d9e31a1 --- /dev/null +++ b/DD-GloVe/seed_word_tests/get_seed_word_definitions.py @@ -0,0 +1,121 @@ +"""Helper script to get seed words, their +definitions, and their definition's embedding's bias +from a list of seed word indexes. + +'Girl' and 'Boy' are used as placeholder terms for the +two groups, can also mean e.g. German and Foreign.""" + +import ast +import json +import pandas as pd +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity as cosine + +# hard-coded, you can get seed word indexes from GloVe training output +girl_indexes = [ + 41, 2933, 624, 7595, 2626, 6107, 5306, 807, 10470, 125, 4877, + 7744, 8440, 425, 2068, 971, 3026, 1674, 618, 3180, 2236, + 428, 1780, 2484, 42, 3237, 4977, 1417, 1067, 3819, 1452 + ] +boy_indexes = [ + 16, 98915, 65269, 42557, 138164, 97023, 18330, 134737, 27996, 57301, + 17621, 14234, 47588, 108234, 211932, 77434, 37725, 47421, 346084, + 303133, 47821, 22468, 44522, 330544, 79490, 49760, 109485, 214051, + 47483, 53517, 89180 + ] + +# dicts will contain pairs of tokens and their dictionary definitions +girl_words = dict() +boy_words = dict() + +# look up tokens corresponding to indexes in vocab text file +with open("/workspace/students/reichelt/BA/data/dd-glove/english_vocab.txt", encoding="utf-8") as vocab: + for idx, line in enumerate(vocab.readlines()): + token = line.split()[0] + if idx in girl_indexes: + girl_words[idx] = dict() # {10: {"token": mann, "definition": "...", "bias": 0.0}} + girl_words[idx]["token"] = 
token + if idx in boy_indexes: + boy_words[idx] = dict() + boy_words[idx]["token"] = token + +with open("/workspace/students/reichelt/BA/data/dd-glove/english_definitions.json", + "r", encoding="utf-8") as f: + data = json.load(f) +for idx, info in girl_words.items(): + word = info["token"] + girl_words[idx]["definition"] = data[word] +for idx, info in boy_words.items(): + word = info["token"] + boy_words[idx]["definition"] = data[word] + +# calculate bias of seed words and sort accordingly + +# load embeddings +embeddings = pd.read_csv("/workspace/students/reichelt/BA/data/dd-glove/english_vectors_gender_1iter.txt", + skiprows=1, header=None, sep=" ") +embeddings.rename(columns={0: "token"}, inplace=True) # name first column "token" +embeddings["vector"] = embeddings.iloc[:, 1:].values.tolist() # convert other columns into one, containing 300-dim vec as a list + +# load definition indexes +definition_indexes = pd.read_csv("/workspace/students/reichelt/BA/data/dd-glove/english_definitions.txt", sep="\t", usecols=[1], header=None, names=["def_words"]) +definition_indexes = definition_indexes["def_words"].tolist() +definition_indexes = [ast.literal_eval(item) for item in definition_indexes] + +def get_definition_embedding(word_index: int) -> np.array: + """Calculate definition embedding by averaging embeddings of + words occurring in the definition of the word at word_index, + e.g. 88 which is token 'Mann'. Empty definition -> 300-dim zero vector.""" + emb_sum = np.zeros(300) + if definition_indexes[word_index]: + for i in definition_indexes[word_index]: + vec = np.array(embeddings["vector"].iloc[i]) # get embedding of token with index i + emb_sum += vec + definitional_embedding = (1/len(definition_indexes[word_index])) * emb_sum + return definitional_embedding + return emb_sum + +def calculate_definitional_bias(v_1: np.array, v_2: np.array, w: int) -> float: + """Calculate bias of a word using its definitional embedding. 
+ Bias is cosine(word_def, v_1) - cosine(word_def, v_2), where word_def is + the definitional embedding of word index w (callers unwrap it via b[0].item()).""" + boy_def = v_1.reshape(1, -1) # reshape to fit cosine function + girl_def = v_2.reshape(1, -1) + word_def = get_definition_embedding(w).reshape(1, -1) + bias = cosine(word_def, boy_def) - cosine(word_def, girl_def) + return bias + +# look up def embedding for initial seed words +mann_vec = get_definition_embedding(16) +frau_vec = get_definition_embedding(41) + +# calculate girl biases and sort in ascending order (= strongest bias first, here) +for girl_index in girl_indexes: + b = calculate_definitional_bias(mann_vec, frau_vec, girl_index) + girl_words[girl_index]["bias"] = b[0].item() + +girl_words_sorted = dict(sorted(girl_words.items(), key=lambda item: item[1]["bias"])) + +# calculate boy biases and sort in descending order (= strongest bias first, here) +for boy_index in boy_indexes: + b = calculate_definitional_bias(mann_vec, frau_vec, boy_index) + boy_words[boy_index]["bias"] = b[0].item() + +boy_words_sorted = dict(sorted(boy_words.items(), key=lambda item: item[1]["bias"], reverse=True)) + +# write results to files +with open("/home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/seed_word_tests/she_seedwords_alternate_calc.txt", + "w", encoding="utf-8") as f: + for idx, info in girl_words_sorted.items(): + f.write(info["token"].upper() + "\n") + for d in info["definition"]: + f.write(f"Def.: {d}\n") + f.write(f"Definitional bias: {info['bias']:.4f}\n\n") + +with open("/home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/seed_word_tests/he_seedwords_alternate_calc.txt", + "w", encoding="utf-8") as f: + for idx, info in boy_words_sorted.items(): + f.write(info["token"].upper() + "\n") + for d in info["definition"]: + f.write(f"Def.: {d}\n") + f.write(f"Definitional bias: {info['bias']:.4f}\n\n") diff --git a/DD-GloVe/seed_word_tests/glove-teddy-jogging_job_output.txt 
b/DD-GloVe/seed_word_tests/glove-teddy-jogging_job_output.txt new file mode 100644 index 0000000000000000000000000000000000000000..83fd1a47859ea878719d62b78d875ea101660db3 --- /dev/null +++ b/DD-GloVe/seed_word_tests/glove-teddy-jogging_job_output.txt @@ -0,0 +1,49 @@ +mkdir -p build +gcc -c src/glove.c -o build/glove.o -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic +src/glove.c: In function ‘load_init_file’: +src/glove.c:107:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 107 | fread(&array[a], sizeof(real), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘glove_thread’: +src/glove.c:216:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 216 | fread(&cr, sizeof(CREC), 1, fin); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c: In function ‘train_glove’: +src/glove.c:756:5: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 756 | fread(&def_word_num, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:767:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 767 | fread(&curr_id, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:768:9: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 768 | fread(&curr_size, sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +src/glove.c:773:15: warning: ignoring return value of ‘fread’, declared with attribute warn_unused_result [-Wunused-result] + 773 | fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gcc build/glove.o build/common.o -o build/glove -lm -pthread -O3 -march=native -funroll-loops -Wall -Wextra -Wpedantic + +$ 
/home/students/reichelt/ba/bias-mitigation-ba/DD-GloVe/build/glove -save-file /workspace/students/reichelt/BA/data/dd-glove/vectors_no_debiasing_teddy_jogging -threads 32 -input-file /workspace/students/reichelt/BA/data/dd-glove/cooccurrence.shuf.bin -x-max 100 -iter 1 -vector-size 300 -binary 2 -vocab-file /workspace/students/reichelt/BA/data/dd-glove/vocab.txt -verbose 2 -use-def-loss 0 -lambda 0.001 -use-ortho-loss 0 -beta 0.001 -use-proj-loss 0 -gamma 0.2 -seed 42 +TRAINING MODEL +Read 1612975968 lines. +Opened definition file +Building definition lists... +Initializing parameters...Using random seed 42 +done. +vector size: 300 +vocab size: 400000 +x_max: 100.000000 +alpha: 0.750000 +use_def_loss: 0 +use_ortho_loss: 0 +use_proj_loss: 0 +lambda: 0.001000000 +beta: 0.001000000 +gamma: 0.200000000 +MALE index: 160647 +FEMALE index: 225193 +30 girls words (id): 828, 1113, 1735, 3395, 3565, 39794, 43293, 51377, 54247, 60785, 70065, 107814, 109266, 114284, 134869, 136926, 145437, 156707, 165080, 203832, 219135, 225193, 230698, 243042, 279270, 279656, 308241, 343627, 375778, 391638, +30 boys words (id): 9384, 15557, 18135, 43393, 47337, 54418, 70634, 83133, 83849, 86313, 95286, 95826, 118220, 131438, 132034, 139294, 160475, 160647, 164027, 175177, 189785, 222311, 259084, 261029, 273061, 285439, 287131, 350535, 372374, 377027, +08/18/23 - 05:52.56AM, iter: 001, glove_cost: 0.024486, def_cost: 8.656775, ortho_cost: 0.327693, context_ortho_cost: 0.327807, proj_cost: 2.642347 + + diff --git a/DD-GloVe/seed_word_tests/he_seedwords_alternate_calc.txt b/DD-GloVe/seed_word_tests/he_seedwords_alternate_calc.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b4ed6dfe591ffd1ae33c834b536f72102b6cfba --- /dev/null +++ b/DD-GloVe/seed_word_tests/he_seedwords_alternate_calc.txt @@ -0,0 +1,226 @@ +BEREA +Def.: a city in NE Ohio, near Cleveland. +Definitional bias: 0.0821 + +RADNOR +Def.: a town in SE Pennsylvania, near Philadelphia. +Def.: Radnorshire. 
+Definitional bias: 0.0729 + +RIVERDALE +Def.: a city in NE Illinois. +Definitional bias: 0.0711 + +BECKLEY +Def.: a city in SW West Virginia. +Definitional bias: 0.0646 + +EDWARDSVILLE +Def.: a town in SW Illinois. +Definitional bias: 0.0641 + +POTTSVILLE +Def.: a city in E Pennsylvania. +Definitional bias: 0.0636 + +HYATTSVILLE +Def.: a city in central Maryland. +Definitional bias: 0.0601 + +SHUBERT +Def.: Lee Levi Shubert, 1875–1953, and his brothers Sam S., 1876–1905, and Jacob J., 1880–1963, U.S. theatrical managers. +Definitional bias: 0.0555 + +FRAT +Def.: fraternity (def. 1). +Def.: US slang +Def.: a member of a fraternity +Def.: (as modifier): the frat kid +Definitional bias: 0.0550 + +DIXIELAND +Def.: (sometimes lowercase) a style of jazz, originating in New Orleans, played by a small group of instruments, as trumpet, trombone, clarinet, piano, and drums, and marked by strongly accented four-four rhythm and vigorous, quasi-improvisational solos and ensembles. +Def.: Also Dixie Land . Dixie (def. 1). +Def.: a form of jazz that originated in New Orleans, becoming popular esp with White musicians in the second decade of the 20th century +Def.: a revival of this style in the 1950s +Def.: See Dixie (def. 1) +Definitional bias: 0.0535 + +MANASSAS +Def.: a town in NE Virginia: battles of Bull Run 1861, 1862. +Def.: a town in NE Virginia, west of Alexandria: site of the victory of Confederate forces in the Battles of Bull Run, or First and Second Manassas (1861; 1862), during the American Civil War. Pop: 37 166 (2003 est) +Definitional bias: 0.0507 + +AVE. +Def.: hail; welcome. +Def.: farewell; goodbye. +Def.: the salutation “ave.” +Def.: (initial capital letter) Ave Maria. +Def.: avenue. 
+Def.: welcome or farewell +Def.: short for Ave Maria See Hail Mary +Def.: the time for the Angelus to be recited, so called because of the threefold repetition of the Ave Maria in this devotion +Def.: the beads of the rosary used to count the number of Ave Marias said +Def.: avenue +Definitional bias: 0.0496 + +DIXY +Def.: dixie. +Definitional bias: 0.0474 + +XAVERIAN +Def.: Saint Francis Francisco Javier; "the Apostle of the Indies", 1506–52, Spanish Jesuit missionary, especially in India and Japan. +Def.: a male given name: from an Arabic word meaning “bright.†+Def.: Saint Francis, known as the Apostle of the Indies. 1506–52, Spanish missionary, who was a founding member of the Jesuit society (1534) and later preached in Goa, Ceylon, the East Indies, and Japan. Feast day: Dec 3 +Definitional bias: 0.0456 + +IGNATIUS +Def.: Saint Ignatius Theophorus, a.d. c40–107?, bishop of Antioch and Apostolic Father. +Def.: Saint Nicetas, a.d. 799?–878, patriarch of Constantinople 846–858, 867–878. +Def.: Saint, surnamed Theophorus. died ?110 ad, bishop of Antioch. His seven letters, written on his way to his martyrdom in Rome, give valuable insight into the early Christian Church. Feast day: Oct 17 or Dec 17 or 20 +Definitional bias: 0.0455 + +X-HEIGHT +Def.: the height of a lowercase x. +Def.: printing the height of lower case letters of a typeface, without ascenders or descenders +Definitional bias: 0.0291 + +OVERPRESSURE +Def.: pressure in excess of normal atmospheric pressure, as that caused by an explosion's shock wave or created in an accelerating airplane. +Def.: to cause or expose to overpressure. +Def.: to make undue demands on by a regimen, work load, etc.: students overpressured with heavy academic schedules. 
+Def.: the blast effect of a nuclear weapon expressed as an amount of pressure greater than normal barometric pressure +Definitional bias: 0.0284 + +ENKEPHALIN +Def.: either of two pentapeptides that bind to morphine receptors in the central nervous system and have opioid properties of relatively short duration; one pentapeptide (Met enkephalin ) has the amino acid sequence Tyr-Gly-Gly-Phe-Met and the other (Leu enkephalin ) has the sequence Tyr-Gly-Gly-Phe-Leu. +Def.: a chemical occurring in the brain, having effects similar to those of morphine: See also endorphin +Definitional bias: 0.0277 + +HE +Def.: the male person or animal being discussed or last mentioned; that male. +Def.: anyone (without reference to gender); that person: He who hesitates is lost. +Def.: any male person or animal; a man: hes and shes. +Def.: male (usually used in combination): a he-goat. +Def.: the fifth letter of the Hebrew alphabet. +Def.: any of the sounds represented by this letter. +Def.: helium. +Def.: high explosive. +Def.: high explosive.: Also HE +Def.: His Eminence. +Def.: His Excellency; Her Excellency. +Def.: refers to a male person or animal: he looks interesting; he's a fine stallion +Def.: refers to an indefinite antecedent such as one, whoever, or anybody: everybody can do as he likes in this country +Def.: refers to a person or animal of unknown or unspecified sex: a member of the party may vote as he sees fit +Def.: +Def.: a male person or animal +Def.: (in combination): he-goat +Def.: +Def.: a children's game in which one player chases the others in an attempt to touch one of them, who then becomes the chaser: Compare tag 2 +Def.: the person chasing: Compare it 1 (def. 7) +Def.: the fifth letter of the Hebrew alphabet (ה), transliterated as h +Def.: an expression of amusement or derision: Also: he-he!, hee-hee! 
+Def.: helium +Def.: high explosive +Def.: His Eminence +Def.: His (or Her) Excellency +Definitional bias: 0.0242 + +CAVALIERS +Def.: a horseman, especially a mounted soldier; knight. +Def.: one having the spirit or bearing of a knight; a courtly gentleman; gallant. +Def.: a man escorting a woman or acting as her partner in dancing. +Def.: (initial capital letter) an adherent of Charles I of England in his contest with Parliament. +Def.: haughty, disdainful, or supercilious: an arrogant and cavalier attitude toward others. +Def.: offhand or unceremonious: The very dignified officials were confused by his cavalier manner. +Def.: (initial capital letter) of or relating to the Cavaliers. +Def.: (initial capital letter) of, relating to, or characteristic of the Cavalier poets or their work. +Def.: to play the cavalier. +Def.: to be haughty or domineering. +Def.: showing haughty disregard; offhand +Def.: a gallant or courtly gentleman, esp one acting as a lady's escort +Def.: archaic a horseman, esp one who is armed +Def.: a supporter of Charles I during the English Civil War: Compare Roundhead +Definitional bias: 0.0188 + +UNPLEDGED +Def.: a solemn promise or agreement to do or refrain from doing something: a pledge of aid; a pledge not to wage war. +Def.: something delivered as security for the payment of a debt or fulfillment of a promise, and subject to forfeiture on failure to pay or fulfill the promise. +Def.: the state of being given or held as security: to put a thing in pledge. +Def.: Law. +Def.: the act of delivering goods, property, etc., to another for security. +Def.: the resulting legal relationship. +Def.: something given or regarded as a security. +Def.: a person accepted for membership in a club, fraternity, or sorority, but not yet formally approved. +Def.: an assurance of support or goodwill conveyed by drinking a person's health; a toast. +Def.: Obsolete. +Def.: a hostage. +Def.: a person who becomes bail or surety for another. 
+Def.: to bind by or as if by a pledge: to pledge hearers to secrecy. +Def.: to promise solemnly: to pledge one's support. +Def.: to give or deposit as a pledge; pawn. +Def.: to stake, as one's honor. +Def.: to secure by a pledge; give a pledge for. +Def.: to accept as a pledge for club, fraternity, or sorority membership. +Def.: to drink a health or toast to. +Def.: to make or give a pledge: to pledge for someone. +Def.: to drink a pledge; toast someone's health, success, etc. +Def.: take the pledge, to make a solemn, formal vow to abstain from intoxicating drink. +Def.: a formal or solemn promise or agreement, esp to do or refrain from doing something +Def.: +Def.: collateral for the payment of a debt or the performance of an obligation +Def.: the condition of being collateral (esp in the phrase in pledge) +Def.: a sign, token, or indication: the gift is a pledge of their sincerity +Def.: an assurance of support or goodwill, conveyed by drinking to a person, cause, etc; toast: we drank a pledge to their success +Def.: a person who binds himself, as by becoming bail or surety for another +Def.: sign the pledge or take the pledge to make a vow to abstain from alcoholic drink +Def.: to promise formally or solemnly: he pledged allegiance +Def.: (tr) to bind or secure by or as if by a pledge: they were pledged to secrecy +Def.: to give, deposit, or offer (one's word, freedom, property, etc) as a guarantee, as for the repayment of a loan +Def.: to drink a toast to (a person, cause, etc) +Definitional bias: 0.0170 + +LIWAN +Def.: iwan. +Definitional bias: 0.0164 + +DOOBIE +Def.: a marijuana cigarette. +Definitional bias: 0.0110 + +EMBIID +Def.: web spinner. +Definitional bias: 0.0093 + +SCOTTIE +Def.: Scottish terrier. +Def.: a male given name, form of Scott. +Def.: a female given name. +Def.: See Scottish terrier +Def.: informal a Scotsman +Definitional bias: 0.0033 + +DIST +Def.: distance. +Def.: distant. +Def.: distinguish. +Def.: distinguished. +Def.: district. 
+Definitional bias: 0.0030 + +|-ALIGN=CENTER +Definitional bias: 0.0000 + +FONT-SIZE:8PT +Definitional bias: 0.0000 + +|L +Definitional bias: 0.0000 + +GORCEY +Definitional bias: 0.0000 + +OVERPOTENTIAL +Def.: overvoltage. +Definitional bias: -0.0280 + diff --git a/DD-GloVe/seed_word_tests/he_seedwords_full_definition.txt b/DD-GloVe/seed_word_tests/he_seedwords_full_definition.txt new file mode 100644 index 0000000000000000000000000000000000000000..e75a192dc6e37f09d589ddbace6af52031e6e062 --- /dev/null +++ b/DD-GloVe/seed_word_tests/he_seedwords_full_definition.txt @@ -0,0 +1,87 @@ +HE +Def.: the male person or animal being discussed or last mentioned; that male. +Def.: anyone (without reference to gender); that person: He who hesitates is lost. +Def.: any male person or animal; a man: hes and shes. +Def.: male (usually used in combination): a he-goat. +Def.: the fifth letter of the Hebrew alphabet. +Def.: any of the sounds represented by this letter. +Def.: helium. +Def.: high explosive. +Def.: high explosive.: Also HE +Def.: His Eminence. +Def.: His Excellency; Her Excellency. +Def.: refers to a male person or animal: he looks interesting; he's a fine stallion +Def.: refers to an indefinite antecedent such as one, whoever, or anybody: everybody can do as he likes in this country +Def.: refers to a person or animal of unknown or unspecified sex: a member of the party may vote as he sees fit +Def.: +Def.: a male person or animal +Def.: (in combination): he-goat +Def.: +Def.: a children's game in which one player chases the others in an attempt to touch one of them, who then becomes the chaser: Compare tag 2 +Def.: the person chasing: Compare it 1 (def. 7) +Def.: the fifth letter of the Hebrew alphabet (ה), transliterated as h +Def.: an expression of amusement or derision: Also: he-he!, hee-hee! 
+Def.: helium +Def.: high explosive +Def.: His Eminence +Def.: His (or Her) Excellency +Definitional bias: 0.0242 + +HE/SHE +Def.: the male person or animal being discussed or last mentioned; that male. +Def.: anyone (without reference to gender); that person: He who hesitates is lost. +Def.: any male person or animal; a man: hes and shes. +Def.: male (usually used in combination): a he-goat. +Def.: the fifth letter of the Hebrew alphabet. +Def.: any of the sounds represented by this letter. +Def.: helium. +Def.: high explosive. +Def.: high explosive.: Also HE +Def.: His Eminence. +Def.: His Excellency; Her Excellency. +Def.: refers to a male person or animal: he looks interesting; he's a fine stallion +Def.: refers to an indefinite antecedent such as one, whoever, or anybody: everybody can do as he likes in this country +Def.: refers to a person or animal of unknown or unspecified sex: a member of the party may vote as he sees fit +Def.: +Def.: a male person or animal +Def.: (in combination): he-goat +Def.: +Def.: a children's game in which one player chases the others in an attempt to touch one of them, who then becomes the chaser: Compare tag 2 +Def.: the person chasing: Compare it 1 (def. 7) +Def.: the fifth letter of the Hebrew alphabet (ה), transliterated as h +Def.: an expression of amusement or derision: Also: he-he!, hee-hee! +Def.: helium +Def.: high explosive +Def.: His Eminence +Def.: His (or Her) Excellency +Definitional bias: 0.0242 + +H.E +Def.: the male person or animal being discussed or last mentioned; that male. +Def.: anyone (without reference to gender); that person: He who hesitates is lost. +Def.: any male person or animal; a man: hes and shes. +Def.: male (usually used in combination): a he-goat. +Def.: the fifth letter of the Hebrew alphabet. +Def.: any of the sounds represented by this letter. +Def.: helium. +Def.: high explosive. +Def.: high explosive.: Also HE +Def.: His Eminence. +Def.: His Excellency; Her Excellency. 
+Def.: refers to a male person or animal: he looks interesting; he's a fine stallion +Def.: refers to an indefinite antecedent such as one, whoever, or anybody: everybody can do as he likes in this country +Def.: refers to a person or animal of unknown or unspecified sex: a member of the party may vote as he sees fit +Def.: +Def.: a male person or animal +Def.: (in combination): he-goat +Def.: +Def.: a children's game in which one player chases the others in an attempt to touch one of them, who then becomes the chaser: Compare tag 2 +Def.: the person chasing: Compare it 1 (def. 7) +Def.: the fifth letter of the Hebrew alphabet (ה), transliterated as h +Def.: an expression of amusement or derision: Also: he-he!, hee-hee! +Def.: helium +Def.: high explosive +Def.: His Eminence +Def.: His (or Her) Excellency +Definitional bias: 0.0242 + diff --git a/DD-GloVe/seed_word_tests/he_seedwords_own_calc.txt b/DD-GloVe/seed_word_tests/he_seedwords_own_calc.txt new file mode 100644 index 0000000000000000000000000000000000000000..3b035ad8de4e899f7d5ecf48ea8f7e4f3a8c952a --- /dev/null +++ b/DD-GloVe/seed_word_tests/he_seedwords_own_calc.txt @@ -0,0 +1,215 @@ +BEREA +Def.: a city in NE Ohio, near Cleveland. +Definitional bias: 0.0821 + +RIVERDALE +Def.: a city in NE Illinois. +Definitional bias: 0.0711 + +HI-HATS +Def.: a set of high-hat cymbals. +Def.: a variant spelling of high-hat (def. 4) +Definitional bias: 0.0694 + +BECKLEY +Def.: a city in SW West Virginia. +Definitional bias: 0.0646 + +HYATTSVILLE +Def.: a city in central Maryland. +Definitional bias: 0.0601 + +MIDAMERICA +Def.: Middle America (def. 2). +Definitional bias: 0.0560 + +AVE. +Def.: hail; welcome. +Def.: farewell; goodbye. +Def.: the salutation “ave.†+Def.: (initial capital letter) Ave Maria. +Def.: avenue. 
+Def.: welcome or farewell +Def.: short for Ave Maria See Hail Mary +Def.: the time for the Angelus to be recited, so called because of the threefold repetition of the Ave Maria in this devotion +Def.: the beads of the rosary used to count the number of Ave Marias said +Def.: avenue +Definitional bias: 0.0496 + +DIXY +Def.: dixie. +Definitional bias: 0.0474 + +XAVERIAN +Def.: Saint Francis Francisco Javier; "the Apostle of the Indies", 1506–52, Spanish Jesuit missionary, especially in India and Japan. +Def.: a male given name: from an Arabic word meaning “bright.†+Def.: Saint Francis, known as the Apostle of the Indies. 1506–52, Spanish missionary, who was a founding member of the Jesuit society (1534) and later preached in Goa, Ceylon, the East Indies, and Japan. Feast day: Dec 3 +Definitional bias: 0.0456 + +IGNATIUS +Def.: Saint Ignatius Theophorus, a.d. c40–107?, bishop of Antioch and Apostolic Father. +Def.: Saint Nicetas, a.d. 799?–878, patriarch of Constantinople 846–858, 867–878. +Def.: Saint, surnamed Theophorus. died ?110 ad, bishop of Antioch. His seven letters, written on his way to his martyrdom in Rome, give valuable insight into the early Christian Church. Feast day: Oct 17 or Dec 17 or 20 +Definitional bias: 0.0455 + +REVEILLE +Def.: a signal, as of a drum or bugle, sounded early in the morning to awaken military personnel and to alert them for assembly. +Def.: a signal to arise. +Def.: a signal, given by a bugle, drum, etc, to awaken soldiers or sailors in the morning +Def.: the hour at which this takes place +Definitional bias: 0.0446 + +SHAFFER +Def.: Sir Peter. born 1926, British dramatist. His plays include The Royal Hunt of the Sun (1964), Equus (1973), Amadeus (1979), and The Gift of the Gorgon (1992) +Definitional bias: 0.0438 + +JR/SR +Def.: junior. +Def.: Journal. +Def.: Junior. +Def.: junior +Definitional bias: 0.0411 + +WYTHE +Def.: George, 1729–1806, U.S. jurist and statesman. 
+Definitional bias: 0.0405 + +ASCENDERS +Def.: a person or thing that ascends or causes ascension. +Def.: Printing. +Def.: the part of a lowercase letter, as b, d, f, h, that rises above x-height. +Def.: a letter rising above x-height, as b, d, f, h, etc. +Def.: printing +Def.: the part of certain lower-case letters, such as b or h, that extends above the body of the letter +Def.: any letter having such a part +Def.: a person or thing that ascends +Def.: another word for ascendeur +Definitional bias: 0.0401 + +PUDDINGSTONE +Def.: conglomerate (def. 3). +Def.: a conglomerate rock in which there is a difference in colour or composition between the pebbles and the matrix +Definitional bias: 0.0383 + +YODH +Def.: the 10th letter of the Hebrew alphabet. +Def.: any of the sounds represented by this letter. +Def.: the tenth letter in the Hebrew alphabet (י), transliterated as y +Definitional bias: 0.0368 + +ZAYIN +Def.: the seventh letter of the Hebrew alphabet. +Def.: the consonant sound represented by this letter. +Def.: the seventh letter of the Hebrew alphabet (ז), transliterated as z +Definitional bias: 0.0356 + +NUMERATORS +Def.: Arithmetic. the term of a fraction, usually above the line, that indicates the number of equal parts that are to be added together; the dividend placed over a divisor: The numerator of the fraction 2/3 is 2.: Compare denominator (def. 1). +Def.: a person or thing that numbers. +Def.: maths the dividend of a fraction: the numerator of 7/8 is 7 Compare denominator +Def.: a person or thing that numbers; enumerator +Def.: A number written above or to the left of the line in a common fraction to indicate the number of parts of the whole. For example, 2 is the numerator in the fraction 27. +Definitional bias: 0.0330 + +X-HEIGHT +Def.: the height of a lowercase x. 
+Def.: printing the height of lower case letters of a typeface, without ascenders or descenders +Definitional bias: 0.0291 + +OVERPRESSURE +Def.: pressure in excess of normal atmospheric pressure, as that caused by an explosion's shock wave or created in an accelerating airplane. +Def.: to cause or expose to overpressure. +Def.: to make undue demands on by a regimen, work load, etc.: students overpressured with heavy academic schedules. +Def.: the blast effect of a nuclear weapon expressed as an amount of pressure greater than normal barometric pressure +Definitional bias: 0.0284 + +ENKEPHALIN +Def.: either of two pentapeptides that bind to morphine receptors in the central nervous system and have opioid properties of relatively short duration; one pentapeptide (Met enkephalin ) has the amino acid sequence Tyr-Gly-Gly-Phe-Met and the other (Leu enkephalin ) has the sequence Tyr-Gly-Gly-Phe-Leu. +Def.: a chemical occurring in the brain, having effects similar to those of morphine: See also endorphin +Definitional bias: 0.0277 + +HE +Def.: the male person or animal being discussed or last mentioned; that male. +Def.: anyone (without reference to gender); that person: He who hesitates is lost. +Def.: any male person or animal; a man: hes and shes. +Def.: male (usually used in combination): a he-goat. +Def.: the fifth letter of the Hebrew alphabet. +Def.: any of the sounds represented by this letter. +Def.: helium. +Def.: high explosive. +Def.: high explosive.: Also HE +Def.: His Eminence. +Def.: His Excellency; Her Excellency. 
+Def.: refers to a male person or animal: he looks interesting; he's a fine stallion +Def.: refers to an indefinite antecedent such as one, whoever, or anybody: everybody can do as he likes in this country +Def.: refers to a person or animal of unknown or unspecified sex: a member of the party may vote as he sees fit +Def.: +Def.: a male person or animal +Def.: (in combination): he-goat +Def.: +Def.: a children's game in which one player chases the others in an attempt to touch one of them, who then becomes the chaser: Compare tag 2 +Def.: the person chasing: Compare it 1 (def. 7) +Def.: the fifth letter of the Hebrew alphabet (ה), transliterated as h +Def.: an expression of amusement or derision: Also: he-he!, hee-hee! +Def.: helium +Def.: high explosive +Def.: His Eminence +Def.: His (or Her) Excellency +Definitional bias: 0.0242 + +BLIMEY +Def.: (used to express surprise or excitement.) +Def.: British slang an exclamation of surprise or annoyance +Definitional bias: 0.0223 + +SHOTT +Def.: a shallow temporary salt lake or marsh in the North African desert +Def.: the hollow in which it lies +Def.: A shallow lake or marsh with brackish or saline water, especially in northern Africa. Shotts are dry during the summer, at which time they are also characterized by salt deposits and a lack of vegetation. +Definitional bias: 0.0222 + +CAVALIERS +Def.: a horseman, especially a mounted soldier; knight. +Def.: one having the spirit or bearing of a knight; a courtly gentleman; gallant. +Def.: a man escorting a woman or acting as her partner in dancing. +Def.: (initial capital letter) an adherent of Charles I of England in his contest with Parliament. +Def.: haughty, disdainful, or supercilious: an arrogant and cavalier attitude toward others. +Def.: offhand or unceremonious: The very dignified officials were confused by his cavalier manner. +Def.: (initial capital letter) of or relating to the Cavaliers. 
+Def.: (initial capital letter) of, relating to, or characteristic of the Cavalier poets or their work. +Def.: to play the cavalier. +Def.: to be haughty or domineering. +Def.: showing haughty disregard; offhand +Def.: a gallant or courtly gentleman, esp one acting as a lady's escort +Def.: archaic a horseman, esp one who is armed +Def.: a supporter of Charles I during the English Civil War: Compare Roundhead +Definitional bias: 0.0188 + +LIWAN +Def.: iwan. +Definitional bias: 0.0164 + +DOOBIE +Def.: a marijuana cigarette. +Definitional bias: 0.0110 + +EMBIID +Def.: web spinner. +Definitional bias: 0.0093 + +SCOTTIE +Def.: Scottish terrier. +Def.: a male given name, form of Scott. +Def.: a female given name. +Def.: See Scottish terrier +Def.: informal a Scotsman +Definitional bias: 0.0033 + +DIST +Def.: distance. +Def.: distant. +Def.: distinguish. +Def.: distinguished. +Def.: district. +Definitional bias: 0.0030 + diff --git a/DD-GloVe/seed_word_tests/jogging_seedwords.txt b/DD-GloVe/seed_word_tests/jogging_seedwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..0adf0c53672d952886da505af4c332bacc0598d2 --- /dev/null +++ b/DD-GloVe/seed_word_tests/jogging_seedwords.txt @@ -0,0 +1,155 @@ +HYADEN +Def.: Töchter des Atlas oder des Okeanos, die in ein Sternbild verwandelt werden +Def.: Sternhaufen im Sternbild Stier +Definitional bias: -0.1100 + +EHEMALIGER +Def.: ehemaliger Schüler, Student o. Ä., ehemaliges Mitglied von etwas +Def.: früherer Ehemann oder Freund +Definitional bias: -0.0212 + +ARBEITSTEMPO +Def.: Tempo, in dem jemand arbeitet +Definitional bias: -0.0085 + +ANLAGERN +Def.: an sich binden +Def.: sich mit einem anderen Stoff o. Ä. verbinden +Definitional bias: -0.0038 + +AUSGANGSPRODUKT +Def.: Produkt als Grundlage für die Herstellung eines neuen Artikels o. Ä. +Definitional bias: -0.0014 + +FAMILIENSITZ +Def.: größeres Besitztum einer meist wohlhabenden, adligen o. ä. 
Familie +Definitional bias: 0.0064 + +MATTE +Def.: Bergwiese +Def.: Unterlage o. Ä. aus grobem Geflecht oder Gewebe aus Binsen, künstlichen Fasern o. Ä. +Def.: Unterlage aus weichem, federndem Material mit festem Überzug (zur Abschwächung von Sprüngen beim Turnen, als Fläche für die Kämpfe im Ringen o. Ä.) +Definitional bias: 0.0082 + +HIMMEL +Def.: scheinbar über dem Horizont liegendes, halbkugelähnliches Gewölbe (an dem die Gestirne erscheinen) +Def.: der Hölle oder der Erde als dem Diesseits gegenübergestellter Aufenthalt Gottes (der Engel und der Seligen) +Def.: Gott, Schicksal, Vorsehung +Def.: [fest angebrachte] zum Teil hinten und an den Seiten heruntergezogene Überdachung aus Stoff, Leder o. Ä.; Baldachin +Def.: innere Bespannung des Verdecks im Auto +Definitional bias: 0.0091 + +AUSGANGSMATERIAL +Def.: Material, das als Grundlage für die Herstellung eines Produkts, für die Erarbeitung einer Konzeption o. Ä. verwendet wird +Definitional bias: 0.0117 + +RÜCKBILDUNG +Def.: funktions- oder altersbedingte Rückentwicklung oder Verkümmerung von Organen o. Ä. +Def.: das Abklingen von Krankheitserscheinungen +Def.: Wort, das historisch gesehen aus einem Verb oder Adjektiv abgeleitet ist, aber wegen seiner Kürze den Anschein erweckt, die Grundlage dieses Verbs oder Adjektivs zu sein; retrograde Bildung +Definitional bias: 0.0163 + +STANDZEIT +Def.: Zeitdauer, während deren man mit einem Werkzeug o. Ä. arbeiten kann, ohne dass erhebliche Verschleißerscheinungen auftreten +Def.: Zeit, in der eine Maschine, ein Fahrzeug o. Ä. nicht arbeitet, nicht läuft, stillsteht +Definitional bias: 0.0193 + +KRIEGSVETERAN +Def.: jemand, der als Soldat einen Krieg mitgemacht hat +Definitional bias: 0.0197 + +AUSGRABEN +Def.: durch Graben wieder aus der Erde o. Ä. 
hervor-, herausholen +Def.: (unter der Erdoberfläche Liegendes [Verschüttetes]) freilegen +Def.: (Pflanzen, Bäume) grabend aus dem Erdreich, in dem sie verwurzelt sind, herausnehmen +Def.: (Altes, Vergessenes) wieder hervorholen, wieder ans Licht ziehen; (Abgetanes) wieder aufleben lassen +Def.: sich freischaufeln +Def.: (eine Vertiefung o. Ä.) durch Graben herstellen; ausheben +Definitional bias: 0.0249 + +KRIMPEN +Def.: einschrumpfen, eingehen, einlaufen +Def.: einschrumpfen lassen +Def.: (vom Wind) sich (auf der nördlichen Halbkugel) entgegen dem Uhrzeigersinn oder (auf der südlichen Halbkugel) im Uhrzeigersinn drehen +Definitional bias: 0.0289 + +FUSSEN +Def.: etwas als Basis, in etwas seine Grundlage haben +Def.: (von Raubvögeln) sich niederlassen +Definitional bias: 0.0309 + +PACEMAKER +Def.: Pferd, das die Pace macht +Def.: Herzschrittmacher +Def.: Läufer, der bei Langstreckenläufen zunächst an der Spitze des Feldes läuft und das Tempo bestimmt, sodass die stärkeren Läufer möglichst lange in seinem Windschatten laufen können +Definitional bias: 0.0346 + +TUMORGEWEBE +Def.: Gewebe des Tumors +Definitional bias: 0.0347 + +TÜRHÜTER +Def.: jemand, der vor einer Tür steht und darüber wacht, dass kein Unerwünschter, Unbefugter o. Ä. eintritt +Definitional bias: 0.0401 + +UNTERMALEN +Def.: etwas mit Musik, Geräuschen o. Ä. 
begleiten +Def.: (besonders von Tafelmalereien) die erste Farbschicht auf den [grundierten] Malgrund auftragen +Definitional bias: 0.0437 + +TRABEN +Def.: im Trab laufen, reiten +Def.: in mäßigem Tempo irgendwohin laufen +Definitional bias: 0.0440 + +FITNESSTRAINER +Def.: jemand, der beruflich Fitnesstraining erteilt +Definitional bias: 0.0475 + +PRESTO +Def.: schnell, in eilendem Tempo +Def.: schnelles, eilendes Tempo +Def.: Musikstück mit der Tempobezeichnung «presto» +Definitional bias: 0.0501 + +ANTIBABYPILLE +Def.: empfängnisverhütendes Mittel in Pillenform auf hormonaler Grundlage +Definitional bias: 0.0525 + +FRÜHERKENNUNG +Def.: frühzeitige Erkennung einer Beschaffenheit, Entwicklung o. Ä. +Definitional bias: 0.0754 + +JOGGING +Def.: Fitnesstraining, bei dem man entspannt in mäßigem Tempo läuft +Definitional bias: 0.0767 + +KRIMINALISTIK +Def.: (als Teilbereich der Kriminologie) Wissenschaft, Lehre von der Aufklärung und Verhinderung von Verbrechen +Definitional bias: 0.0982 + +VERWUNDEN +Def.: (besonders im Krieg durch Waffen o. Ä.) jemandem eine Wunde, Wunden beibringen +Definitional bias: 0.0982 + +RELIGION +Def.: (meist von einer größeren Gemeinschaft angenommener) bestimmter, durch Lehre und Satzungen festgelegter Glaube und sein Bekenntnis +Def.: gläubig verehrende Anerkennung einer alles Sein bestimmenden göttlichen Macht; religiöse Weltanschauung +Def.: Religionslehre als Schulfach, Religionsunterricht +Definitional bias: 0.1074 + +RENNEN +Def.: sportlicher Wettbewerb, bei dem die Schnelligkeit, mit der eine Strecke zurückgelegt wird, über den Sieg entscheidet +Def.: schnell, in großem Tempo, meist mit ausholenden Schritten laufen +Def.: sich zum Missfallen, Ärger o. Ä. 
anderer zu einem bestimmten Zweck irgendwohin begeben, jemanden aufsuchen +Def.: unversehens, mit einer gewissen Wucht an jemanden, etwas stoßen, gegen jemanden, etwas prallen +Def.: sich durch Anstoßen, durch einen Aufprall an einem Körperteil eine Verletzung zuziehen +Def.: jemanden, sich, einen Körperteil stoßen [und dabei verletzen] +Def.: jemandem, sich mit Heftigkeit einen [spitzen] Gegenstand in einen Körperteil stoßen +Definitional bias: 0.1278 + +GESCHWINDIGKEIT +Def.: Verhältnis von zurückgelegtem Weg zu aufgewendeter Zeit +Def.: Schnelligkeit, Tempo +Definitional bias: 0.1374 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords.txt b/DD-GloVe/seed_word_tests/mann_seedwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..600653a3df90ebbd486fd1300eb2dccb50e0e4e0 --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords.txt @@ -0,0 +1,144 @@ +MANN +Def.: erwachsene Person männlichen Geschlechts +Def.: Ehemann +Def.: Lehns-, Gefolgsleute +Def.: als burschikose Anrede, ohne persönlichen Bezug in Ausrufen des Staunens, Erschreckens, der Bewunderung +Definitional bias: 0.1405 + +GEFRIEREN +Def.: durch Kälte zu Eis erstarren, fest und hart werden +Def.: einfrieren, tiefgefrieren +Definitional bias: 0.0623 + +METTE +Def.: mitternächtlicher oder frühmorgendlicher Gottesdienst vor einem hohen kirchlichen Fest +Definitional bias: 0.0553 + +ERLEGEN +Def.: (ein Tier) [durch einen Schuss] töten, niederstrecken +Def.: (einen Geldbetrag) hergeben, bezahlen +Definitional bias: 0.0491 + +STIMMBRUCH +Def.: Stimmwechsel bei männlichen Jugendlichen in der Pubertät, der sich in einer zwischen Höhe und Tiefe unkontrolliert schwankenden, leicht überschnappenden Stimme ausdrückt und zu einem allmählichen Tieferwerden der Stimme führt +Definitional bias: 0.0418 + +SCHRIFTSATZ +Def.: Satz +Def.: (im gerichtlichen Verfahren) schriftliche Erklärung der am Verfahren beteiligten Parteien +Definitional bias: 0.0364 + +EINHELLIG +Def.: gänzlich, in allen 
Punkten übereinstimmend; von allen ausnahmslos vertreten +Definitional bias: 0.0260 + +TEAMBEWERB +Def.: Mannschaftswettbewerb +Definitional bias: 0.0232 + +SCHWULENSZENE +Def.: Milieu, Szene der männlichen Homosexuellen +Definitional bias: 0.0205 + +HERRENZIMMER +Def.: Zimmer, in dem der Hausherr seine [männlichen] Gäste empfängt [und in dem geraucht wird] +Definitional bias: 0.0202 + +GRILLE +Def.: den Heuschrecken ähnliches, besonders in der Nacht aktives Insekt, bei dem die männlichen Tiere einen zirpenden Laut hervorbringen +Def.: sehr sonderbarer, schrulliger Gedanke, Einfall +Definitional bias: 0.0202 + +FEMINISMUS +Def.: Richtung der Frauenbewegung, die, von den Bedürfnissen der Frau ausgehend, eine grundlegende Veränderung der gesellschaftlichen Normen (z. B. der traditionellen Rollenverteilung) und der patriarchalischen Kultur anstrebt +Def.: das Vorhandensein oder die Ausbildung weiblicher Geschlechtsmerkmale beim Mann oder bei einem männlichen Tier; Verweiblichung +Definitional bias: -0.0011 + +URSPRÜNGLICHKEIT +Def.: ursprüngliche Beschaffenheit +Def.: ursprüngliches Wesen, Natürlichkeit +Def.: etwas Ursprüngliches +Definitional bias: -0.0100 + +ZIKADE +Def.: kleines, der Grille ähnliches Insekt, bei dem die männlichen Tiere laute, zirpende Töne hervorbringen +Definitional bias: -0.0121 + +KUDU +Def.: (in Afrika heimische) Antilope mit braunrotem, weiße Querstreifen aufweisendem Fell, vom Hals zum Rücken verlaufender kurzer Mähne und (beim männlichen Tier) gedrehten Hörnern +Definitional bias: -0.0127 + +FRAKTIONSSPRECHER +Def.: Sprecher einer Fraktion +Definitional bias: -0.0179 + +FRENULUM +Def.: kleine Haut- bzw. 
Schleimhautfalte +Def.: Hautfalte, die die Eichel des männlichen Gliedes mit der Vorhaut verbindet +Definitional bias: -0.0202 + +SCHAF +Def.: mittelgroßes Säugetier mit dickem, wolligem Fell und beim männlichen Tier oft großen, gewundenen Hörnern, das als Wolle, Fleisch, auch Milch lieferndes Nutztier gehalten wird +Def.: gutmütig-einfältiger Mensch +Def.: Kosewort, besonders für Kinder +Definitional bias: -0.0237 + +GEGENÜBER +Def.: auf der entgegengesetzten Seite +Def.: Person, die jemandem gegenübersitzt oder -steht +Def.: Bewohner[in] der gegenüberliegenden Häuserfront +Def.: das Entgegengesetztsein +Definitional bias: -0.0246 + +AASFRESSER +Def.: Tier, das sich von Aas ernährt +Definitional bias: -0.0408 + +ZIMT +Def.: Gewürz aus der getrockneten Rinde des Zimtbaumes, das zum Würzen von Süßspeisen, Glühwein o. Ä. verwendet wird +Def.: etwas, was für dumm, unsinnig, wertlos gehalten wird, was jemandem lästig o. ä. ist +Definitional bias: -0.0415 + +ATLANT +Def.: Gebälkträger in Form einer männlichen Figur +Definitional bias: -0.0488 + +KNOBLAUCH +Def.: (zu den Lauchen gehörende) Pflanze mit Doldenblüten, die zahlreiche um die Sprossachse herum angeordnete Brutzwiebeln ausbildet +Def.: als Gewürz und Heilmittel verwendete Brutzwiebeln des Knoblauchs mit strengem, durchdringendem Geruch und Geschmack +Definitional bias: -0.0526 + +MOSCHUS +Def.: stark riechendes Sekret der männlichen Moschustiere, das besonders bei der Herstellung von Parfums verwendet wird +Def.: aus Moschus gewonnener oder ähnlicher synthetisch hergestellter Duftstoff +Definitional bias: -0.0533 + +BROKKOLI +Def.: dem Blumenkohl ähnlicher Gemüsekohl mit grünem Blütenstand +Definitional bias: -0.0580 + +NEBENHODEN +Def.: den Samen speicherndes und ableitendes Organ des männlichen Geschlechtsapparates +Definitional bias: -0.0582 + +ER +Def.: Person oder Tier männlichen Geschlechts +Definitional bias: -0.0702 + +EICHEL +Def.: länglich runde Frucht der Eiche +Def.: vorderster Teil des männlichen 
Gliedes +Def.: vorderster Teil des Kitzlers +Def.: Farbe im deutschen Kartenspiel; Eckern +Definitional bias: -0.0722 + +PANZERABWEHRRAKETE +Def.: zur Panzerabwehr eingesetzte Rakete +Definitional bias: -0.0755 + +ANIS +Def.: (zu den Doldengewächsen gehörende) Pflanze mit kleinen weißen Doldenblüten, die als Gewürz- und Heilpflanze verwendet wird +Def.: als Gewürz verwendete dem Kümmel ähnliche getrocknete Früchte des Anis +Def.: auf der Grundlage von Anis hergestellter Branntwein +Definitional bias: -0.1150 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_cap30000.txt b/DD-GloVe/seed_word_tests/mann_seedwords_cap30000.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a629e21993bffe45eff6d7f78fe5b9d8b526a5a --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_cap30000.txt @@ -0,0 +1,168 @@ +MANN +Def.: erwachsene Person männlichen Geschlechts +Def.: Ehemann +Def.: Lehns-, Gefolgsleute +Def.: als burschikose Anrede, ohne persönlichen Bezug in Ausrufen des Staunens, Erschreckens, der Bewunderung +Definitional bias: 0.1571 + +MÄRTYRER +Def.: jemand, der um des christlichen Glaubens willen Verfolgungen, schweres körperliches Leid, den Tod auf sich nimmt +Def.: jemand, der sich für seine Überzeugung opfert oder Verfolgungen auf sich nimmt +Definitional bias: 0.1413 + +ENTLASTEN +Def.: die Beanspruchung einer Person oder Sache mindern +Def.: von einer seelischen Belastung befreien +Def.: [teilweise] von einer zur Last gelegten Schuld befreien +Def.: jemandes Geschäftsführung nach Prüfung gutheißen +Def.: durch Tilgung einer Schuld ausgleichen +Definitional bias: 0.1123 + +VISTA +Def.: das Vorzeigen eines Wechsels +Definitional bias: 0.1081 + +PRODUZIEREN +Def.: erzeugen, herstellen +Def.: für die Herstellung, Finanzierung von etwas sorgen, jemandes Musik-, Filmproduzent sein +Def.: machen; hervorbringen +Def.: sich [in einer bestimmten Weise] auffallend benehmen [um sein Können zu zeigen] +Def.: [herausnehmen und] vorzeigen, vorlegen, 
präsentieren +Definitional bias: 0.0854 + +RECHTSSTREIT +Def.: zwischen zwei Parteien bzw. Beteiligten in einem gerichtlichen Verfahren ausgetragene Auseinandersetzung über ein Rechtsverhältnis; Prozess +Definitional bias: 0.0769 + +ERNEUERUNG +Def.: das Erneuern; das Erneuertwerden +Definitional bias: 0.0758 + +VERSCHLÜSSELUNG +Def.: das Verschlüsseln; das Verschlüsseltwerden +Definitional bias: 0.0701 + +DIÖZESE +Def.: Amtsgebiet eines katholischen Bischofs; Bistum +Definitional bias: 0.0667 + +EXAMEN +Def.: Prüfung (besonders als Studienabschluss) +Definitional bias: 0.0358 + +KRIEGSBEDINGT +Def.: durch den Krieg bedingt, verursacht +Definitional bias: 0.0344 + +OSTERN +Def.: Fest der christlichen Kirche, mit dem die Auferstehung Christi gefeiert wird +Definitional bias: 0.0317 + +ÜBERZEUGT +Def.: fest an etwas Bestimmtes glaubend +Definitional bias: 0.0268 + +GEOGRAFIE +Def.: Wissenschaft von der Erde und ihrem Aufbau, von der Verteilung und Verknüpfung der verschiedensten Erscheinungen und Sachverhalte der Erdoberfläche, besonders hinsichtlich der Wechselwirkung zwischen Erde und Mensch; Erdkunde +Def.: geografisch bestimmter Raum +Definitional bias: 0.0198 + +AMBO +Def.: erhöhtes Pult in christlichen Kirchen für gottesdienstliche Lesungen +Def.: Verbindung zweier Größen in der Kombinationsrechnung +Def.: Lottotreffer mit zwei gezogenen Nummern +Definitional bias: 0.0193 + +KOMMEND +Def.: bevorstehend, nächst... +Definitional bias: -0.0054 + +FEMINISMUS +Def.: Richtung der Frauenbewegung, die, von den Bedürfnissen der Frau ausgehend, eine grundlegende Veränderung der gesellschaftlichen Normen (z. B. 
der traditionellen Rollenverteilung) und der patriarchalischen Kultur anstrebt +Def.: das Vorhandensein oder die Ausbildung weiblicher Geschlechtsmerkmale beim Mann oder bei einem männlichen Tier; Verweiblichung +Definitional bias: -0.0056 + +SCHNEIDER +Def.: Handwerker, der (aus Stoffen nach Maß) Kleidung anfertigt, näht +Def.: das Erreichen der Punktzahl 30 (als Verlierer) +Def.: (in einem Satz) das Erreichen von 11 Punkten (als Verlierer) +Def.: (in Bezug auf Hirsche, auch Auerhähne und Birkhähne) schwach entwickeltes Tier +Def.: Jäger, der auf der Treibjagd ohne Beute geblieben ist +Def.: kastrierter Eber +Def.: langbeiniges Insekt +Def.: Weberknecht +Def.: kleiner Karpfenfisch mit bräunlich grünem Rücken und gelblichen Bauch- und Brustflossen +Definitional bias: -0.0064 + +SCHWERPUNKT +Def.: Punkt, der als Angriffspunkt der (auf einen Körper oder ein anderes physikalisches System wirkenden) Schwerkraft zu denken ist +Def.: Zentrum +Definitional bias: -0.0079 + +ORTSMITTE +Def.: Mittelpunkt, Zentrum eines Ortes; Ortskern +Definitional bias: -0.0220 + +GEGENÜBER +Def.: auf der entgegengesetzten Seite +Def.: Person, die jemandem gegenübersitzt oder -steht +Def.: Bewohner[in] der gegenüberliegenden Häuserfront +Def.: das Entgegengesetztsein +Definitional bias: -0.0334 + +EIFÖRMIG +Def.: die Form eines Eis aufweisend +Definitional bias: -0.0468 + +KNOBLAUCH +Def.: (zu den Lauchen gehörende) Pflanze mit Doldenblüten, die zahlreiche um die Sprossachse herum angeordnete Brutzwiebeln ausbildet +Def.: als Gewürz und Heilmittel verwendete Brutzwiebeln des Knoblauchs mit strengem, durchdringendem Geruch und Geschmack +Definitional bias: -0.0519 + +JUNGE +Def.: Kind männlichen Geschlechts; Knabe +Def.: [junger] Mann +Def.: Bube +Definitional bias: -0.0568 + +MOHN +Def.: Milchsaft enthaltende Pflanze mit roten, violetten, gelben oder weißen Blüten und Kapselfrüchten, aus deren ölhaltigen Samen beruhigende und betäubende Stoffe gewonnen werden +Def.: Klatschmohn +Def.: Samen 
des Mohns +Definitional bias: -0.0636 + +LÖWE +Def.: (in Afrika heimisches) großes katzenartiges Raubtier mit kurzem graugelbem bis ockerfarbenem Fell, langem Schwanz und beim männlichen Tier langer Mähne um Nacken und Schultern +Def.: Wappentier in Gestalt eines Löwen +Def.: Tierkreiszeichen für die Zeit vom 23. 7. bis 23. 8. +Def.: jemand, der im Zeichen Löwe geboren ist +Def.: Sternbild beiderseits des Himmelsäquators +Definitional bias: -0.0711 + +ER +Def.: Person oder Tier männlichen Geschlechts +Definitional bias: -0.0732 + +ANDERWEITIG +Def.: sonst noch vorhanden, sonstig, weiter..., ander... +Def.: anderswo erfolgend, an anderer Stelle +Def.: anderswohin erfolgend, an eine andere Stelle, Person +Definitional bias: -0.0768 + +HERR +Def.: Mann (auch als übliche höfliche Bezeichnung für eine männliche Person im gesellschaftlichen Verkehr) +Def.: gebildeter, kultivierter, gepflegter Mann +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person männlichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Def.: jemand, der über andere oder über etwas herrscht; Gebieter; Besitzer +Def.: jemand, der jemanden, etwas unter Kontrolle hat, beherrscht +Def.: Gott +Definitional bias: -0.0809 + +APFEL +Def.: rundliche, fest-fleischige, aromatisch schmeckende Frucht mit Kerngehäuse; Frucht des Apfelbaums +Def.: Apfelbaum +Def.: Apfelsorte +Def.: Brüste +Definitional bias: -0.0940 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_cap30000_top10.txt b/DD-GloVe/seed_word_tests/mann_seedwords_cap30000_top10.txt new file mode 100644 index 0000000000000000000000000000000000000000..eacd7f7704e1e87a5307103f0a0f88a023ad7cde --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_cap30000_top10.txt @@ -0,0 +1,61 @@ +MANN +Def.: erwachsene Person männlichen Geschlechts +Def.: Ehemann +Def.: Lehns-, Gefolgsleute +Def.: als burschikose Anrede, ohne persönlichen Bezug in Ausrufen des Staunens, Erschreckens, der Bewunderung 
+Definitional bias: 0.0910 + +FEMINISMUS +Def.: Richtung der Frauenbewegung, die, von den Bedürfnissen der Frau ausgehend, eine grundlegende Veränderung der gesellschaftlichen Normen (z. B. der traditionellen Rollenverteilung) und der patriarchalischen Kultur anstrebt +Def.: das Vorhandensein oder die Ausbildung weiblicher Geschlechtsmerkmale beim Mann oder bei einem männlichen Tier; Verweiblichung +Definitional bias: 0.0829 + +PRODUZIEREN +Def.: erzeugen, herstellen +Def.: für die Herstellung, Finanzierung von etwas sorgen, jemandes Musik-, Filmproduzent sein +Def.: machen; hervorbringen +Def.: sich [in einer bestimmten Weise] auffallend benehmen [um sein Können zu zeigen] +Def.: [herausnehmen und] vorzeigen, vorlegen, präsentieren +Definitional bias: 0.0683 + +JUNGE +Def.: Kind männlichen Geschlechts; Knabe +Def.: [junger] Mann +Def.: Bube +Definitional bias: 0.0465 + +SCHWERPUNKT +Def.: Punkt, der als Angriffspunkt der (auf einen Körper oder ein anderes physikalisches System wirkenden) Schwerkraft zu denken ist +Def.: Zentrum +Definitional bias: 0.0398 + +KOMMEND +Def.: bevorstehend, nächst... 
+Definitional bias: -0.0181 + +KNOBLAUCH +Def.: (zu den Lauchen gehörende) Pflanze mit Doldenblüten, die zahlreiche um die Sprossachse herum angeordnete Brutzwiebeln ausbildet +Def.: als Gewürz und Heilmittel verwendete Brutzwiebeln des Knoblauchs mit strengem, durchdringendem Geruch und Geschmack +Definitional bias: -0.0212 + +GEGENÜBER +Def.: auf der entgegengesetzten Seite +Def.: Person, die jemandem gegenübersitzt oder -steht +Def.: Bewohner[in] der gegenüberliegenden Häuserfront +Def.: das Entgegengesetztsein +Definitional bias: -0.0274 + +ER +Def.: Person oder Tier männlichen Geschlechts +Definitional bias: -0.0348 + +HERR +Def.: Mann (auch als übliche höfliche Bezeichnung für eine männliche Person im gesellschaftlichen Verkehr) +Def.: gebildeter, kultivierter, gepflegter Mann +Def.: titelähnliche, auch als Anrede verwendete Bezeichnung für eine erwachsene Person männlichen Geschlechts +Def.: als Zusatz bei Verwandtschaftsbezeichnungen +Def.: jemand, der über andere oder über etwas herrscht; Gebieter; Besitzer +Def.: jemand, der jemanden, etwas unter Kontrolle hat, beherrscht +Def.: Gott +Definitional bias: -0.0574 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_custom_def.txt b/DD-GloVe/seed_word_tests/mann_seedwords_custom_def.txt new file mode 100644 index 0000000000000000000000000000000000000000..372ac7d9d1d0194649956943468ae9fed3b01485 --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_custom_def.txt @@ -0,0 +1,137 @@ +GROSSONKEL +Def.: Bruder eines Großelternteils +Def.: Ehemann einer Großtante +Definitional bias: 0.1305 + +CHARMEUR +Def.: Mann, der mit gezieltem Charme Frauen für sich einzunehmen versteht +Definitional bias: 0.1141 + +MANN +Def.: erwachsene Person männlichen Geschlechts +Def.: Ehemann +Def.: Lehns-, Gefolgsleute +Def.: als burschikose Anrede, ohne persönlichen Bezug in Ausrufen des Staunens, Erschreckens, der Bewunderung +Definitional bias: 0.1054 + +ZUSAMMENHÄNGEN +Def.: mit etwas, miteinander fest verbunden sein 
+Def.: mit etwas in Beziehung, in Zusammenhang stehen +Definitional bias: 0.0872 + +TEMPERATURGEFÄLLE +Def.: Gefälle der Temperatur +Definitional bias: 0.0698 + +THRONSAAL +Def.: Saal, in dem der Thron steht +Definitional bias: 0.0698 + +FRAUENFEINDLICH +Def.: den Frauen schadend, sie benachteiligend; die Benachteiligung der Frauen akzeptierend +Definitional bias: 0.0606 + +ZUSAMMENHALTEN +Def.: (von den Teilen eines Ganzen) fest miteinander verbunden bleiben +Def.: fest zueinanderstehen; eine (gegen äußere Gefahren o. Ä.) fest gefügte Einheit bilden +Def.: (Teile) miteinander verbinden; in einer festen Verbindung halten +Def.: am Auseinanderstreben hindern +Def.: vergleichend eins neben das andere halten, nebeneinanderhalten +Definitional bias: 0.0534 + +RINDERPEST +Def.: durch Viren hervorgerufene, meist tödlich verlaufende, sehr ansteckende Krankheit bei Rindern, die besonders mit einer Entzündung der Schleimhäute verbunden ist +Definitional bias: 0.0483 + +SUBVERSIV +Def.: Subversion betreibend; umstürzlerisch +Definitional bias: 0.0385 + +HERRSCHAFTSSYSTEM +Def.: Herrschaftsform +Definitional bias: 0.0329 + +UNTERABSCHNITT +Def.: kleinerer Abschnitt in einem größeren +Definitional bias: 0.0184 + +WEIBLICHKEIT +Def.: weibliches Geschlecht, weibliches Wesen, weibliche Art +Def.: Gesamtheit der [anwesenden] Frauen +Definitional bias: 0.0146 + +GLEICHSTELLUNGSBEAUFTRAGTE +Def.: von einer Behörde, einer Institution oder einem Unternehmen angestellte weibliche Person, die für die Durchsetzung der Gleichstellung von Männern und Frauen zuständig ist; Frauenbeauftragte +Definitional bias: 0.0143 + +MACHO +Def.: in der Art eines Machos; einem Macho entsprechend; machomäßig +Def.: sich [übertrieben] männlich gebender Mann +Definitional bias: -0.0055 + +BETREIBERGESELLSCHAFT +Def.: Betreiber +Definitional bias: -0.0064 + +KABELNETZBETREIBER +Def.: Betreiber eines Kabelnetzes +Definitional bias: -0.0064 + +KLEINUNTERNEHMER +Def.: Betreiber eines Kleinunternehmens 
+Definitional bias: -0.0064 + +UNTERGLIEDERUNG +Def.: das Untergliedern +Def.: kleinerer Abschnitt einer Gliederung +Definitional bias: -0.0270 + +ZEITUNGSJUNGE +Def.: Junge, der auf der Straße Zeitungen verkauft +Def.: Junge, der als Zeitungsausträger arbeitet +Definitional bias: -0.0320 + +KUH +Def.: weibliches Hausrind (nach dem ersten Kalben) +Def.: weibliches Tier von Rindern, Hirschen, Elefanten, Giraffen, Flusspferden u. a. +Def.: weibliche Person, über die sich jemand ärgert +Definitional bias: -0.0335 + +HEMAN +Def.: besonders männlich und potent wirkender Mann +Definitional bias: -0.0385 + +WITWE +Def.: Frau, deren Ehemann gestorben ist +Definitional bias: -0.0439 + +MUTTERTIER +Def.: weibliches Zuchttier +Def.: weibliches Tier, das gerade Junge geboren hat [und sie säugt und betreut] +Definitional bias: -0.0473 + +BETREIBERIN +Def.: weibliche Form zu Betreiber +Definitional bias: -0.0699 + +LEHRSTUHLINHABERIN +Def.: weibliche Form zu Lehrstuhlinhaber +Definitional bias: -0.0765 + +FISCHERIN +Def.: weibliche Form zu Fischer +Def.: Fischerfrau +Definitional bias: -0.0807 + +ZUGEZOGENE +Def.: weibliche Person, die zugezogen ist +Definitional bias: -0.0934 + +BETRUNKENE +Def.: weibliche Person, die betrunken ist +Definitional bias: -0.1024 + +POLIN +Def.: weibliche Form zu Pole +Definitional bias: -0.1151 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_full_def.txt b/DD-GloVe/seed_word_tests/mann_seedwords_full_def.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b77ed4d837c01282c8b29f0797340e34212c9e4 --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_full_def.txt @@ -0,0 +1,156 @@ +ABFÄLLIG +Def.: (in Bezug auf Äußerungen) ablehnend, missbilligend, abschätzig +Definitional bias: 0.1942 + +APHASIE +Def.: Verlust des Sprechvermögens oder Sprachverstehens infolge einer Erkrankung des Sprachzentrums im Gehirn +Def.: Enthaltung des Urteils in Bezug auf Dinge, über die nichts Sicheres bekannt ist +Definitional bias: 
0.1659 + +IGNORANZ +Def.: tadelnswerte Unwissenheit, Kenntnislosigkeit in Bezug auf jemanden, etwas +Def.: das Ignorieren +Definitional bias: 0.1443 + +MUSTERGATTE +Def.: vorbildlicher Ehemann +Definitional bias: 0.1274 + +BESCHÖNIGEN +Def.: etwas [Schlechtes, Fehlerhaftes] als nicht so schwerwiegend darstellen, etwas allzu günstig darstellen; schönfärben +Definitional bias: 0.1193 + +SCHWULENSZENE +Def.: Milieu, Szene der männlichen Homosexuellen +Definitional bias: 0.1041 + +MANN +Def.: erwachsene Person männlichen Geschlechts +Def.: Ehemann +Def.: Lehns-, Gefolgsleute +Def.: als burschikose Anrede, ohne persönlichen Bezug in Ausrufen des Staunens, Erschreckens, der Bewunderung +Definitional bias: 0.1028 + +ZURÜCKKOMMEN +Def.: wieder am Ausgangsort, -punkt ankommen; wiederkommen +Def.: sich wieder einstellen +Def.: zurückgelegt, -gebracht o. Ä. werden +Def.: wieder an den Ausgangspunkt gelangen +Def.: etwas wieder aufgreifen; auf jemanden, etwas wieder Bezug nehmen +Definitional bias: 0.1005 + +EINTRETEN +Def.: einlaufen +Def.: in einen Raum hineingehen oder hereinkommen; einen Raum durch eine Tür betreten +Def.: durch Tritte zerstören [und sich dadurch Zugang zu etwas verschaffen] +Def.: jemandem, einem Tier eine Zeit lang ohne Unterbrechung [unbeherrscht] Fußtritte versetzen +Def.: versehentlich auf etwas Spitzes treten, sodass es in die Fußsohle dringt +Def.: durch Darauftreten in den Boden drücken +Def.: einer Gemeinschaft, Organisation o. Ä. beitreten, Mitglied werden +Def.: (in einem Bewegungsablauf o. Ä.) 
in einen bestimmten Bereich gelangen +Def.: mit etwas beginnen; etwas [offiziell] eröffnen, anfangen lassen +Def.: [unerwartet] in einen Ablauf eingreifend, eine Situation verändernd sich ereignen, geschehen +Def.: sich für jemanden, etwas mit Entschiedenheit öffentlich einsetzen +Def.: auf eine Angelegenheit, ein Thema näher eingehen, sich damit befassen +Definitional bias: 0.1003 + +PRAXISBEZUG +Def.: Bezug zur Praxis +Definitional bias: 0.0990 + +KORREKTERWEISE +Def.: (in Bezug auf ein Verhalten o. Ä.) wie es richtig ist und auch erwartet wird +Definitional bias: 0.0988 + +LESBISCH +Def.: zu Lesbos +Def.: (in Bezug auf Frauen) homosexuell +Definitional bias: 0.0987 + +KIRCHENPOLITIK +Def.: Gesamtheit der Maßnahmen eines Staates in Bezug auf die Kirche +Definitional bias: 0.0901 + +GATTE +Def.: Ehemann +Def.: Eheleute +Definitional bias: 0.0900 + +GEHÖRNTER +Def.: betrogener Ehemann +Def.: Teufel +Definitional bias: 0.0822 + +EHEGATTE +Def.: Ehemann +Def.: einer der beiden Partner einer Ehe +Definitional bias: 0.0771 + +ZUTREFFEND +Def.: (in Bezug auf eine Feststellung o. Ä.) 
richtig +Definitional bias: 0.0631 + +SCHWIEGERSOHN +Def.: Ehemann der Tochter +Definitional bias: 0.0572 + +DROGENABHÄNGIGER +Def.: jemand, der drogenabhängig ist +Definitional bias: 0.0563 + +HEROINABHÄNGIG +Def.: drogenabhängig +Definitional bias: 0.0563 + +RENTENREFORM +Def.: Reform der gesetzlichen Rentenversicherung +Definitional bias: 0.0426 + +SCHNEIDER +Def.: Handwerker, der (aus Stoffen nach Maß) Kleidung anfertigt, näht +Def.: das Erreichen der Punktzahl 30 (als Verlierer) +Def.: (in einem Satz) das Erreichen von 11 Punkten (als Verlierer) +Def.: (in Bezug auf Hirsche, auch Auerhähne und Birkhähne) schwach entwickeltes Tier +Def.: Jäger, der auf der Treibjagd ohne Beute geblieben ist +Def.: kastrierter Eber +Def.: langbeiniges Insekt +Def.: Weberknecht +Def.: kleiner Karpfenfisch mit bräunlich grünem Rücken und gelblichen Bauch- und Brustflossen +Definitional bias: 0.0197 + +LUFTIG +Def.: (besonders in Bezug auf einen Raum) [hell und groß und] mit genügend Luftzufuhr +Def.: hoch in der Luft, in der Höhe angesiedelt o. Ä. +Def.: (besonders in Bezug auf Kleidung) leicht und luftdurchlässig +Definitional bias: 0.0168 + +LAUFLEISTUNG +Def.: Leistung, die ein Fahrzeug, Reifen o. Ä. 
in Bezug auf die zurückgelegte Strecke erbringt +Def.: Leistung, die eine Maschine in Bezug auf die zeitliche Dauer erbringt +Definitional bias: 0.0124 + +AUSSERORTS +Def.: (in Bezug auf den Straßenverkehr) außerhalb des Ortes +Definitional bias: 0.0091 + +KAISERADLER +Def.: großer, schwarzbrauner Adler im Mittelmeerraum und in Osteuropa +Definitional bias: -0.0203 + +ÜBERROLLBÜGEL +Def.: (besonders bei Sport- oder Rennwagen) über dem Sitz verlaufender breiter Bügel aus Stahl, der dem Fahrer Schutz bieten soll, falls sich der Wagen bei einem Unfall überschlägt +Definitional bias: -0.0203 + +RAINFARN +Def.: (zu den Korbblütlern gehörende) Pflanze mit fiederteiligen Blättern und zahlreichen halbkugeligen, gelben Blütenköpfchen +Definitional bias: -0.0333 + +WITWE +Def.: Frau, deren Ehemann gestorben ist +Definitional bias: -0.0515 + +JUTE +Def.: (in tropischen Gebieten heimische) hochwachsende Pflanze mit gesägten Blättern und kleinen, gelben Blüten, deren Stängel Bast enthält +Def.: aus dem Stängel der Jute gewonnene Bastfaser, die besonders zur Herstellung von Garn, Säcken o. Ä. 
verwendet wird +Definitional bias: -0.0517 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_less_stopwords.txt b/DD-GloVe/seed_word_tests/mann_seedwords_less_stopwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..86b7c3bcd1a144994ad3bb68f616c8d83510cae6 --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_less_stopwords.txt @@ -0,0 +1,49 @@ +ABFÄLLIG +Def.: (in Bezug auf Äußerungen) ablehnend, missbilligend, abschätzig +Definitional bias: 0.2061 + +BODENTURNEN +Def.: Gesamtheit der ohne Gerät auf einer Matte am Boden ausgeführten turnerischen Übungen +Definitional bias: 0.1205 + +MANN +Def.: erwachsene Person männlichen Geschlechts +Def.: Ehemann +Def.: Lehns-, Gefolgsleute +Def.: als burschikose Anrede, ohne persönlichen Bezug in Ausrufen des Staunens, Erschreckens, der Bewunderung +Definitional bias: 0.1028 + +LESBISCH +Def.: zu Lesbos +Def.: (in Bezug auf Frauen) homosexuell +Definitional bias: 0.0934 + +GEHÖRNTER +Def.: betrogener Ehemann +Def.: Teufel +Definitional bias: 0.0804 + +SCHNEIDER +Def.: Handwerker, der (aus Stoffen nach Maß) Kleidung anfertigt, näht +Def.: das Erreichen der Punktzahl 30 (als Verlierer) +Def.: (in einem Satz) das Erreichen von 11 Punkten (als Verlierer) +Def.: (in Bezug auf Hirsche, auch Auerhähne und Birkhähne) schwach entwickeltes Tier +Def.: Jäger, der auf der Treibjagd ohne Beute geblieben ist +Def.: kastrierter Eber +Def.: langbeiniges Insekt +Def.: Weberknecht +Def.: kleiner Karpfenfisch mit bräunlich grünem Rücken und gelblichen Bauch- und Brustflossen +Definitional bias: 0.0729 + +KABELLOS +Def.: ohne Kabel funktionierend +Definitional bias: 0.0592 + +ÜBERROLLBÜGEL +Def.: (besonders bei Sport- oder Rennwagen) über dem Sitz verlaufender breiter Bügel aus Stahl, der dem Fahrer Schutz bieten soll, falls sich der Wagen bei einem Unfall überschlägt +Definitional bias: 0.0437 + +WITWE +Def.: Frau, deren Ehemann gestorben ist +Definitional bias: -0.0565 + diff --git 
a/DD-GloVe/seed_word_tests/mann_seedwords_own_calc_projection.txt b/DD-GloVe/seed_word_tests/mann_seedwords_own_calc_projection.txt new file mode 100644 index 0000000000000000000000000000000000000000..a029735a65ebbf41e4c0bbf20842bd62f6c33ffb --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_own_calc_projection.txt @@ -0,0 +1,51 @@ +BETROFFENHEIT +Def.: das Betroffensein; Bestürzung +Definitional bias: 0.1971 + +BEWUNDERUNG +Def.: das Bewundern; große Anerkennung, Hochachtung +Definitional bias: 0.1671 + +BEDAUERN +Def.: mitfühlende Anteilnahme, Mitleid, Mitgefühl +Def.: Betrübnis +Def.: Mitgefühl mit jemandem empfinden; jemanden bemitleiden +Def.: unerfreulich, schade finden +Definitional bias: 0.1579 + +LOYALITÄT +Def.: loyale Gesinnung, Haltung, Verhaltensweise +Definitional bias: 0.1292 + +DANKBARKEIT +Def.: Gefühl, Ausdruck des Dankes; dankbare Empfindung, Gesinnung +Def.: das Lohnendsein +Def.: Haltbarkeit, Strapazierfähigkeit +Def.: (von [Topf]pflanzen) Anspruchslosigkeit +Definitional bias: 0.1175 + +AUSRUFEN +Def.: spontan, in einem Ausruf äußern +Def.: [laut rufend] nennen, mitteilen, bekannt geben +Def.: öffentlich, offiziell verkünden, proklamieren +Def.: rufend zum Kauf anbieten, feilbieten +Definitional bias: 0.1166 + +BEREICHERUNG +Def.: das Bereichern +Def.: das Sichbereichern +Def.: Nutzen, Gewinn +Definitional bias: 0.0933 + +GEFOLGSLEUTE +Def.: Plural von Gefolgsmann +Def.: Gesamtheit der Anhängerinnen und Anhänger +Definitional bias: 0.0933 + +ABNEIGUNG +Def.: deutlich bewusste Empfindung, jemanden oder etwas nicht zu mögen +Definitional bias: 0.0890 + +PERSÖNLICHEN +Definitional bias: 0.0000 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_own_calculations.txt b/DD-GloVe/seed_word_tests/mann_seedwords_own_calculations.txt new file mode 100644 index 0000000000000000000000000000000000000000..b93db20a3dab125ad087801db54993b581353855 --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_own_calculations.txt @@ -0,0 +1,134 @@ 
+BETROFFENHEIT +Def.: das Betroffensein; Bestürzung +Definitional bias: 0.1971 + +BEWUNDERUNG +Def.: das Bewundern; große Anerkennung, Hochachtung +Definitional bias: 0.1671 + +BEDAUERN +Def.: mitfühlende Anteilnahme, Mitleid, Mitgefühl +Def.: Betrübnis +Def.: Mitgefühl mit jemandem empfinden; jemanden bemitleiden +Def.: unerfreulich, schade finden +Definitional bias: 0.1579 + +ERGEBENHEIT +Def.: Treue, Fügsamkeit, Hingegebensein +Def.: klagloses Sichfügen +Definitional bias: 0.1545 + +BESTÜRZUNG +Def.: Erschütterung +Definitional bias: 0.1370 + +LOYALITÄT +Def.: loyale Gesinnung, Haltung, Verhaltensweise +Definitional bias: 0.1292 + +VERBUNDENHEIT +Def.: [Gefühl der] Zusammengehörigkeit mit jemandem, miteinander +Definitional bias: 0.1276 + +SELBSTZWEIFEL +Def.: auf sich selbst, sein eigenes Denken und Tun gerichteter Zweifel +Definitional bias: 0.1264 + +FEINDSCHAFT +Def.: Haltung einem anderen Menschen gegenüber, die von dem Wunsch bestimmt ist, diesem zu schaden, ihn zu bekämpfen oder sogar zu vernichten +Def.: durch gegenseitige Feindschaft geprägte Beziehung zwischen Menschen +Definitional bias: 0.1225 + +MISSTRAUEN +Def.: kritische, das Selbstverständliche bezweifelnde Einstellung gegenüber einem Sachverhalt, das Zweifeln an der Vertrauenswürdigkeit einer Person; Argwohn, Skepsis +Def.: nicht trauen +Definitional bias: 0.1196 + +DANKBARKEIT +Def.: Gefühl, Ausdruck des Dankes; dankbare Empfindung, Gesinnung +Def.: das Lohnendsein +Def.: Haltbarkeit, Strapazierfähigkeit +Def.: (von [Topf]pflanzen) Anspruchslosigkeit +Definitional bias: 0.1175 + +AUSRUFEN +Def.: spontan, in einem Ausruf äußern +Def.: [laut rufend] nennen, mitteilen, bekannt geben +Def.: öffentlich, offiziell verkünden, proklamieren +Def.: rufend zum Kauf anbieten, feilbieten +Definitional bias: 0.1166 + +SORGLOSIGKEIT +Def.: das Sorglossein +Def.: Nachlässigkeit, Unachtsamkeit +Definitional bias: 0.1154 + +WEIGERUNG +Def.: das [Sich]weigern +Definitional bias: 0.1115 + +EHRGEIZ +Def.: starkes 
oder übertriebenes Streben nach Erfolg und Ehren +Definitional bias: 0.1109 + +UNFÄHIGKEIT +Def.: das Unfähigsein; Mangel an Kompetenz, an Handlungsmöglichkeiten +Definitional bias: 0.1101 + +ZWEIFELN +Def.: unsicher sein in Bezug auf einen Sachverhalt oder ein [künftiges] Geschehen; infrage stellen, in Zweifel ziehen +Definitional bias: 0.1074 + +ENTTÄUSCHUNG +Def.: Nichterfüllung einer Hoffnung oder Erwartung, die jemanden unzufrieden o. ä. stimmt +Def.: das Enttäuschtsein +Definitional bias: 0.1023 + +BEREICHERUNG +Def.: das Bereichern +Def.: das Sichbereichern +Def.: Nutzen, Gewinn +Definitional bias: 0.0933 + +GEFOLGSLEUTE +Def.: Plural von Gefolgsmann +Def.: Gesamtheit der Anhängerinnen und Anhänger +Definitional bias: 0.0933 + +MISSFALLEN +Def.: Unzufriedenheit, Nichteinverstandensein mit einem Vorgang, einer Verhaltensweise o. Ä. +Def.: Missfallen auslösen, hervorrufen +Definitional bias: 0.0930 + +ABNEIGUNG +Def.: deutlich bewusste Empfindung, jemanden oder etwas nicht zu mögen +Definitional bias: 0.0890 + +SACHVERSTAND +Def.: genaue, zuverlässige Kenntnisse auf einem bestimmten Gebiet, die zu einer entsprechenden Tätigkeit, der Beurteilung, Einschätzung o. Ä. 
von etwas befähigen +Definitional bias: 0.0473 + +ZUFRIEDENHEIT +Def.: das Zufriedensein +Def.: Grad, Ausmaß des Zufriedenseins +Definitional bias: 0.0291 + +MITSTREITER +Def.: jemand, der mit anderen zusammen für oder gegen etwas eintritt, sich einsetzt, kämpft +Definitional bias: 0.0080 + +PERSÖNLICHEN +Definitional bias: 0.0000 + +PERSÖNLICHER +Definitional bias: 0.0000 + +VERTRAUTEN +Definitional bias: 0.0000 + +WEGGEFÄHRTEN +Definitional bias: 0.0000 + +SCHUTZAUSRÜSTUNGEN +Definitional bias: 0.0000 + diff --git a/DD-GloVe/seed_word_tests/mann_seedwords_top10.txt b/DD-GloVe/seed_word_tests/mann_seedwords_top10.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a966e448ba3fd48eda84d9ea50d565f2bc6f2aa --- /dev/null +++ b/DD-GloVe/seed_word_tests/mann_seedwords_top10.txt @@ -0,0 +1,45 @@ +SCHWULENSZENE +Def.: Milieu, Szene der männlichen Homosexuellen +Definitional bias: 0.0890 + +STIMMBRUCH +Def.: Stimmwechsel bei männlichen Jugendlichen in der Pubertät, der sich in einer zwischen Höhe und Tiefe unkontrolliert schwankenden, leicht überschnappenden Stimme ausdrückt und zu einem allmählichen Tieferwerden der Stimme führt +Definitional bias: 0.0811 + +ZIKADE +Def.: kleines, der Grille ähnliches Insekt, bei dem die männlichen Tiere laute, zirpende Töne hervorbringen +Definitional bias: 0.0314 + +MOSCHUS +Def.: stark riechendes Sekret der männlichen Moschustiere, das besonders bei der Herstellung von Parfums verwendet wird +Def.: aus Moschus gewonnener oder ähnlicher synthetisch hergestellter Duftstoff +Definitional bias: 0.0130 + +ATLANT +Def.: Gebälkträger in Form einer männlichen Figur +Definitional bias: -0.0024 + +KUDU +Def.: (in Afrika heimische) Antilope mit braunrotem, weiße Querstreifen aufweisendem Fell, vom Hals zum Rücken verlaufender kurzer Mähne und (beim männlichen Tier) gedrehten Hörnern +Definitional bias: -0.0105 + +FRENULUM +Def.: kleine Haut- bzw. 
Schleimhautfalte +Def.: Hautfalte, die die Eichel des männlichen Gliedes mit der Vorhaut verbindet +Definitional bias: -0.0280 + +ER +Def.: Person oder Tier männlichen Geschlechts +Definitional bias: -0.0308 + +NEBENHODEN +Def.: den Samen speicherndes und ableitendes Organ des männlichen Geschlechtsapparates +Definitional bias: -0.0349 + +EICHEL +Def.: länglich runde Frucht der Eiche +Def.: vorderster Teil des männlichen Gliedes +Def.: vorderster Teil des Kitzlers +Def.: Farbe im deutschen Kartenspiel; Eckern +Definitional bias: -0.0622 +
diff --git a/DD-GloVe/seed_word_tests/seed_word_algorithm_reimplementation.py b/DD-GloVe/seed_word_tests/seed_word_algorithm_reimplementation.py new file mode 100644 index 0000000000000000000000000000000000000000..b03d2c4d4015fd1cdf02675178413b5ac9f04222 --- /dev/null +++ b/DD-GloVe/seed_word_tests/seed_word_algorithm_reimplementation.py @@ -0,0 +1,114 @@
+"""Determine seed words as described by algorithm in the paper"""
+
+import ast
+import csv
+import pandas as pd
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity as cosine
+from tqdm import tqdm
+
+# Input files (absolute paths; adjust when running on another machine).
+EMBEDDINGS_PATH = "/workspace/students/reichelt/BA/data/dd-glove/english_vectors_gender.txt"
+DEFINITIONS_PATH = "/workspace/students/reichelt/BA/data/dd-glove/english_definitions.txt"
+
+# Row indices of the two initial seed words.
+# NOTE(review): the variables built from them are called mann_vec/frau_vec although
+# the English files are loaded -- confirm rows 16 and 41 are the intended seed words.
+FIRST_SEED_INDEX = 16
+SECOND_SEED_INDEX = 41
+
+# Number of candidates reported per direction.
+# NOTE(review): the comment here used to say "top 10 (as described in paper)" while
+# the code took 30; the value 30 is kept -- confirm which one is intended.
+NUM_CANDIDATES = 30
+
+# load embeddings
+# QUOTE_NONE: read a double-quote token as plain text. With the default quoting a
+# token starting with '"' opens a quoted field that can swallow following rows, which
+# would shift every row index used by the definition lists.
+embeddings = pd.read_csv(EMBEDDINGS_PATH, skiprows=1, header=None, sep=" ",
+                         quoting=csv.QUOTE_NONE)
+embeddings.rename(columns={0: "token"}, inplace=True)  # name first column "token"
+EMBEDDING_DIM = embeddings.shape[1] - 1  # every column except "token" is one vector component
+# convert the remaining columns into one column holding each vector as a list
+embeddings["vector"] = embeddings.iloc[:, 1:].values.tolist()
+
+# load definition indexes: column 1 of the tab-separated file holds one list literal per row
+definition_indexes = pd.read_csv(DEFINITIONS_PATH, sep="\t", usecols=[1], header=None, names=["def_words"])
+definition_indexes = definition_indexes["def_words"].tolist()
+definition_indexes = [ast.literal_eval(item) for item in definition_indexes]
+
+
+def get_definition_embedding(word_index: int) -> np.ndarray:
+    """Calculate the definition embedding of the word at row ``word_index``.
+
+    The definition embedding is the mean of the embeddings of all words
+    occurring in that word's definition; ``definition_indexes[word_index]``
+    lists their row indices.
+
+    Raises:
+        TypeError: if the definition entry is not a sequence of row indices.
+        ValueError: if the definition is empty, so no mean exists.
+    """
+    definition = definition_indexes[word_index]
+    emb_sum = np.zeros(EMBEDDING_DIM)
+    try:
+        for i in definition:  # look at all definitional words
+            emb_sum += np.array(embeddings["vector"].iloc[i])  # add embedding of a definitional word
+    except TypeError as exc:
+        # carry the offending entry in the message instead of printing it separately
+        raise TypeError(
+            f"definition entry for word index {word_index} is not a list of row indices: {definition!r}"
+        ) from exc
+    if len(definition) == 0:
+        raise ValueError(f"word index {word_index} has an empty definition")
+    return (1 / len(definition)) * emb_sum  # build mean
+
+
+def calculate_definitional_bias(v_1: np.ndarray, v_2: np.ndarray, w: np.ndarray) -> float:
+    """Calculate the bias of a vector as a difference of cosine similarities.
+
+    Returns ``cos(w, v_1) - cos(w, v_2)``: positive when ``w`` points more
+    towards ``v_1``, negative when it points more towards ``v_2``.
+    """
+    boy_def = v_1.reshape(1, -1)  # reshape to fit sklearn's cosine function
+    girl_def = v_2.reshape(1, -1)
+    word_def = w.reshape(1, -1)
+    bias = cosine(word_def, boy_def) - cosine(word_def, girl_def)
+    return bias[0].item()  # sklearn returns a 1x1 matrix; unwrap the single value
+
+
+def alternate_bias(v_1: np.ndarray, v_2: np.ndarray, w: np.ndarray) -> float:
+    """Calculate the bias of a vector as a scalar projection.
+
+    Uses the projection described textually in the paper instead of
+    cosine - cosine: bias = w * (v1-v2) / ||(v1-v2)||
+    ``w`` is deliberately NOT scaled to unit length.
+    The result is not finite when ``v_1`` equals ``v_2`` (zero-length direction).
+    """
+    diff = v_1 - v_2
+    return np.dot(w, diff) / np.linalg.norm(diff)
+
+
+# look up def embedding for initial seed words
+mann_vec = get_definition_embedding(FIRST_SEED_INDEX)
+frau_vec = get_definition_embedding(SECOND_SEED_INDEX)
+
+
+def def_bias_application(vector_column_value):
+    """Return the bias of one "vector" cell (a list of floats) against the two seed vectors."""
+    # NOTE(review): this scores the word's own embedding, not its definition embedding -- confirm.
+    emb = np.array(vector_column_value)
+    return alternate_bias(mann_vec, frau_vec, emb)
+
+
+# apply calculation to whole table with progress bar
+tqdm.pandas()
+embeddings["definitional_bias"] = embeddings["vector"].progress_apply(def_bias_application)
+
+# rows with the NUM_CANDIDATES highest and lowest bias values
+boy_indices = embeddings["definitional_bias"].nlargest(NUM_CANDIDATES).index
+girl_indices = embeddings["definitional_bias"].nsmallest(NUM_CANDIDATES).index
+
+print(f"boy indices: {boy_indices}")
+print(f"girl indices {girl_indices}")
diff --git a/DD-GloVe/seed_word_tests/she_seedwords.txt b/DD-GloVe/seed_word_tests/she_seedwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..3b89116ee4e89e54320f674f982a8a0f72084236 --- /dev/null +++ b/DD-GloVe/seed_word_tests/she_seedwords.txt @@ -0,0 +1,45 @@ +SHE +Def.: the female person or animal being discussed or last mentioned; that female. +Def.: the woman: She who listens learns. +Def.: anything considered, as by personification, to be feminine: spring, with all the memories she conjures up. +Def.: a female person or animal. +Def.: an object or device considered as female or feminine. +Def.: she or he: used as an orthographic device to avoid a gender-specific pronoun when the gender of the antecedent is unknown or irrelevant.
+Def.: refers to a female person or animal: she is a doctor; she's a fine mare +Def.: refers to things personified as feminine, such as cars, ships, and nations +Def.: Australian and NZ an informal word for it 1 (def. 3) she's apples; she'll be right +Def.: +Def.: a female person or animal +Def.: (in combination): she-cat +Definitional bias: -0.0242 + +SHE/HE +Def.: the female person or animal being discussed or last mentioned; that female. +Def.: the woman: She who listens learns. +Def.: anything considered, as by personification, to be feminine: spring, with all the memories she conjures up. +Def.: a female person or animal. +Def.: an object or device considered as female or feminine. +Def.: she or he: used as an orthographic device to avoid a gender-specific pronoun when the gender of the antecedent is unknown or irrelevant. +Def.: refers to a female person or animal: she is a doctor; she's a fine mare +Def.: refers to things personified as feminine, such as cars, ships, and nations +Def.: Australian and NZ an informal word for it 1 (def. 3) she's apples; she'll be right +Def.: +Def.: a female person or animal +Def.: (in combination): she-cat +Definitional bias: -0.0242 + +SHE/HER +Def.: the female person or animal being discussed or last mentioned; that female. +Def.: the woman: She who listens learns. +Def.: anything considered, as by personification, to be feminine: spring, with all the memories she conjures up. +Def.: a female person or animal. +Def.: an object or device considered as female or feminine. +Def.: she or he: used as an orthographic device to avoid a gender-specific pronoun when the gender of the antecedent is unknown or irrelevant. +Def.: refers to a female person or animal: she is a doctor; she's a fine mare +Def.: refers to things personified as feminine, such as cars, ships, and nations +Def.: Australian and NZ an informal word for it 1 (def. 
3) she's apples; she'll be right +Def.: +Def.: a female person or animal +Def.: (in combination): she-cat +Definitional bias: -0.0242 + diff --git a/DD-GloVe/seed_word_tests/she_seedwords_alternate_calc.txt b/DD-GloVe/seed_word_tests/she_seedwords_alternate_calc.txt new file mode 100644 index 0000000000000000000000000000000000000000..c1fb4498f361f95a5ff9a2020bdd7739f5b2e799 --- /dev/null +++ b/DD-GloVe/seed_word_tests/she_seedwords_alternate_calc.txt @@ -0,0 +1,669 @@ +WOMEN +Def.: the plural of woman. +Def.: the plural of woman +Definitional bias: -0.0437 + +SHE +Def.: the female person or animal being discussed or last mentioned; that female. +Def.: the woman: She who listens learns. +Def.: anything considered, as by personification, to be feminine: spring, with all the memories she conjures up. +Def.: a female person or animal. +Def.: an object or device considered as female or feminine. +Def.: she or he: used as an orthographic device to avoid a gender-specific pronoun when the gender of the antecedent is unknown or irrelevant. +Def.: refers to a female person or animal: she is a doctor; she's a fine mare +Def.: refers to things personified as feminine, such as cars, ships, and nations +Def.: Australian and NZ an informal word for it 1 (def. 3) she's apples; she'll be right +Def.: +Def.: a female person or animal +Def.: (in combination): she-cat +Definitional bias: -0.0242 + +FEMININE +Def.: being or relating to to a woman or girl: feminine beauty;feminine dress. +Def.: having qualities traditionally ascribed to women, such as sensitivity or gentleness. +Def.: effeminate; womanish: Growing up, he had been told he had a feminine walk. +Def.: Grammar. noting or pertaining to that one of the three genders of Latin, Greek, German, etc., or one of the two genders of French, Spanish, Hebrew, etc., having among its members most nouns referring to females, as well as other nouns, as Latin stella “star,†or German Zeit “time.†+Def.: the feminine gender. 
+Def.: a noun or other element in or marking the feminine gender. +Def.: suitable to or characteristic of a woman: a feminine fashion +Def.: possessing qualities or characteristics considered typical of or appropriate to a woman +Def.: effeminate; womanish +Def.: grammar +Def.: denoting or belonging to a gender of nouns, occurring in many inflected languages, that includes all kinds of referents as well as some female animate referents +Def.: (as noun): German Zeit ``time'' and Ehe ``marriage'' are feminines +Definitional bias: -0.0207 + +MASCULINE +Def.: pertaining to or characteristic of a man or men: masculine attire. +Def.: having qualities traditionally ascribed to men, as strength and boldness. +Def.: Grammar. noting or pertaining to the gender of Latin, Greek, German, French, Spanish, Hebrew, etc., which has among its members most nouns referring to males, as well as other nouns, as Spanish dedo, “finger,†German Bleistift, “pencil.†+Def.: (of a woman) mannish. +Def.: the masculine gender. +Def.: a noun or other element in or marking that gender. +Def.: possessing qualities or characteristics considered typical of or appropriate to a man; manly +Def.: unwomanly +Def.: grammar +Def.: denoting a gender of nouns, occurring in many inflected languages, that includes all kinds of referents as well as some male animate referents +Def.: (as noun): German ``Weg'' is a masculine +Definitional bias: -0.0179 + +GODDESS +Def.: a female god or deity. +Def.: a woman of extraordinary beauty and charm. +Def.: a greatly admired or adored woman: a domestic goddess who hosts lavish dinner parties. +Def.: a female divinity +Def.: a woman who is adored or idealized, esp by a man +Definitional bias: -0.0171 + +SEXUAL +Def.: of, relating to, or for sex: Publicly discussing sexual matters was frowned upon in those days.The store is known for selling sexual aids. 
+Def.: occurring between or involving the sexes: In humans, melatonin concentrations are related to sexual maturation, with significant decreases occurring at the onset of puberty. +Def.: having sexual organs or reproducing by processes involving both sexes: The patterns of genetic variation in sexual species tend to be very different from those in asexual species. +Def.: a combining form extracted from heterosexual and homosexual, used to describe a person's sexual orientation or identity:pansexual;polysexual. +Def.: a combining form extracted from metrosexual, used to describe a person's lifestyle, appearance, or way of dressing with regard to attracting or choosing a romantic partner:vegansexual;lumbersexual. +Def.: of, relating to, or characterized by sex or sexuality +Def.: (of reproduction) characterized by the union of male and female gametes: Compare asexual (def. 2) +Definitional bias: -0.0135 + +MYTHOLOGY +Def.: a body of myths, as that of a particular people or that relating to a particular person: Greek mythology. +Def.: myths collectively. +Def.: the science or study of myths. +Def.: a set of stories, traditions, or beliefs associated with a particular group or the history of an event, arising naturally or deliberately fostered: the Fascist mythology of the interwar years. +Def.: a body of myths, esp one associated with a particular culture, institution, person, etc +Def.: a body of stories about a person, institution, etc: the mythology of Hollywood +Def.: myths collectively +Def.: the study or collecting of myths +Definitional bias: -0.0061 + +FEMINIST +Def.: advocating social, political, legal, and economic rights for women equal to those of men. +Def.: an advocate of such rights. 
+Def.: a person who advocates equal rights for women +Def.: of, relating to, or advocating feminism +Definitional bias: -0.0057 + +THEORY +Def.: a coherent group of tested general propositions, commonly regarded as correct, that can be used as principles of explanation and prediction for a class of phenomena: Einstein's theory of relativity. +Def.: a proposed explanation whose status is still conjectural and subject to experimentation, in contrast to well-established propositions that are regarded as reporting matters of actual fact. +Def.: Mathematics. a body of principles, theorems, or the like, belonging to one subject: number theory. +Def.: the branch of a science or art that deals with its principles or methods, as distinguished from its practice: music theory. +Def.: a particular conception or view of something to be done or of the method of doing it; a system of rules or principles: conflicting theories of how children best learn to read. +Def.: contemplation or speculation: the theory that there is life on other planets. +Def.: guess or conjecture: My theory is that he never stops to think words have consequences. +Def.: in theory, ideally; hypothetically: In theory, mapping the human genome may lead to thousands of cures. +Def.: a system of rules, procedures, and assumptions used to produce a result +Def.: abstract knowledge or reasoning +Def.: a speculative or conjectural view or idea: I have a theory about that +Def.: an ideal or hypothetical situation (esp in the phrase in theory) +Def.: a set of hypotheses related by logical or mathematical arguments to explain and predict a wide variety of connected phenomena in general terms: the theory of relativity +Def.: a nontechnical name for hypothesis (def. 1) +Def.: A set of statements or principles devised to explain a group of facts or phenomena. Most theories that are accepted by scientists have been repeatedly tested by experiments and can be used to make predictions about natural phenomena. 
See Note at hypothesis. +Definitional bias: -0.0049 + +FEMALE +Def.: relating to or being a woman or girl. +Def.: Biology. +Def.: of, relating to, or being a person with a certain combination of sex characteristics, commonly including two X chromosomes in the cell nuclei, a vagina, a uterus and ovaries, and enlarged breasts developed at puberty. +Def.: of, relating to, or being an animal, plant, or plant structure of the sex or sexual phase that normally produces egg cells during reproduction. +Def.: of, relating to, or characteristic of a female person; feminine: female suffrage;female charm. +Def.: comprising women or girls: a female readership. +Def.: Botany. +Def.: designating or pertaining to a plant or its reproductive structure that produces or contains elements requiring fertilization. +Def.: (of seed plants) pistillate. +Def.: Machinery. being or having a recessed part into which a corresponding part fits: a female plug.: Compare male (def. 3). +Def.: a female person.: See Usage note at the current entry. +Def.: Biology. an animal, plant, or plant structure of the sex or sexual phase that normally produces egg cells during reproduction. 
+Def.: of, relating to, or designating the sex producing gametes (ova) that can be fertilized by male gametes (spermatozoa) +Def.: of, relating to, or characteristic of a woman: female charm +Def.: for or composed of women or girls: female suffrage; a female choir +Def.: (of reproductive organs such as the ovary and carpel) capable of producing female gametes +Def.: (of gametes such as the ovum) capable of being fertilized by a male gamete in sexual reproduction +Def.: (of flowers) lacking, or having nonfunctional, stamens +Def.: having an internal cavity into which a projecting male counterpart can be fitted: a female thread +Def.: +Def.: a female animal or plant +Def.: derogatory a woman or girl +Def.: In organisms that reproduce sexually, being the gamete that is larger and less motile than the other corresponding gamete (the male gamete) of the same species. The egg cells of higher animals and plants are female gametes. +Def.: Possessing or being a structure that produces only female gametes. The ovaries of humans are female reproductive organs. Female flowers possess only carpels and no stamens. +Def.: Having the genitalia or other structures typical of a female organism. Worker ants are female but sterile. +Def.: A female organism. +Definitional bias: -0.0044 + +GENDER +Def.: either the male or female division of a species, especially as differentiated by social and cultural roles and behavior: the feminine gender. : Compare sex1 (def. 1). +Def.: a similar category of human beings that is outside the male/female binary classification.: See also third gender (def. 1), genderqueer (def. 3), nonbinary (def. 3). +Def.: the concept or system of categories such as male and female: Gender is a factor in pay rates across industries.More and more people have a nonbinary understanding of gender. +Def.: Grammar. 
+Def.: (in many languages) a set of classes that together include all nouns, membership in a particular class being shown by the form of the noun itself or by the form or choice of words that modify, replace, or otherwise refer to the noun, as, in English, the choice of he to replace the man, of she to replace the woman, of it to replace the table, of it or she to replace the ship. The number of genders in different languages varies from 2 to more than 20; often the classification correlates in part with sex or animateness. The most familiar sets of genders are of three classes (as masculine, feminine, and neuter in Latin and German) or of two (as common and neuter in Dutch, or masculine and feminine in French and Spanish). +Def.: one class of such a set. +Def.: such classes or sets collectively or in general. +Def.: membership of a word or grammatical form, or an inflectional form showing membership, in such a class. +Def.: Archaic. kind, sort, or class. +Def.: to attribute gender to, or to classify by gender: Gendering soaps seems a bit much—can't men and women use the same products?Usually when I wear my hair down people gender me as female. +Def.: Archaic. to engender. +Def.: Obsolete. to breed. +Def.: a set of two or more grammatical categories into which the nouns of certain languages are divided, sometimes but not necessarily corresponding to the sex of the referent when animate: See also natural gender +Def.: any of the categories, such as masculine, feminine, neuter, or common, within such a set +Def.: informal the state of being male, female, or neuter +Def.: informal all the members of one sex: the female gender +Definitional bias: -0.0042 + +WOMAN +Def.: an adult female person.: Compare man (def. 1), girl (def. 1). +Def.: a female employee or representative: A woman from the real estate agency called. +Def.: Informal. +Def.: a wife. +Def.: a female lover or sweetheart. +Def.: Older Use:Usually Offensive. 
a female employee who cleans a house, cooks, etc.; housekeeper. +Def.: (in historical use) a female attendant to a lady of rank: Your woman informed us of your travel plans. +Def.: the nature, characteristics, or feelings often attributed to women; womanliness: He has always loved and admired the woman in her. +Def.: women collectively: Woman is no longer subordinate to man. +Def.: to put into the company of a woman. +Def.: to equip or staff with women. +Def.: Obsolete. to cause to act or yield like a woman. +Def.: of women; womanly. +Def.: female: a woman plumber. +Def.: be one's own woman, (of females) to be free from restrictions, control, or dictatorial influence; be independent. +Def.: a combining form of woman: chairwoman; forewoman; spokeswoman. +Def.: an adult female human being +Def.: (modifier) female or feminine: a woman politician; woman talk +Def.: women collectively; womankind +Def.: the woman feminine nature or feelings: babies bring out the woman in her +Def.: a female servant or domestic help +Def.: a man considered as having supposed female characteristics, such as meekness or timidity +Def.: informal a wife, mistress, or girlfriend +Def.: the little woman informal one's wife +Def.: woman of the streets a prostitute +Def.: rare to provide with women +Def.: obsolete to make effeminate +Definitional bias: -0.0040 + +CONCEPTS +Def.: a general notion or idea; conception. +Def.: an idea of something formed by mentally combining all its characteristics or particulars; a construct. +Def.: a directly conceived or intuited object of thought. +Def.: functioning as a prototype or model of new product or innovation: a concept car,a concept phone. +Def.: Informal. to develop a concept of; conceive: He concepted and produced three films. 
+Def.: an idea, esp an abstract idea: the concepts of biology +Def.: philosophy a general idea or notion that corresponds to some class of entities and that consists of the characteristic or essential features of the class +Def.: philosophy +Def.: the conjunction of all the characteristic features of something +Def.: a theoretical construct within some theory +Def.: a directly intuited object of thought +Def.: the meaning of a predicate +Def.: (modifier) (of a product, esp a car) created as an exercise to demonstrate the technical skills and imagination of the designers, and not intended for mass production or sale +Definitional bias: -0.0032 + +CONCEPT +Def.: a general notion or idea; conception. +Def.: an idea of something formed by mentally combining all its characteristics or particulars; a construct. +Def.: a directly conceived or intuited object of thought. +Def.: functioning as a prototype or model of new product or innovation: a concept car,a concept phone. +Def.: Informal. to develop a concept of; conceive: He concepted and produced three films. +Def.: an idea, esp an abstract idea: the concepts of biology +Def.: philosophy a general idea or notion that corresponds to some class of entities and that consists of the characteristic or essential features of the class +Def.: philosophy +Def.: the conjunction of all the characteristic features of something +Def.: a theoretical construct within some theory +Def.: a directly intuited object of thought +Def.: the meaning of a predicate +Def.: (modifier) (of a product, esp a car) created as an exercise to demonstrate the technical skills and imagination of the designers, and not intended for mass production or sale +Definitional bias: -0.0026 + +HER +Def.: the objective case of she: We saw her this morning. Give this book to her. +Def.: the possessive case of she (used as an attributive adjective): Her coat is the one on the chair. I'm sorry about her leaving.: Compare hers. 
+Def.: the dative case of she: I gave her the book. +Def.: Informal. (used instead of the pronoun she in the predicate after the verb to be): It's her. It isn't her. +Def.: Slang. a female: Is the new baby a her or a him? +Def.: heraldic. +Def.: heraldry. +Def.: refers to a female person or animal: he loves her; they sold her a bag; something odd about her; lucky her! +Def.: refers to things personified as feminine or traditionally to ships and nations +Def.: mainly US a dialect word for herself she needs to get her a better job +Def.: of, belonging to, or associated with her: her silly ideas; her hair; her smoking annoys me +Def.: heraldic +Def.: heraldry +Definitional bias: -0.0018 + +PERSON +Def.: a human being, whether an adult or child: The table seats four persons. +Def.: a human being as distinguished from an animal or a thing. +Def.: an individual human being who likes or prefers something specified (used in combination): I've never been a cat person. +Def.: Sociology. an individual human being, especially with reference to social relationships and behavioral patterns as conditioned by the culture. +Def.: Philosophy. a self-conscious or rational being. +Def.: the actual self or individual personality of a human being: You ought not to generalize, but to consider the person you are dealing with. +Def.: the body of a living human being, sometimes including the clothes being worn: He had no money on his person. +Def.: the body in its external aspect: an attractive person to look at. +Def.: a character, part, or role, as in a play or story. +Def.: an individual of distinction or importance. +Def.: a person not entitled to social recognition or respect. +Def.: Law. a human being (natural person ) or a group of human beings, a corporation, a partnership, an estate, or other legal entity (artificial person, or juristic person ) recognized by law as having rights and duties. +Def.: Grammar. 
a category found in many languages that is used to distinguish between the speaker of an utterance and the person or people being spoken to or about. In English there are three persons in the pronouns, the first represented by I and we, the second by you, and the third by he, she, it, and they. Most verbs have distinct third person singular forms in the present tense, as writes; the verb be has, in addition, a first person singular form am. +Def.: Theology. any of the three hypostases or modes of being in the Trinity, namely the Father, the Son, and the Holy Spirit. +Def.: be one's own person, to be free from restrictions, control, or dictatorial influence: Now that she's working, she feels that she's her own person. +Def.: in person, in one's own bodily presence; personally: Applicants are requested to apply in person. +Def.: a combining form of person, replacing paired, gender-specific forms such as -man and -woman or -er1 and -ess: chairperson;salesperson;waitperson. +Def.: an individual human being +Def.: the body of a human being, sometimes including his or her clothing: guns hidden on his person +Def.: a grammatical category into which pronouns and forms of verbs are subdivided depending on whether they refer to the speaker, the person addressed, or some other individual, thing, etc +Def.: a human being or a corporation recognized in law as having certain rights and obligations +Def.: philosophy a being characterized by consciousness, rationality, and a moral sense, and traditionally thought of as consisting of both a body and a mind or soul +Def.: archaic a character or role; guise +Def.: in person +Def.: actually present: the author will be there in person +Def.: without the help or intervention of others +Def.: Christianity any of the three hypostases existing as distinct in the one God and constituting the Trinity. 
They are the First Person, the Father, the Second Person, the Son, and the Third Person, the Holy Ghost +Def.: sometimes used instead of -man and -woman or -lady: chairperson; salesperson +Definitional bias: 0.0006 + +ASPECTS +Def.: appearance to the eye or mind; look: the physical aspect of the country. +Def.: nature; quality; character: the superficial aspect of the situation. +Def.: a way in which a thing may be viewed or regarded; interpretation; view: both aspects of a decision. +Def.: part; feature; phase: That is the aspect of the problem that interests me most. +Def.: facial expression; countenance: He wore an aspect of gloom. Hers was an aspect of happy optimism. +Def.: bearing; air; mien: warlike in aspect. +Def.: view commanded; exposure: The house has a southern aspect. +Def.: the side or surface facing a given direction: the dorsal aspect of a fish; the northern aspect of the house. +Def.: Grammar. +Def.: a category or interrelated set of categories for which the verb is inflected in some languages, typically to indicate the duration, repetition, completion, or quality of the action or state denoted by the verb. +Def.: a set of syntactic devices, as in the English perfect with have in I have gone, with functions similar to such inflections. +Def.: any of the members or instances of these categories or sets: the Latin perfect aspect; the Russian imperfect aspect. +Def.: the meaning of, or meaning typical of, such a category or construction. +Def.: such categories or constructions, or their meanings collectively. +Def.: Astrology. +Def.: the angular distance between two points as seen from the earth, primarily derived by dividing the 360 degrees of the zodiac by the integers 1 through 12. +Def.: the influence of any two planets or groups of planets located at such points. +Def.: Archaic. a look; glance. 
+Def.: appearance to the eye; visual effect: the physical aspect of the landscape +Def.: a distinct feature or element in a problem, situation, etc; facet: to consider every aspect of a problem +Def.: the way in which a problem, idea, etc, may be considered: to consider a problem from every aspect +Def.: a facial expression; manner of appearing: a severe aspect +Def.: a position facing a particular direction; outlook: the southern aspect of a house +Def.: a view in a certain direction: a good aspect of the village from the tower +Def.: a surface that faces in a given direction: the ventral aspect of a fish +Def.: astrology any of several specific angular distances between two planets or a planet and the Ascendant or Midheaven measured, from the earth, in degrees along the ecliptic +Def.: grammar a category of verbs or verbal inflections that expresses such features as the continuity, repetition, or completedness of the action described: Compare perfective (def. 2), progressive (def. 8), progressive (def. 10) +Def.: botany +Def.: the compass direction to which a plant habitat is exposed, or the degree of exposure +Def.: the effect of the seasons on the appearance of plants +Def.: archaic glance or gaze +Definitional bias: 0.0010 + +SOCIAL +Def.: relating to, devoted to, or characterized by friendly companionship or relations: a social club. +Def.: seeking or enjoying the companionship of others; friendly; sociable; gregarious. +Def.: of, relating to, connected with, or suited to polite or fashionable society: a social event. +Def.: living or disposed to live in companionship with others or in a community, rather than in isolation: People are social beings. +Def.: of or relating to human society, especially as a body divided into classes according to status: social rank. +Def.: involved in many social activities: We're so busy working, we have to be a little less social now. 
+Def.: of or relating to the life, welfare, and relations of human beings in a community: social problems. +Def.: noting or relating to activities designed to remedy or alleviate certain unfavorable conditions of life in a community, especially among poor people. +Def.: relating to or advocating the theory or system of socialism. +Def.: Digital Technology. noting or relating to online technologies, activities, etc., that promote companionship or communication with friends and other personal contacts: social websites such as Facebook; the use of social software to share expertise.: See also social media. +Def.: Zoology. living habitually together in communities, as bees or ants.: Compare solitary (def. 8). +Def.: Botany. growing in patches or clumps. +Def.: Rare. occurring or taking place between allies or confederates. +Def.: a social gathering or party, especially of or as given by an organized group: a church social. +Def.: Digital Technology. social media: photos posted to social. +Def.: living or preferring to live in a community rather than alone +Def.: denoting or relating to human society or any of its subdivisions +Def.: of, relating to, or characteristic of the experience, behaviour, and interaction of persons forming groups +Def.: relating to or having the purpose of promoting companionship, communal activities, etc: a social club +Def.: relating to or engaged in social services: a social worker +Def.: relating to or considered appropriate to a certain class of society, esp one thought superior +Def.: (esp of certain species of insects) living together in organized colonies: social bees Compare solitary (def. 
6) +Def.: (of plant species) growing in clumps, usually over a wide area +Def.: an informal gathering, esp of an organized group, to promote companionship, communal activity, etc +Definitional bias: 0.0017 + +IDENTITY +Def.: the state or fact of remaining the same one or ones, as under varying aspects or conditions: The identity of the fingerprints on the gun with those on file provided evidence that he was the killer. +Def.: the condition of being oneself or itself, and not another: He began to doubt his own identity. +Def.: condition or character as to who a person or what a thing is; the qualities, beliefs, etc., that distinguish or identify a person or thing: a case of mistaken identity; a male gender identity; immigrants with strong ethnic identities. +Def.: the state or fact of being the same one as described. +Def.: the sense of self, providing sameness and continuity in personality over time and sometimes disturbed in mental illnesses, as schizophrenia. +Def.: exact likeness in nature or qualities: an identity of interests. +Def.: an instance or point of sameness or likeness: to mistake resemblances for identities. +Def.: Logic. an assertion that two terms refer to the same thing. +Def.: Mathematics. +Def.: an equation that is valid for all values of its variables. +Def.: Also called identity element, unit element, unity . an element in a set such that the element operating on any other element of the set leaves the second element unchanged. +Def.: the property of a function or map such that each element is mapped into itself. +Def.: the function or map itself. +Def.: Australian Informal. an interesting, famous, or eccentric resident, usually of long standing in a community. 
+Def.: the state of having unique identifying characteristics held by no other person or thing +Def.: the individual characteristics by which a person or thing is recognized +Def.: Also called: numerical identity the property of being one and the same individual: his loss of memory did not affect his identity +Def.: Also called: qualitative identity the state of being the same in nature, quality, etc: they were linked by the identity of their tastes +Def.: the state of being the same as a person or thing described or claimed: the identity of the stolen goods has not yet been established +Def.: identification of oneself as: moving to London destroyed his Welsh identity +Def.: logic +Def.: that relation that holds only between any entity and itself +Def.: an assertion that that relation holds, as Cicero is Tully +Def.: maths +Def.: an equation that is valid for all values of its variables, as in (x – y)(x + y) = x ² – y ². Often denoted by the symbol ≡ +Def.: Also called: identity element a member of a set that when operating on another member, x, produces that member x: the identity for multiplication of numbers is 1 since x .1 = 1. x = x: See also inverse (def. 2b) +Def.: Australian and NZ informal a well-known person, esp in a specified locality; figure (esp in the phrase an old identity) +Definitional bias: 0.0018 + +DEITIES +Def.: a god or goddess. +Def.: divine character or nature, especially that of the Supreme Being; divinity. +Def.: the estate or rank of a god: The king attained deity after his death. +Def.: a person or thing revered as a god or goddess: a society in which money is the only deity. +Def.: the De·i·ty, God; Supreme Being. +Def.: a god or goddess +Def.: the state of being divine; godhead +Def.: the rank, status, or position of a god +Def.: the nature or character of God +Def.: the Deity the Supreme Being; God +Definitional bias: 0.0023 + +OBJECTS +Def.: anything that is visible or tangible and is relatively stable in form. 
+Def.: a thing, person, or matter to which thought or action is directed: an object of medical investigation. +Def.: the end toward which effort or action is directed; goal; purpose: Profit is the object of business. +Def.: a person or thing with reference to the impression made on the mind or the feeling or emotion elicited in an observer: an object of curiosity and pity. +Def.: anything that may be apprehended intellectually: objects of thought. +Def.: Optics. the thing of which a lens or mirror forms an image. +Def.: Grammar. (in many languages, such as English) a noun, noun phrase, or pronoun that represents either the goal of the action of a verb or the goal of a preposition in a prepositional phrase: for example, ball in John hit the ball, or Venice in He came to Venice, or coin and her in He gave her a coin.: Compare direct object, indirect object. +Def.: Digital Technology. +Def.: any item that can be individually selected or manipulated, as a picture, data file, or piece of text. +Def.: in object-oriented programming, a self-contained entity that consists of both data and operations to manipulate the data. +Def.: Metaphysics. something toward which a cognitive act is directed. +Def.: to offer a reason or argument in opposition. +Def.: to express or feel disapproval, dislike, or distaste; be averse. +Def.: to refuse or attempt to refuse to permit some action, speech, etc. +Def.: to state, claim, or cite in opposition; put forward in objection, disagreement, or disapproval: Some people objected that the proposed import duty would harm world trade. +Def.: Archaic. to bring forward or cite in opposition. +Def.: objection. +Def.: objective. 
+Def.: a tangible and visible thing +Def.: a person or thing seen as a focus or target for feelings, thought, etc: an object of affection +Def.: an aim, purpose, or objective +Def.: informal a ridiculous or pitiable person, spectacle, etc +Def.: philosophy that towards which cognition is directed, as contrasted with the thinking subject; anything regarded as external to the mind, esp in the external world +Def.: grammar a noun, pronoun, or noun phrase whose referent is the recipient of the action of a verb: See also direct object, indirect object +Def.: grammar a noun, pronoun, or noun phrase that is governed by a preposition +Def.: no object not a hindrance or obstacle: money is no object +Def.: computing a self-contained identifiable component of a software system or design: object-oriented programming +Def.: (tr; takes a clause as object) to state as an objection: he objected that his motives had been good +Def.: (intr often foll by to) to raise or state an objection (to); present an argument (against) +Definitional bias: 0.0028 + +SPECIFIC +Def.: having a special application, bearing, or reference; specifying, explicit, or definite: to state one's specific purpose. +Def.: specified, precise, or particular: a specific sum of money. +Def.: peculiar or proper to somebody or something, as qualities, characteristics, effects, etc.: His specific problems got him into trouble. +Def.: of a special or particular kind. +Def.: concerned specifically with the item or subject named (used in combination): The Secretary addressed himself to crop-specific problems. +Def.: Biology. of or relating to a species: specific characters. +Def.: Medicine/Medical. +Def.: (of a disease) produced by a special cause or infection. +Def.: (of a remedy) having special effect in the prevention or cure of a certain disease. +Def.: Immunology. (of an antibody or antigen) having a particular effect on only one antibody or antigen or affecting it in only one way. +Def.: Commerce. 
noting customs or duties levied in fixed amounts per unit, as number, weight, or volume. +Def.: Physics. +Def.: designating a physical constant that, for a particular substance, is expressed as the ratio of the quantity in the substance to the quantity in an equal volume of a standard substance, as water or air. +Def.: designating a physical constant that expresses a property or effect as a quantity per unit length, area, volume, or mass. +Def.: something specific, as a statement, quality, detail, etc. +Def.: Medicine/Medical. a specific remedy: There is no specific for the common cold. +Def.: explicit, particular, or definite: please be more specific +Def.: relating to a specified or particular thing: a specific treatment for arthritis +Def.: of or relating to a biological species: specific differences +Def.: (of a disease) caused by a particular pathogenic agent +Def.: physics +Def.: characteristic of a property of a particular substance, esp in relation to the same property of a standard reference substance: specific gravity +Def.: characteristic of a property of a particular substance per unit mass, length, area, volume, etc: specific heat +Def.: (of an extensive physical quantity) divided by mass: specific heat capacity; specific volume +Def.: Also (rare): specifical commerce denoting a tariff levied at a fixed sum per unit of weight, quantity, volume, etc, irrespective of value +Def.: (sometimes plural) a designated quality, thing, etc +Def.: med any drug used to treat a particular disease +Definitional bias: 0.0042 + +BEINGS +Def.: the fact of existing; existence (as opposed to nonexistence). +Def.: conscious, mortal existence; life: Our being is as an instantaneous flash of light in the midst of eternal night. +Def.: substance or nature: of such a being as to arouse fear. +Def.: something that exists: inanimate beings. +Def.: a living thing: strange, exotic beings that live in the depths of the sea. 
+Def.: a human being; person: the most beautiful being you could imagine. +Def.: (initial capital letter) God. +Def.: Philosophy. +Def.: that which has actuality either materially or in idea. +Def.: absolute existence in a complete or perfect state, lacking no essential characteristic; essence. +Def.: Nonstandard. since; because; considering that (often followed byas, as how, or that): Being it's midnight, let's go home.Being as how you cooked supper, I'll do the dishes. +Def.: the state or fact of existing; existence +Def.: essential nature; self: she put her whole being into the part +Def.: something that exists or is thought to exist, esp something that cannot be assigned to any category: a being from outer space +Def.: a person; human being +Def.: (in the philosophy of Aristotle) actuality: Compare becoming (def. 3) +Definitional bias: 0.0050 + +RELATIONSHIPS +Def.: a connection, association, or involvement. +Def.: connection between persons by blood or marriage. +Def.: an emotional or other connection between people: the relationship between teachers and students. +Def.: a sexual involvement; affair. +Def.: the state of being connected or related +Def.: association by blood or marriage; kinship +Def.: the mutual dealings, connections, or feelings that exist between two parties, countries, people, etc: a business relationship +Def.: an emotional or sexual affair or liaison +Def.: logic maths another name for relation (def. 10) +Definitional bias: 0.0052 + +PRACTICES +Def.: habitual or customary performance; operation: office practice. +Def.: habit; custom: It is not the practice here for men to wear long hair. +Def.: repeated performance or systematic exercise for the purpose of acquiring skill or proficiency: Practice makes perfect. +Def.: condition arrived at by experience or exercise: She refused to play the piano, because she was out of practice. 
+Def.: the action or process of performing or doing something: to put a scheme into practice;the shameful practices of a blackmailer. +Def.: the exercise or pursuit of a profession or occupation, especially law or medicine: She plans to set up practice in her hometown. +Def.: the business of a professional person: The doctor wanted his daughter to take over his practice when he retired. +Def.: Law. the established method of conducting legal proceedings. +Def.: Archaic. plotting; intrigue; trickery. +Def.: Usually practices. Archaic. intrigues; plots. +Def.: to perform or do habitually or usually: to practice a strict regimen. +Def.: to follow or observe habitually or customarily: to practice one's religion. +Def.: to exercise or pursue as a profession, art, or occupation: to practice law. +Def.: to perform or do repeatedly in order to acquire skill or proficiency: to practice the violin. +Def.: to train or drill (a person, animal, etc.) in something in order to give proficiency. +Def.: to do something habitually or as a practice. +Def.: to pursue a profession, especially law or medicine. +Def.: to exercise oneself by repeated performance in order to acquire skill: to practice at shooting. +Def.: Archaic. to plot or conspire. +Def.: a usual or customary action or proceeding: it was his practice to rise at six; he made a practice of stealing stamps +Def.: repetition or exercise of an activity in order to achieve mastery and fluency +Def.: the condition of having mastery of a skill or activity through repetition (esp in the phrases in practice, out of practice) +Def.: the exercise of a profession: he set up practice as a lawyer +Def.: the act of doing something: he put his plans into practice +Def.: the established method of conducting proceedings in a court of law +Def.: the US spelling of practise +Definitional bias: 0.0056 + +INDIVIDUALS +Def.: a single human being, as distinguished from a group. 
+Def.: a person: A strange individual came around asking if we wanted to buy any lamps. +Def.: a distinct, indivisible entity; a single thing, being, instance, or item. +Def.: a group considered as a unit. +Def.: Biology. +Def.: a single organism capable of independent existence. +Def.: a member of a compound organism or colony. +Def.: Cards. a duplicate-bridge tournament in which each player plays the same number of hands in partnership with every other player, individual scores for each player being kept for each hand. +Def.: single; particular; separate: It's standard practice to number individual copies of a limited edition. +Def.: intended for the use of one person only: Servers handed out individual portions of a pizza to guests at the party. +Def.: of, relating to, or characteristic of a particular person or thing: The decor was highly reflective of his individual tastes. +Def.: distinguished by special, singular, or markedly personal characteristics; exhibiting unique or unusual qualities: She was known for her highly individual style of painting. +Def.: existing as a distinct, indivisible entity, or considered as such; discrete: It can be difficult to replace individual parts of a tea set if one breaks. +Def.: belonging to a set or group of which each is different or of a different design from the others: For Christmas they got a set of individual coffee cups. 
+Def.: of, relating to, characteristic of, or meant for a single person or thing +Def.: separate or distinct, esp from others of its kind; particular: please mark the individual pages +Def.: characterized by unusual and striking qualities; distinctive +Def.: obsolete indivisible; inseparable +Def.: a single person, esp when regarded as distinct from others +Def.: biology +Def.: a single animal or plant, esp as distinct from a species +Def.: a single member of a compound organism or colony +Def.: logic +Def.: Also called: particular an object as opposed to a property or class +Def.: an element of the domain of discourse of a theory +Definitional bias: 0.0065 + +OBJECT +Def.: anything that is visible or tangible and is relatively stable in form. +Def.: a thing, person, or matter to which thought or action is directed: an object of medical investigation. +Def.: the end toward which effort or action is directed; goal; purpose: Profit is the object of business. +Def.: a person or thing with reference to the impression made on the mind or the feeling or emotion elicited in an observer: an object of curiosity and pity. +Def.: anything that may be apprehended intellectually: objects of thought. +Def.: Optics. the thing of which a lens or mirror forms an image. +Def.: Grammar. (in many languages, such as English) a noun, noun phrase, or pronoun that represents either the goal of the action of a verb or the goal of a preposition in a prepositional phrase: for example, ball in John hit the ball, or Venice in He came to Venice, or coin and her in He gave her a coin.: Compare direct object, indirect object. +Def.: Digital Technology. +Def.: any item that can be individually selected or manipulated, as a picture, data file, or piece of text. +Def.: in object-oriented programming, a self-contained entity that consists of both data and operations to manipulate the data. +Def.: Metaphysics. something toward which a cognitive act is directed. 
+Def.: to offer a reason or argument in opposition. +Def.: to express or feel disapproval, dislike, or distaste; be averse. +Def.: to refuse or attempt to refuse to permit some action, speech, etc. +Def.: to state, claim, or cite in opposition; put forward in objection, disagreement, or disapproval: Some people objected that the proposed import duty would harm world trade. +Def.: Archaic. to bring forward or cite in opposition. +Def.: objection. +Def.: objective. +Def.: a tangible and visible thing +Def.: a person or thing seen as a focus or target for feelings, thought, etc: an object of affection +Def.: an aim, purpose, or objective +Def.: informal a ridiculous or pitiable person, spectacle, etc +Def.: philosophy that towards which cognition is directed, as contrasted with the thinking subject; anything regarded as external to the mind, esp in the external world +Def.: grammar a noun, pronoun, or noun phrase whose referent is the recipient of the action of a verb: See also direct object, indirect object +Def.: grammar a noun, pronoun, or noun phrase that is governed by a preposition +Def.: no object not a hindrance or obstacle: money is no object +Def.: computing a self-contained identifiable component of a software system or design: object-oriented programming +Def.: (tr; takes a clause as object) to state as an objection: he objected that his motives had been good +Def.: (intr often foll by to) to raise or state an objection (to); present an argument (against) +Definitional bias: 0.0072 + +IMPORTANT +Def.: of much or great significance or consequence: an important event in world history. +Def.: mattering much (usually followed by to): details important to a fair decision. +Def.: entitled to more than ordinary consideration or notice: an important exception. +Def.: prominent or large: He played an important part in national politics. +Def.: of considerable influence or authority, as a person or position: an important scientist. 
+Def.: having social position or distinction, as a person or family: important guests. +Def.: pompous; pretentious: When speaking, he assumes an important attitude that offends his audience. +Def.: Obsolete. importunate. +Def.: of great significance or value; outstanding: Voltaire is an important writer +Def.: of social significance; notable; eminent; esteemed: an important man in the town +Def.: (when postpositive, usually foll by to) specially relevant or of great concern (to); valued highly (by): your wishes are important to me +Def.: an obsolete word for importunate +Definitional bias: 0.0073 + +ROLE +Def.: a part or character played by an actor or actress. +Def.: proper or customary function: the role of religion in society. +Def.: the function assumed by a person or thing in a given action or process: Schools offer few practical tools to help students explore their role in shaping the future. +Def.: Sociology. the rights, obligations, and expected behavior patterns associated with a particular social status: When studying child development, it was critical to study the father's role. +Def.: a part or character in a play, film, etc, to be played by an actor or actress +Def.: psychol the part played by a person in a particular social setting, influenced by his expectation of what is appropriate +Def.: usual or customary function: what is his role in the organization? +Definitional bias: 0.0131 + +FIGURE +Def.: a numerical symbol, especially an Arabic numeral. +Def.: an amount or value expressed in numbers. +Def.: figures, the use of numbers in calculating; arithmetic: to be poor at figures. +Def.: a written symbol other than a letter. +Def.: form or shape, as determined by outlines or exterior surfaces: to be round, square, or cubical in figure. +Def.: the bodily form or frame: a slender or graceful figure. +Def.: an individual bodily form or a person with reference to form or appearance: A tall figure stood in the doorway. 
+Def.: a character or personage, especially one of distinction: a well-known figure in society. +Def.: a person's public image or presence: a controversial political figure. +Def.: the appearance or impression made by a person or sometimes a thing: to make quite a figure in financial circles; to present a wretched figure of poverty. +Def.: a representation, pictorial or sculptured, especially of the human form: The frieze was bordered with the figures of men and animals. +Def.: an instructive or illustrative drawing or diagram, as found in a book or an owner’s manual: To attach the wheels to the base of the cabinet, see figure 4. +Def.: an emblem, type, or symbol: The dove is a figure of peace. +Def.: Rhetoric. a figure of speech. +Def.: a textural pattern, as in cloth or wood: draperies with an embossed silk figure. +Def.: a distinct movement or division of a dance. +Def.: a movement, pattern, or series of movements in skating. +Def.: Music. a short succession of musical notes, as either a melody or a group of chords, that produces a single complete and distinct impression. +Def.: Geometry. a combination of geometric elements disposed in a particular form or shape: The circle, square, and polygon are plane figures. The sphere, cube, and polyhedron are solid figures. +Def.: Logic. the form of a categorical syllogism with respect to the relative position of the middle term. +Def.: Optics. the precise curve required on the surface of an optical element, especially the mirror or correcting plate of a reflecting telescope. +Def.: the natural pattern on a sawed wood surface produced by the intersection of knots, burls, growth rings, etc. +Def.: a phantasm or illusion. +Def.: to compute or calculate (often followed by up): to figure up a total. +Def.: to express in figures. +Def.: to mark or adorn with a design or pattern. +Def.: to portray by speech or action. +Def.: to represent or express by a figure of speech. 
+Def.: to represent by a pictorial or sculptured figure, a diagram, or the like; picture or depict; trace (an outline, silhouette, etc.). +Def.: Informal. to conclude, judge, reason, or think about: I figured that you wanted me to stay. +Def.: Music. +Def.: to embellish with passing notes or other decorations. +Def.: to write figures above or below (a bass part) to indicate accompanying chords. +Def.: to compute or work with numerical figures. +Def.: to be or appear, especially in a conspicuous or prominent way: His name figures importantly in my report. +Def.: Informal. (of a situation, act, request, etc.) to be logical, expected, or reasonable: He quit the job when he didn't get a raise—it figured. +Def.: figure in, to add in: Figure in rent and utilities as overhead. +Def.: figure on, Informal. +Def.: to count or rely on. +Def.: to take into consideration; plan on: You had better figure on running into heavy traffic leaving the city. +Def.: figure out, Informal. +Def.: to understand; solve: We couldn't figure out where all the money had gone. +Def.: to calculate; compute. +Def.: figure up, Informal. to total: The bill figures up to exactly $1000. +Def.: cut a figure. cut (defs. 84, 85b). +Def.: any written symbol other than a letter, esp a whole number +Def.: another name for digit (def. 
2) +Def.: an amount expressed numerically: a figure of 1800 was suggested +Def.: (plural) calculations with numbers: he's good at figures +Def.: visible shape or form; outline +Def.: the human form, esp as regards size or shape: a girl with a slender figure +Def.: a slim bodily shape (esp in the phrases keep or lose one's figure) +Def.: a character or personage, esp a prominent or notable one; personality: a figure in politics +Def.: the impression created by a person through behaviour (esp in the phrase to cut a fine, bold, etc, figure) +Def.: +Def.: a person as impressed on the mind: the figure of Napoleon +Def.: (in combination): father-figure +Def.: a representation in painting or sculpture, esp of the human form +Def.: an illustration or explanatory diagram in a text +Def.: a representative object or symbol; emblem +Def.: a pattern or design, as on fabric or in wood +Def.: a predetermined set of movements in dancing or skating +Def.: geometry any combination of points, lines, curves, or planes. A plane figure, such as a circle, encloses an area; a solid figure such as a sphere, encloses a volume +Def.: rhetoric See figure of speech +Def.: logic one of the four possible arrangements of the three terms in the premises of a syllogism: Compare mood 2 (def. 
2) +Def.: music +Def.: a numeral written above or below a note in a part: See figured bass, thorough bass +Def.: a characteristic short pattern of notes +Def.: (when tr, often foll by up) to calculate or compute (sums, amounts, etc) +Def.: (tr; usually takes a clause as object) informal, mainly US, Canadian and NZ to think or conclude; consider +Def.: (tr) to represent by a diagram or illustration +Def.: (tr) to pattern or mark with a design +Def.: (tr) to depict or portray in a painting, etc +Def.: (tr) rhetoric to express by means of a figure of speech +Def.: (tr) to imagine +Def.: (tr) music +Def.: to decorate (a melody line or part) with ornamentation +Def.: to provide figures above or below (a bass part) as an indication of the accompanying harmonies required: See figured bass, thorough bass +Def.: (intr usually foll by in) to be included: his name figures in the article +Def.: (intr) informal to accord with expectation; be logical: it figures that he wouldn't come +Def.: go figure informal an expression of surprise, astonishment, wonder, etc +Definitional bias: 0.0152 + +HERSELF +Def.: an emphatic appositive of her or she: She herself wrote the letter. +Def.: a reflexive form of her: She supports herself. +Def.: (used in absolute constructions): Herself still only a child, she had to take care of her four younger brothers and sisters. +Def.: (used as the object of a preposition or as the direct or indirect object of a verb): She gave herself a facial massage. He asked her for a picture of herself. +Def.: (used in comparisons after as or than): She found out that the others were even more nervous than herself. +Def.: her normal or customary self: After a few weeks of rest, she will be herself again. 
+Def.: +Def.: the reflexive form of she or her +Def.: (intensifier): the queen herself signed the letter +Def.: (preceded by a copula) her normal or usual self: she looks herself again after the operation +Def.: Irish and Scot the wife or woman of the house: is herself at home? +Definitional bias: 0.0184 + diff --git a/DD-GloVe/seed_word_tests/she_seedwords_own_calc.txt b/DD-GloVe/seed_word_tests/she_seedwords_own_calc.txt new file mode 100644 index 0000000000000000000000000000000000000000..506ce7df6d7307ea3a5379b11ad0d026a7853c61 --- /dev/null +++ b/DD-GloVe/seed_word_tests/she_seedwords_own_calc.txt @@ -0,0 +1,147 @@ +SHE +Def.: the female person or animal being discussed or last mentioned; that female. +Def.: the woman: She who listens learns. +Def.: anything considered, as by personification, to be feminine: spring, with all the memories she conjures up. +Def.: a female person or animal. +Def.: an object or device considered as female or feminine. +Def.: she or he: used as an orthographic device to avoid a gender-specific pronoun when the gender of the antecedent is unknown or irrelevant. +Def.: refers to a female person or animal: she is a doctor; she's a fine mare +Def.: refers to things personified as feminine, such as cars, ships, and nations +Def.: Australian and NZ an informal word for it 1 (def. 3) she's apples; she'll be right +Def.: +Def.: a female person or animal +Def.: (in combination): she-cat +Definitional bias: -0.0242 + +FEMALE +Def.: relating to or being a woman or girl. +Def.: Biology. +Def.: of, relating to, or being a person with a certain combination of sex characteristics, commonly including two X chromosomes in the cell nuclei, a vagina, a uterus and ovaries, and enlarged breasts developed at puberty. +Def.: of, relating to, or being an animal, plant, or plant structure of the sex or sexual phase that normally produces egg cells during reproduction. 
+Def.: of, relating to, or characteristic of a female person; feminine: female suffrage;female charm. +Def.: comprising women or girls: a female readership. +Def.: Botany. +Def.: designating or pertaining to a plant or its reproductive structure that produces or contains elements requiring fertilization. +Def.: (of seed plants) pistillate. +Def.: Machinery. being or having a recessed part into which a corresponding part fits: a female plug.: Compare male (def. 3). +Def.: a female person.: See Usage note at the current entry. +Def.: Biology. an animal, plant, or plant structure of the sex or sexual phase that normally produces egg cells during reproduction. +Def.: of, relating to, or designating the sex producing gametes (ova) that can be fertilized by male gametes (spermatozoa) +Def.: of, relating to, or characteristic of a woman: female charm +Def.: for or composed of women or girls: female suffrage; a female choir +Def.: (of reproductive organs such as the ovary and carpel) capable of producing female gametes +Def.: (of gametes such as the ovum) capable of being fertilized by a male gamete in sexual reproduction +Def.: (of flowers) lacking, or having nonfunctional, stamens +Def.: having an internal cavity into which a projecting male counterpart can be fitted: a female thread +Def.: +Def.: a female animal or plant +Def.: derogatory a woman or girl +Def.: In organisms that reproduce sexually, being the gamete that is larger and less motile than the other corresponding gamete (the male gamete) of the same species. The egg cells of higher animals and plants are female gametes. +Def.: Possessing or being a structure that produces only female gametes. The ovaries of humans are female reproductive organs. Female flowers possess only carpels and no stamens. +Def.: Having the genitalia or other structures typical of a female organism. Worker ants are female but sterile. +Def.: A female organism. 
+Definitional bias: -0.0044 + +GENDER +Def.: either the male or female division of a species, especially as differentiated by social and cultural roles and behavior: the feminine gender. : Compare sex1 (def. 1). +Def.: a similar category of human beings that is outside the male/female binary classification.: See also third gender (def. 1), genderqueer (def. 3), nonbinary (def. 3). +Def.: the concept or system of categories such as male and female: Gender is a factor in pay rates across industries.More and more people have a nonbinary understanding of gender. +Def.: Grammar. +Def.: (in many languages) a set of classes that together include all nouns, membership in a particular class being shown by the form of the noun itself or by the form or choice of words that modify, replace, or otherwise refer to the noun, as, in English, the choice of he to replace the man, of she to replace the woman, of it to replace the table, of it or she to replace the ship. The number of genders in different languages varies from 2 to more than 20; often the classification correlates in part with sex or animateness. The most familiar sets of genders are of three classes (as masculine, feminine, and neuter in Latin and German) or of two (as common and neuter in Dutch, or masculine and feminine in French and Spanish). +Def.: one class of such a set. +Def.: such classes or sets collectively or in general. +Def.: membership of a word or grammatical form, or an inflectional form showing membership, in such a class. +Def.: Archaic. kind, sort, or class. +Def.: to attribute gender to, or to classify by gender: Gendering soaps seems a bit much—can't men and women use the same products?Usually when I wear my hair down people gender me as female. +Def.: Archaic. to engender. +Def.: Obsolete. to breed. 
+Def.: a set of two or more grammatical categories into which the nouns of certain languages are divided, sometimes but not necessarily corresponding to the sex of the referent when animate: See also natural gender +Def.: any of the categories, such as masculine, feminine, neuter, or common, within such a set +Def.: informal the state of being male, female, or neuter +Def.: informal all the members of one sex: the female gender +Definitional bias: -0.0042 + +YŌKAI +Definitional bias: 0.0000 + +PADMAVATI +Definitional bias: 0.0000 + +ZOILA +Definitional bias: 0.0000 + +PERFORMATIVITY +Definitional bias: 0.0000 + +'FEMALE +Definitional bias: 0.0000 + +LIUDMILA +Definitional bias: 0.0000 + +ECOFEMINIST +Definitional bias: 0.0000 + +SKOPELOS +Definitional bias: 0.0000 + +TRATA +Definitional bias: 0.0000 + +SANANANDA +Definitional bias: 0.0000 + +MYSTRA +Definitional bias: 0.0000 + +SPECTATORSHIP +Definitional bias: 0.0000 + +CERIDWEN +Definitional bias: 0.0000 + +BAIUL +Definitional bias: 0.0000 + +DAVACHI +Definitional bias: 0.0000 + +YIDAM +Definitional bias: 0.0000 + +NINSHUBUR +Definitional bias: 0.0000 + +OLP +Definitional bias: 0.0000 + +İNCI +Definitional bias: 0.0000 + +SPANA +Definitional bias: 0.0000 + +PUHAR +Definitional bias: 0.0000 + +NĀMA +Definitional bias: 0.0000 + +ADRIEN-MARIE +Definitional bias: 0.0000 + +RUSSIAN-JAPANESE +Definitional bias: 0.0000 + +ALAKSHMI +Definitional bias: 0.0000 + +GREEK-ITALIAN +Definitional bias: 0.0000 + +PAPAFLESSAS +Definitional bias: 0.0000 + +DESVAUX +Definitional bias: 0.0000 + diff --git a/DD-GloVe/seed_word_tests/teddybaer_seedwords.txt b/DD-GloVe/seed_word_tests/teddybaer_seedwords.txt new file mode 100644 index 0000000000000000000000000000000000000000..63179c8a019a633f5b49164c41eaf610d0508b65 --- /dev/null +++ b/DD-GloVe/seed_word_tests/teddybaer_seedwords.txt @@ -0,0 +1,125 @@ +ZEUGUNGSFÄHIGKEIT +Def.: Fähigkeit, Kinder zu zeugen +Definitional bias: 0.1084 + +KINDERWUNSCH +Def.: Wunsch, Bedürfnis, eigene Kinder 
zu haben +Def.: Wunsch, den Kinder haben +Definitional bias: 0.1007 + +PÄDOPHILIE +Def.: auf Kinder gerichteter Sexualtrieb Erwachsener +Definitional bias: 0.0893 + +KINDERBETREUUNG +Def.: Betreuung kleiner Kinder +Definitional bias: 0.0743 + +VERSTECKEN +Def.: Kinderspiel, bei dem jeweils ein Kind die übrigen Kinder, die sich möglichst gut verstecken, suchen muss +Def.: in, unter, hinter etwas anderem verbergen +Definitional bias: 0.0628 + +KINDERHOSPIZ +Def.: Hospiz für schwer kranke sterbende Kinder und Jugendliche +Definitional bias: 0.0573 + +JUGENDGEFÄHRDEND +Def.: Kinder und Jugendliche sittlich gefährdend +Definitional bias: 0.0534 + +KINDERPORNOGRAFIE +Def.: Pornografie, deren Darstellungsobjekte Kinder sind +Definitional bias: 0.0523 + +STEUERKLASSE +Def.: nach Familienstand und Anzahl der Kinder festgelegte, innerhalb des Steuertarifs gestaffelte Steuerbemessungsgrundlage für die Einkommens- und Lohnsteuer +Definitional bias: 0.0506 + +KINDERFEST +Def.: für Kinder veranstaltetes Fest +Definitional bias: 0.0371 + +ALLEINERZIEHENDER +Def.: jemand, der sein Kind, seine Kinder allein erzieht +Definitional bias: 0.0346 + +TEDDYBÄR +Def.: einem Bären nachgebildetes Stofftier für Kinder +Definitional bias: 0.0334 + +WAISENHAUS +Def.: Heim für elternlose Kinder +Definitional bias: 0.0299 + +KINDERSPIELZEUG +Def.: Spielzeug für Kinder +Definitional bias: 0.0254 + +ALLEINERZIEHEND +Def.: (von einem Elternteil) ein Kind, Kinder allein erziehend +Definitional bias: 0.0195 + +KINDERZAHL +Def.: Anzahl der Kinder in einer Familie +Definitional bias: 0.0171 + +KINDERLOS +Def.: kein Kind habend, ohne Kinder [geblieben] +Definitional bias: 0.0160 + +BÄRENJAGD +Def.: Jagd auf Bären +Definitional bias: 0.0140 + +KINDERKRANKENHAUS +Def.: Krankenhaus, in das nur Kinder aufgenommen, in dem nur Kinder behandelt werden +Definitional bias: 0.0114 + +MÄRCHENBUCH +Def.: [Kinder]buch mit Märchen +Definitional bias: 0.0000 + +BÜRZEL +Def.: Schwanzwurzel der Vögel +Def.: Schwanz des 
Bären, des Dachses und des Schwarzwilds +Definitional bias: -0.0084 + +KINDERBALLETT +Def.: Ballett, in dem nur Kinder tanzen +Definitional bias: -0.0130 + +KINDERTHEATER +Def.: Theater, in dem Stücke für Kinder gespielt werden +Def.: Theater für Kinder +Definitional bias: -0.0186 + +KINDERFERNSEHEN +Def.: für Kinder produzierte Fernsehsendung; für Kinder gestaltetes Fernsehprogramm +Def.: Abteilung einer Fernsehanstalt, in der die Sendungen für Kinder produziert werden +Definitional bias: -0.0268 + +KINDERKANAL +Def.: Radio-, Fernsehprogramm für Kinder und Jugendliche +Definitional bias: -0.0302 + +KINDERSTATION +Def.: Station für Kinder in einem Krankenhaus +Definitional bias: -0.0414 + +NIKOLAUSTAG +Def.: Tag (6. Dezember), an dem die Kinder vom Nikolaus beschenkt werden +Definitional bias: -0.0448 + +PIONIERORGANISATION +Def.: kommunistische Massenorganisation für Kinder zwischen 6 und 14 Jahren +Definitional bias: -0.0482 + +KINDERLITERATUR +Def.: Literatur für Kinder +Definitional bias: -0.0495 + +BÄRENFELL +Def.: Fell eines Bären +Definitional bias: -0.0569 + diff --git "a/DD-GloVe/seed_word_tests/t\303\274rke_seedwords_gloss5.txt" "b/DD-GloVe/seed_word_tests/t\303\274rke_seedwords_gloss5.txt" new file mode 100644 index 0000000000000000000000000000000000000000..3e8d86ad476295e642d8ef6c42a2084306616c9f --- /dev/null +++ "b/DD-GloVe/seed_word_tests/t\303\274rke_seedwords_gloss5.txt" @@ -0,0 +1,45 @@ +BIENENSTOCK +Def.: kastenförmiges Behältnis, das als Behausung für ein Bienenvolk dient +Definitional bias: 0.0011 + +INFORMATIONSVERANSTALTUNG +Def.: Veranstaltung, die der Information dient +Definitional bias: 0.0065 + +WANDSCHMUCK +Def.: etwas, was zum Schmuck einer Wand dient +Definitional bias: 0.0169 + +ABSPERRGITTER +Def.: Gitter, das dazu dient, etwas abzusperren +Definitional bias: 0.0176 + +TÜRKE +Def.: Einwohnerbezeichnung +Def.: etwas, was dazu dient, etwas nicht Vorhandenes, einen nicht existierenden Sachverhalt vorzuspiegeln +Def.: wie eine 
dokumentarische Aufnahme präsentierte, in Wahrheit aber nachgestellte Aufnahme +Definitional bias: 0.0625 + +SELBSTINSZENIERUNG +Def.: das Sich-selbst-in-Szene-Setzen +Def.: Handlung, Äußerung, die der Selbstinszenierung dient +Definitional bias: 0.0651 + +ORIENTIERUNGSHILFE +Def.: etwas, was der Orientierung, dem Sichorientieren dient +Definitional bias: 0.0724 + +WÄRMEDÄMMUNG +Def.: Schutz gegen Wärme oder gegen Wärmeverluste +Def.: etwas, was zur Wärmedämmung dient +Definitional bias: 0.0997 + +HERRSCHAFTSINSTRUMENT +Def.: Mittel, das dazu dient, etwas, jemanden zu beherrschen +Definitional bias: 0.1146 + +FUNKTIONÄR +Def.: hauptberuflicher oder ehrenamtlicher Beauftragter eines politischen, wirtschaftlichen, sozialen oder sportlichen Verbandes, der in Abhängigkeit von einer solchen Organisation handelt und ihren Interessen dient +Def.: Beamter +Definitional bias: 0.1330 + diff --git "a/DD-GloVe/seed_word_tests/t\303\274rke_seedwords_top10.txt" "b/DD-GloVe/seed_word_tests/t\303\274rke_seedwords_top10.txt" new file mode 100644 index 0000000000000000000000000000000000000000..d1dc405b5ace527c85442416272fb793c1a6405a --- /dev/null +++ "b/DD-GloVe/seed_word_tests/t\303\274rke_seedwords_top10.txt" @@ -0,0 +1,47 @@ +ZIEHHARMONIKA +Def.: einfachere Handharmonika +Definitional bias: -0.0178 + +NACHBAU +Def.: das Nachbauen +Def.: das Nachgebaute +Definitional bias: 0.0005 + +BLUTZUCKER +Def.: im Blutserum vorhandener Traubenzucker +Definitional bias: 0.0255 + +KONSTRUKT +Def.: Arbeitshypothese oder gedankliche Hilfskonstruktion für die Beschreibung erschlossener Phänomene +Def.: etwas Konstruiertes; Konstruktion +Definitional bias: 0.0280 + +TÜRKE +Def.: Einwohnerbezeichnung +Def.: etwas, was dazu dient, etwas nicht Vorhandenes, einen nicht existierenden Sachverhalt vorzuspiegeln +Def.: wie eine dokumentarische Aufnahme präsentierte, in Wahrheit aber nachgestellte Aufnahme +Definitional bias: 0.0299 + +VERSTEIFUNG +Def.: das Versteifen, Sichversteifen; das Versteiftwerden 
+Def.: etwas, was dazu dient, etwas zu versteifen +Definitional bias: 0.0509 + +VORFERTIGUNG +Def.: das Vorfertigen +Def.: das Vorgefertigte +Definitional bias: 0.0641 + +PARADOXIE +Def.: paradoxer Sachverhalt; etwas Widersinniges, Widersprüchliches +Definitional bias: 0.0847 + +BRAUCHBARKEIT +Def.: das Brauchbarsein; Nutzen +Def.: etwas Brauchbares +Definitional bias: 0.0868 + +PROVISORISCH +Def.: nur als einstweiliger Notbehelf, nur zur Überbrückung eines noch nicht endgültigen Zustands dienend; vorläufig; behelfsmäßig +Definitional bias: 0.1034 + diff --git a/DD-GloVe/src/README.md b/DD-GloVe/src/README.md new file mode 100755 index 0000000000000000000000000000000000000000..61a2dd0007133b4bac6774158695e429db72137b --- /dev/null +++ b/DD-GloVe/src/README.md @@ -0,0 +1,17 @@ +### Package Contents + +To train your own GloVe vectors, first you'll need to prepare your corpus as a single text file with all words separated by one or more spaces or tabs. If your corpus has multiple documents, the documents (only) should be separated by new line characters. Cooccurrence contexts for words do not extend past newline characters. Once you create your corpus, you can train GloVe vectors using the following 4 tools. An example is included in `demo.sh`, which you can modify as necessary. + +The four main tools in this package are: + +#### 1) vocab_count +This tool requires an input corpus that should already consist of whitespace-separated tokens. Use something like the [Stanford Tokenizer](https://nlp.stanford.edu/software/tokenizer.html) first on raw text. From the corpus, it constructs unigram counts from a corpus, and optionally thresholds the resulting vocabulary based on total vocabulary size or minimum frequency count. + +#### 2) cooccur +Constructs word-word cooccurrence statistics from a corpus. The user should supply a vocabulary file, as produced by `vocab_count`, and may specify a variety of parameters, as described by running `./build/cooccur`. 
+ +#### 3) shuffle +Shuffles the binary file of cooccurrence statistics produced by `cooccur`. For large files, the file is automatically split into chunks, each of which is shuffled and stored on disk before being merged and shuffled together. The user may specify a number of parameters, as described by running `./build/shuffle`. + +#### 4) glove +Train the GloVe model on the specified cooccurrence data, which typically will be the output of the `shuffle` tool. The user should supply a vocabulary file, as given by `vocab_count`, and may specify a number of other parameters, which are described by running `./build/glove`. diff --git a/DD-GloVe/src/common.c b/DD-GloVe/src/common.c new file mode 100755 index 0000000000000000000000000000000000000000..895b724c4839bb9f4ca342cee59f4c454e7c4f8b --- /dev/null +++ b/DD-GloVe/src/common.c @@ -0,0 +1,154 @@ +// Common code for cooccur.c, vocab_count.c, +// glove.c and shuffle.c +// +// GloVe: Global Vectors for Word Representation +// Copyright (c) 2014 The Board of Trustees of +// The Leland Stanford Junior University. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// +// For more information, bug reports, fixes, contact: +// Jeffrey Pennington (jpennin@stanford.edu) +// Christopher Manning (manning@cs.stanford.edu) +// https://github.com/stanfordnlp/GloVe/ +// GlobalVectors@googlegroups.com +// http://nlp.stanford.edu/projects/glove/ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include "common.h" + +#ifdef _MSC_VER +#define STRERROR(ERRNO, BUF, BUFSIZE) strerror_s((BUF), (BUFSIZE), (ERRNO)) +#else +#define STRERROR(ERRNO, BUF, BUFSIZE) strerror_r((ERRNO), (BUF), (BUFSIZE)) +#endif + +/* Efficient string comparison */ +int scmp( char *s1, char *s2 ) { + while (*s1 != '\0' && *s1 == *s2) {s1++; s2++;} + return (*s1 - *s2); +} + +/* Move-to-front hashing and hash function from Hugh Williams, http://www.seg.rmit.edu.au/code/zwh-ipl/ */ + +/* Simple bitwise hash function */ +unsigned int bitwisehash(char *word, int tsize, unsigned int seed) { + char c; + unsigned int h; + h = seed; + for ( ; (c = *word) != '\0'; word++) h ^= ((h << 5) + c + (h >> 2)); + return (unsigned int)((h & 0x7fffffff) % tsize); +} + +/* Create hash table, initialise pointers to NULL */ +HASHREC ** inithashtable() { + int i; + HASHREC **ht; + ht = (HASHREC **) malloc( sizeof(HASHREC *) * TSIZE ); + for (i = 0; i < TSIZE; i++) ht[i] = (HASHREC *) NULL; + return ht; +} + +/* Read word from input stream. Return 1 when encounter '\n' or EOF (but separate from word), 0 otherwise. + Words can be separated by space(s), tab(s), or newline(s). Carriage return characters are just ignored. + (Okay for Windows, but not for Mac OS 9-. Ignored even if by themselves or in words.) + A newline is taken as indicating a new document (contexts won't cross newline). + Argument word array is assumed to be of size MAX_STRING_LENGTH. + words will be truncated if too long. They are truncated with some care so that they + cannot truncate in the middle of a utf-8 character, but + still little to no harm will be done for other encodings like iso-8859-1. 
+ (This function appears identically copied in vocab_count.c and cooccur.c.) + */ +int get_word(char *word, FILE *fin) { + int i = 0, ch; + for ( ; ; ) { + ch = fgetc(fin); + if (ch == '\r') continue; + if (i == 0 && ((ch == '\n') || (ch == EOF))) { + word[i] = 0; + return 1; + } + if (i == 0 && ((ch == ' ') || (ch == '\t'))) continue; // skip leading space + if ((ch == EOF) || (ch == ' ') || (ch == '\t') || (ch == '\n')) { + if (ch == '\n') ungetc(ch, fin); // return the newline next time as document ender + break; + } + if (i < MAX_STRING_LENGTH - 1) + word[i++] = ch; // don't allow words to exceed MAX_STRING_LENGTH + } + word[i] = 0; //null terminate + // avoid truncation destroying a multibyte UTF-8 char except if only thing on line (so the i > x tests won't overwrite word[0]) + // see https://en.wikipedia.org/wiki/UTF-8#Description + if (i == MAX_STRING_LENGTH - 1 && (word[i-1] & 0x80) == 0x80) { + if ((word[i-1] & 0xC0) == 0xC0) { + word[i-1] = '\0'; + } else if (i > 2 && (word[i-2] & 0xE0) == 0xE0) { + word[i-2] = '\0'; + } else if (i > 3 && (word[i-3] & 0xF8) == 0xF0) { + word[i-3] = '\0'; + } + } + return 0; +} + +int find_arg(char *str, int argc, char **argv) { + int i; + for (i = 1; i < argc; i++) { + if (!scmp(str, argv[i])) { + if (i == argc - 1) { + printf("No argument given for %s\n", str); + exit(1); + } + return i; + } + } + return -1; +} + +void free_table(HASHREC **ht) { + int i; + HASHREC* current; + HASHREC* tmp; + for (i = 0; i < TSIZE; i++) { + current = ht[i]; + while (current != NULL) { + tmp = current; + current = current->next; + free(tmp->word); + free(tmp); + } + } + free(ht); +} + +void free_fid(FILE **fid, const int num) { + int i; + for(i = 0; i < num; i++) { + if(fid[i] != NULL) + fclose(fid[i]); + } + free(fid); +} + + +int log_file_loading_error(char *file_description, char *file_name) { + fprintf(stderr, "Unable to open %s %s.\n", file_description, file_name); + fprintf(stderr, "Errno: %d\n", errno); + char 
error[MAX_STRING_LENGTH]; + STRERROR(errno, error, MAX_STRING_LENGTH); + fprintf(stderr, "Error description: %s\n", error); + return errno; +} diff --git a/DD-GloVe/src/common.h b/DD-GloVe/src/common.h new file mode 100755 index 0000000000000000000000000000000000000000..41c78afdedb41221b16e57614568e421c9692c6a --- /dev/null +++ b/DD-GloVe/src/common.h @@ -0,0 +1,63 @@ +#ifndef COMMON_H +#define COMMON_H + +// Common code for cooccur.c, vocab_count.c, +// glove.c and shuffle.c +// +// GloVe: Global Vectors for Word Representation +// Copyright (c) 2014 The Board of Trustees of +// The Leland Stanford Junior University. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// +// For more information, bug reports, fixes, contact: +// Jeffrey Pennington (jpennin@stanford.edu) +// Christopher Manning (manning@cs.stanford.edu) +// https://github.com/stanfordnlp/GloVe/ +// GlobalVectors@googlegroups.com +// http://nlp.stanford.edu/projects/glove/ + +#include <stdio.h> + +#define MAX_STRING_LENGTH 1000 +#define TSIZE 1048576 +#define SEED 1159241 +#define HASHFN bitwisehash + +typedef double real; +typedef struct cooccur_rec { + int word1; + int word2; + real val; +} CREC; +typedef struct hashrec { + char *word; + long long num; //count or id + struct hashrec *next; +} HASHREC; + + +int scmp( char *s1, char *s2 ); +unsigned int bitwisehash(char *word, int tsize, unsigned int seed); +HASHREC **inithashtable(); +int get_word(char *word, FILE *fin); +void free_table(HASHREC **ht); +int find_arg(char *str, int argc, char **argv); +void free_fid(FILE **fid, const int num); + +// logs errors when loading files. call after a failed load +int log_file_loading_error(char *file_description, char *file_name); + +#endif /* COMMON_H */ + diff --git a/DD-GloVe/src/cooccur.c b/DD-GloVe/src/cooccur.c new file mode 100755 index 0000000000000000000000000000000000000000..4b3efd5125cb2ad2bd0f441fde262525d71beb22 --- /dev/null +++ b/DD-GloVe/src/cooccur.c @@ -0,0 +1,445 @@ +// Tool to calculate word-word cooccurrence statistics +// +// Copyright (c) 2014, 2018 The Board of Trustees of +// The Leland Stanford Junior University. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// +// For more information, bug reports, fixes, contact: +// Jeffrey Pennington (jpennin@stanford.edu) +// Christopher Manning (manning@cs.stanford.edu) +// https://github.com/stanfordnlp/GloVe/ +// GlobalVectors@googlegroups.com +// http://nlp.stanford.edu/projects/glove/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include "common.h" + +typedef struct cooccur_rec_id { + int word1; + int word2; + real val; + int id; +} CRECID; + +int verbose = 3; // 0, 1, or 2 +long long max_product; // Cutoff for product of word frequency ranks below which cooccurrence counts will be stored in a compressed full array +long long overflow_length; // Number of cooccurrence records whose product exceeds max_product to store in memory before writing to disk +int window_size = 15; // default context window size +int symmetric = 1; // 0: asymmetric, 1: symmetric +real memory_limit = 3; // soft limit, in gigabytes, used to estimate optimal array sizes +int distance_weighting = 1; // Flag to control the distance weighting of cooccurrence counts +char *vocab_file, *file_head; + +/* Search hash table for given string, return record if found, else NULL */ +HASHREC *hashsearch(HASHREC **ht, char *w) { + HASHREC *htmp, *hprv; + unsigned int hval = HASHFN(w, TSIZE, SEED); + for (hprv = NULL, htmp=ht[hval]; htmp != NULL && scmp(htmp->word, w) != 0; hprv = htmp, htmp = htmp->next); + if ( htmp != NULL && hprv!=NULL ) { // move to front on access + hprv->next = htmp->next; + htmp->next = ht[hval]; + ht[hval] = htmp; + } + return(htmp); +} + +/* Insert string in hash table, check for duplicates which should be absent */ +void hashinsert(HASHREC **ht, char *w, long long id) { + HASHREC *htmp, *hprv; + unsigned int hval = HASHFN(w, TSIZE, SEED); + for (hprv = NULL, htmp = ht[hval]; htmp != NULL && scmp(htmp->word, w) != 0; hprv = htmp, htmp = 
htmp->next); + if (htmp == NULL) { + htmp = (HASHREC *) malloc(sizeof(HASHREC)); + htmp->word = (char *) malloc(strlen(w) + 1); + strcpy(htmp->word, w); + htmp->num = id; + htmp->next = NULL; + if (hprv == NULL) ht[hval] = htmp; + else hprv->next = htmp; + } + else fprintf(stderr, "Error, duplicate entry located: %s.\n",htmp->word); + return; +} + +/* Write sorted chunk of cooccurrence records to file, accumulating duplicate entries */ +int write_chunk(CREC *cr, long long length, FILE *fout) { + if (length == 0) return 0; + + long long a = 0; + CREC old = cr[a]; + + for (a = 1; a < length; a++) { + if (cr[a].word1 == old.word1 && cr[a].word2 == old.word2) { + old.val += cr[a].val; + continue; + } + fwrite(&old, sizeof(CREC), 1, fout); + old = cr[a]; + } + fwrite(&old, sizeof(CREC), 1, fout); + return 0; +} + +/* Check if two cooccurrence records are for the same two words, used for qsort */ +int compare_crec(const void *a, const void *b) { + int c; + if ( (c = ((CREC *) a)->word1 - ((CREC *) b)->word1) != 0) return c; + else return (((CREC *) a)->word2 - ((CREC *) b)->word2); + +} + +/* Check if two cooccurrence records are for the same two words */ +int compare_crecid(CRECID a, CRECID b) { + int c; + if ( (c = a.word1 - b.word1) != 0) return c; + else return a.word2 - b.word2; +} + +/* Swap two entries of priority queue */ +void swap_entry(CRECID *pq, int i, int j) { + CRECID temp = pq[i]; + pq[i] = pq[j]; + pq[j] = temp; +} + +/* Insert entry into priority queue */ +void insert(CRECID *pq, CRECID new, int size) { + int j = size - 1, p; + pq[j] = new; + while ( (p=(j-1)/2) >= 0 ) { + if (compare_crecid(pq[p],pq[j]) > 0) {swap_entry(pq,p,j); j = p;} + else break; + } +} + +/* Delete entry from priority queue */ +void delete(CRECID *pq, int size) { + int j, p = 0; + pq[p] = pq[size - 1]; + while ( (j = 2*p+1) < size - 1 ) { + if (j == size - 2) { + if (compare_crecid(pq[p],pq[j]) > 0) swap_entry(pq,p,j); + return; + } + else { + if (compare_crecid(pq[j], pq[j+1]) < 
0) { + if (compare_crecid(pq[p],pq[j]) > 0) {swap_entry(pq,p,j); p = j;} + else return; + } + else { + if (compare_crecid(pq[p],pq[j+1]) > 0) {swap_entry(pq,p,j+1); p = j + 1;} + else return; + } + } + } +} + +/* Write top node of priority queue to file, accumulating duplicate entries */ +int merge_write(CRECID new, CRECID *old, FILE *fout) { + if (new.word1 == old->word1 && new.word2 == old->word2) { + old->val += new.val; + return 0; // Indicates duplicate entry + } + fwrite(old, sizeof(CREC), 1, fout); + *old = new; + return 1; // Actually wrote to file +} + +/* Merge [num] sorted files of cooccurrence records */ +int merge_files(int num) { + int i, size; + long long counter = 0; + CRECID *pq, new, old; + char filename[200]; + FILE **fid, *fout; + fid = calloc(num, sizeof(FILE)); + pq = malloc(sizeof(CRECID) * num); + fout = stdout; + if (verbose > 1) fprintf(stderr, "Merging cooccurrence files: processed 0 lines."); + + /* Open all files and add first entry of each to priority queue */ + for (i = 0; i < num; i++) { + sprintf(filename,"%s_%04d.bin",file_head,i); + fid[i] = fopen(filename,"rb"); + if (fid[i] == NULL) {log_file_loading_error("file", filename); free_fid(fid, num); free(pq); return 1;} + fread(&new, sizeof(CREC), 1, fid[i]); + new.id = i; + insert(pq,new,i+1); + } + + /* Pop top node, save it in old to see if the next entry is a duplicate */ + size = num; + old = pq[0]; + i = pq[0].id; + delete(pq, size); + fread(&new, sizeof(CREC), 1, fid[i]); + if (feof(fid[i])) size--; + else { + new.id = i; + insert(pq, new, size); + } + + /* Repeatedly pop top node and fill priority queue until files have reached EOF */ + while (size > 0) { + counter += merge_write(pq[0], &old, fout); // Only count the lines written to file, not duplicates + if ((counter%100000) == 0) if (verbose > 1) fprintf(stderr,"\033[39G%lld lines.",counter); + i = pq[0].id; + delete(pq, size); + fread(&new, sizeof(CREC), 1, fid[i]); + if (feof(fid[i])) size--; + else { + new.id = i; + 
insert(pq, new, size); + } + } + fwrite(&old, sizeof(CREC), 1, fout); + fprintf(stderr,"\033[0GMerging cooccurrence files: processed %lld lines.\n",++counter); + for (i=0;i<num;i++) { + sprintf(filename,"%s_%04d.bin",file_head,i); + remove(filename); + } + fprintf(stderr,"\n"); + free_fid(fid, num); + free(pq); + return 0; +} + +void free_resources(HASHREC** vocab_hash, CREC *cr, long long *lookup, + long long *history, real *bigram_table) { + free_table(vocab_hash); + free(cr); + free(lookup); + free(history); + free(bigram_table); +} + +/* Collect word-word cooccurrence counts from input stream */ +int get_cooccurrence() { + int flag, x, y, fidcounter = 1; + long long a, j = 0, k, id, counter = 0, ind = 0, vocab_size, w1, w2, *lookup = NULL, *history = NULL; + char format[20], filename[200], str[MAX_STRING_LENGTH + 1]; + FILE *fid, *foverflow; + real *bigram_table = NULL, r; + HASHREC *htmp, **vocab_hash = inithashtable(); + CREC *cr = malloc(sizeof(CREC) * (overflow_length + 1)); + history = malloc(sizeof(long long) * window_size); + + fprintf(stderr, "COUNTING COOCCURRENCES\n"); + if (verbose > 0) { + fprintf(stderr, "window size: %d\n", window_size); + if (symmetric == 0) fprintf(stderr, "context: asymmetric\n"); + else fprintf(stderr, "context: symmetric\n"); + } + if (verbose > 1) fprintf(stderr, "max product: %lld\n", max_product); + if (verbose > 1) fprintf(stderr, "overflow length: %lld\n", overflow_length); + sprintf(format,"%%%ds %%lld", MAX_STRING_LENGTH); // Format to read from vocab file, which has (irrelevant) frequency data + if (verbose > 1) fprintf(stderr, "Reading vocab from file \"%s\"...", vocab_file); + fid = fopen(vocab_file,"r"); + if (fid == NULL) { + log_file_loading_error("vocab file", vocab_file); + free_resources(vocab_hash, cr, lookup, history, bigram_table); + return 1; + } + while (fscanf(fid, format, str, &id) != EOF) hashinsert(vocab_hash, str, ++j); // Here id is not used: inserting vocab words into hash table with their 
frequency rank, j + fclose(fid); + vocab_size = j; + j = 0; + if (verbose > 1) fprintf(stderr, "loaded %lld words.\nBuilding lookup table...", vocab_size); + + /* Build auxiliary lookup table used to index into bigram_table */ + lookup = (long long *)calloc( vocab_size + 1, sizeof(long long) ); + if (lookup == NULL) { + fprintf(stderr, "Couldn't allocate memory!"); + free_resources(vocab_hash, cr, lookup, history, bigram_table); + return 1; + } + lookup[0] = 1; + for (a = 1; a <= vocab_size; a++) { + if ((lookup[a] = max_product / a) < vocab_size) lookup[a] += lookup[a-1]; + else lookup[a] = lookup[a-1] + vocab_size; + } + if (verbose > 1) fprintf(stderr, "table contains %lld elements.\n",lookup[a-1]); + + /* Allocate memory for full array which will store all cooccurrence counts for words whose product of frequency ranks is less than max_product */ + bigram_table = (real *)calloc( lookup[a-1] , sizeof(real) ); + if (bigram_table == NULL) { + fprintf(stderr, "Couldn't allocate memory!"); + free_resources(vocab_hash, cr, lookup, history, bigram_table); + return 1; + } + + fid = stdin; + // sprintf(format,"%%%ds",MAX_STRING_LENGTH); + sprintf(filename,"%s_%04d.bin", file_head, fidcounter); + foverflow = fopen(filename,"wb"); + if (verbose > 1) fprintf(stderr,"Processing token: 0"); + + // if symmetric > 0, we can increment ind twice per iteration, + // meaning up to 2x window_size in one loop + int overflow_threshold = symmetric == 0 ? 
overflow_length - window_size : overflow_length - 2 * window_size; + + /* For each token in input stream, calculate a weighted cooccurrence sum within window_size */ + while (1) { + if (ind >= overflow_threshold) { + // If overflow buffer is (almost) full, sort it and write it to temporary file + qsort(cr, ind, sizeof(CREC), compare_crec); + write_chunk(cr,ind,foverflow); + fclose(foverflow); + fidcounter++; + sprintf(filename,"%s_%04d.bin",file_head,fidcounter); + foverflow = fopen(filename,"wb"); + ind = 0; + } + flag = get_word(str, fid); + if (verbose > 2) fprintf(stderr, "Maybe processing token: %s\n", str); + if (flag == 1) { + // Newline, reset line index (j); maybe eof. + if (feof(fid)) { + if (verbose > 2) fprintf(stderr, "Not getting coocurs as at eof\n"); + break; + } + j = 0; + if (verbose > 2) fprintf(stderr, "Not getting coocurs as at newline\n"); + continue; + } + counter++; + if ((counter%100000) == 0) if (verbose > 1) fprintf(stderr,"\033[19G%lld",counter); + htmp = hashsearch(vocab_hash, str); + if (htmp == NULL) { + if (verbose > 2) fprintf(stderr, "Not getting coocurs as word not in vocab\n"); + continue; // Skip out-of-vocabulary words + } + w2 = htmp->num; // Target word (frequency rank) + for (k = j - 1; k >= ( (j > window_size) ? j - window_size : 0 ); k--) { // Iterate over all words to the left of target word, but not past beginning of line + w1 = history[k % window_size]; // Context word (frequency rank) + if (verbose > 2) fprintf(stderr, "Adding cooccur between words %lld and %lld.\n", w1, w2); + if ( w1 < max_product/w2 ) { // Product is small enough to store in a full array + bigram_table[lookup[w1-1] + w2 - 2] += distance_weighting ? 1.0/((real)(j-k)) : 1.0; // Weight by inverse of distance between words if needed + if (symmetric > 0) bigram_table[lookup[w2-1] + w1 - 2] += distance_weighting ? 
1.0/((real)(j-k)) : 1.0; // If symmetric context is used, exchange roles of w2 and w1 (ie look at right context too) + } + else { // Product is too big, data is likely to be sparse. Store these entries in a temporary buffer to be sorted, merged (accumulated), and written to file when it gets full. + cr[ind].word1 = w1; + cr[ind].word2 = w2; + cr[ind].val = distance_weighting ? 1.0/((real)(j-k)) : 1.0; + ind++; // Keep track of how full temporary buffer is + if (symmetric > 0) { // Symmetric context + cr[ind].word1 = w2; + cr[ind].word2 = w1; + cr[ind].val = distance_weighting ? 1.0/((real)(j-k)) : 1.0; + ind++; + } + } + } + history[j % window_size] = w2; // Target word is stored in circular buffer to become context word in the future + j++; + } + + /* Write out temp buffer for the final time (it may not be full) */ + if (verbose > 1) fprintf(stderr,"\033[0GProcessed %lld tokens.\n",counter); + qsort(cr, ind, sizeof(CREC), compare_crec); + write_chunk(cr,ind,foverflow); + sprintf(filename,"%s_0000.bin",file_head); + + /* Write out full bigram_table, skipping zeros */ + if (verbose > 1) fprintf(stderr, "Writing cooccurrences to disk"); + fid = fopen(filename,"wb"); + j = 1e6; + for (x = 1; x <= vocab_size; x++) { + if ( (long long) (0.75*log(vocab_size / x)) < j) { + j = (long long) (0.75*log(vocab_size / x)); + if (verbose > 1) fprintf(stderr,"."); + } // log's to make it look (sort of) pretty + for (y = 1; y <= (lookup[x] - lookup[x-1]); y++) { + if ((r = bigram_table[lookup[x-1] - 2 + y]) != 0) { + fwrite(&x, sizeof(int), 1, fid); + fwrite(&y, sizeof(int), 1, fid); + fwrite(&r, sizeof(real), 1, fid); + } + } + } + + if (verbose > 1) fprintf(stderr,"%d files in total.\n",fidcounter + 1); + fclose(fid); + fclose(foverflow); + free_resources(vocab_hash, cr, lookup, history, bigram_table); + return merge_files(fidcounter + 1); // Merge the sorted temporary files +} + +int main(int argc, char **argv) { + int i; + real rlimit, n = 1e5; + vocab_file = 
malloc(sizeof(char) * MAX_STRING_LENGTH); + file_head = malloc(sizeof(char) * MAX_STRING_LENGTH); + + if (argc == 1) { + printf("Tool to calculate word-word cooccurrence statistics\n"); + printf("Author: Jeffrey Pennington (jpennin@stanford.edu)\n\n"); + printf("Usage options:\n"); + printf("\t-verbose <int>\n"); + printf("\t\tSet verbosity: 0, 1, 2 (default), or 3\n"); + printf("\t-symmetric <int>\n"); + printf("\t\tIf <int> = 0, only use left context; if <int> = 1 (default), use left and right\n"); + printf("\t-window-size <int>\n"); + printf("\t\tNumber of context words to the left (and to the right, if symmetric = 1); default 15\n"); + printf("\t-vocab-file <file>\n"); + printf("\t\tFile containing vocabulary (truncated unigram counts, produced by 'vocab_count'); default vocab.txt\n"); + printf("\t-memory <float>\n"); + printf("\t\tSoft limit for memory consumption, in GB -- based on simple heuristic, so not extremely accurate; default 4.0\n"); + printf("\t-max-product <int>\n"); + printf("\t\tLimit the size of dense cooccurrence array by specifying the max product <int> of the frequency counts of the two cooccurring words.\n\t\tThis value overrides that which is automatically produced by '-memory'. Typically only needs adjustment for use with very large corpora.\n"); + printf("\t-overflow-length <int>\n"); + printf("\t\tLimit to length <int> the sparse overflow array, which buffers cooccurrence data that does not fit in the dense array, before writing to disk. \n\t\tThis value overrides that which is automatically produced by '-memory'. 
Typically only needs adjustment for use with very large corpora.\n"); + printf("\t-overflow-file <file>\n"); + printf("\t\tFilename, excluding extension, for temporary files; default overflow\n"); + printf("\t-distance-weighting <int>\n"); + printf("\t\tIf <int> = 0, do not weight cooccurrence count by distance between words; if <int> = 1 (default), weight the cooccurrence count by inverse of distance between words\n"); + + printf("\nExample usage:\n"); + printf("./cooccur -verbose 2 -symmetric 0 -window-size 10 -vocab-file vocab.txt -memory 8.0 -overflow-file tempoverflow < corpus.txt > cooccurrences.bin\n\n"); + free(vocab_file); + free(file_head); + return 0; + } + + if ((i = find_arg((char *)"-verbose", argc, argv)) > 0) verbose = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-symmetric", argc, argv)) > 0) symmetric = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-window-size", argc, argv)) > 0) window_size = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-vocab-file", argc, argv)) > 0) strcpy(vocab_file, argv[i + 1]); + else strcpy(vocab_file, (char *)"vocab.txt"); + if ((i = find_arg((char *)"-overflow-file", argc, argv)) > 0) strcpy(file_head, argv[i + 1]); + else strcpy(file_head, (char *)"overflow"); + if ((i = find_arg((char *)"-memory", argc, argv)) > 0) memory_limit = atof(argv[i + 1]); + if ((i = find_arg((char *)"-distance-weighting", argc, argv)) > 0) distance_weighting = atoi(argv[i + 1]); + + /* The memory_limit determines a limit on the number of elements in bigram_table and the overflow buffer */ + /* Estimate the maximum value that max_product can take so that this limit is still satisfied */ + rlimit = 0.85 * (real)memory_limit * 1073741824/(sizeof(CREC)); + while (fabs(rlimit - n * (log(n) + 0.1544313298)) > 1e-3) n = rlimit / (log(n) + 0.1544313298); + max_product = (long long) n; + overflow_length = (long long) rlimit/6; // 0.85 + 1/6 ~= 1 + + /* Override estimates by specifying limits explicitly on the command line */ + if ((i = 
find_arg((char *)"-max-product", argc, argv)) > 0) max_product = atoll(argv[i + 1]); + if ((i = find_arg((char *)"-overflow-length", argc, argv)) > 0) overflow_length = atoll(argv[i + 1]); + + const int returned_value = get_cooccurrence(); + free(vocab_file); + free(file_head); + return returned_value; +} + diff --git a/DD-GloVe/src/glove.c b/DD-GloVe/src/glove.c new file mode 100755 index 0000000000000000000000000000000000000000..0f6d63d4f24826f123f1e0d0c4c4f44bb29e0a26 --- /dev/null +++ b/DD-GloVe/src/glove.c @@ -0,0 +1,1199 @@ +// Author: Haozhe An (haozhe@umd.edu) +// Implementation of DD-GloVe, a train-time debiasing algorithm to learn GloVe +// word embeddings by leveraging dictionary definitions. +// Our work is to appear in Findings of ACL 2022. +// +// The code is adapted from the original release of GloVe. +// ========== Begin of original message from the GloVe authors ========== +// GloVe: Global Vectors for Word Representation +// +// Copyright (c) 2014 The Board of Trustees of +// The Leland Stanford Junior University. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// +// For more information, bug reports, fixes, contact: +// Jeffrey Pennington (jpennin@stanford.edu) +// GlobalVectors@googlegroups.com +// http://nlp.stanford.edu/projects/glove/ +// ========== End of original message from the GloVe authors ========== + +#define _CRT_SECURE_NO_WARNINGS +// Feature-test macro: only takes effect when defined before the first system header (selects 64-bit off_t for fseeko/ftello) +#define _FILE_OFFSET_BITS 64 + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <time.h> + +// windows pthread.h is buggy, but this #define fixes it +#define HAVE_STRUCT_TIMESPEC +#include <pthread.h> + +#include "common.h" + +int write_header = 0; //0=no, 1=yes; writes vocab_size/vector_size as first line for use with some libraries, such as gensim. +int verbose = 2; // 0, 1, or 2 +int seed = 0; +int use_unk_vec = 1; // 0 or 1 +int num_threads = 8; // pthreads +int num_iter = 25; // Number of full passes through cooccurrence matrix +int vector_size = 50; // Word vector size +int save_gradsq = 0; // By default don't save squared gradient values +int use_binary = 0; // 0: save as text files; 1: save as binary; 2: both. For binary, save both word and context word vectors. +int model = 2; // For text file output only. 0: concatenate word and context vectors (and biases) i.e. save everything; 1: Just save word vectors (no bias); 2: Save (word + context word) vectors (no biases) +int checkpoint_every = 0; // checkpoint the model for every checkpoint_every iterations.
Do nothing if checkpoint_every <= 0 +int load_init_param = 0; // if 1 initial parameters are loaded from -init-param-file +int save_init_param = 0; // if 1 initial parameters are saved (i.e., in the 0 checkpoint) +int load_init_gradsq = 0; // if 1 initial squared gradients are loaded from -init-gradsq-file +real eta = 0.05; // Initial learning rate +real alpha = 0.75, x_max = 100.0; // Weighting function parameters, not extremely sensitive to corpus, though may need adjustment for very small or very large corpora +real grad_clip_value = 100.0; // Clipping parameter for gradient components. Values will be clipped to [-grad_clip_value, grad_clip_value] interval. +real *W, *gradsq, *cost, *def_cost, *ortho_cost, *proj_cost, *context_ortho_cost; +long long num_lines, *lines_per_thread, vocab_size; +char vocab_file[MAX_STRING_LENGTH]; +char input_file[MAX_STRING_LENGTH]; +char save_W_file[MAX_STRING_LENGTH]; +char save_gradsq_file[MAX_STRING_LENGTH]; +char init_param_file[MAX_STRING_LENGTH]; +char init_gradsq_file[MAX_STRING_LENGTH]; + +char definition_file[MAX_STRING_LENGTH]; +int **word_to_def; +real lambda = 0.001, beta = 0.001, gamma_ = 0.001; // Hyper-param to balance weights +int def_word_num, use_def_loss = 1, use_ortho_loss = 1, use_proj_loss = 1; // Indicate if using the loss or not +int SEED_WORD_1 = 60, SEED_WORD_2 = 12204; // Hard-coded indices: deutscher(60)/türke(12204)/he(16)/she(41)/pole(5882)/italiener(3404) +int seed_1_top_num = 10, seed_2_top_num = 10; +long long cap = 399999; // Must be smaller than vocab size.
+ +// For bias-aware weight schedule and finding bias direction from initial seeds +real *bias_word_value, *bias_def_value, *context_bias_word_value, *context_bias_def_value; +real *bias_word_value_temp, *bias_def_value_temp, *context_bias_word_value_temp, *context_bias_def_value_temp; +long long *boy_candidate_words, *girl_candidate_words; +int girl_added = 0, boy_added = 0; + +/** + * Loads a save file for use as the initial values for the parameters or gradsq + * Return value: 0 if success, -1 if fail + */ +int load_init_file(char *file_name, real *array, long long array_size) { + FILE *fin; + long long a; + fin = fopen(file_name, "rb"); + if (fin == NULL) { + log_file_loading_error("init file", file_name); + return -1; + } + for (a = 0; a < array_size; a++) { + if (feof(fin)) { + fprintf(stderr, "EOF reached before data fully loaded in %s.\n", file_name); + fclose(fin); + return -1; + } + fread(&array[a], sizeof(real), 1, fin); + } + fclose(fin); + return 0; +} + +void initialize_parameters() { + // TODO: return an error code when an error occurs, clean up in the calling routine + if (seed == 0) { + seed = time(0); + } + fprintf(stderr, "Using random seed %d\n", seed); + srand(seed); + long long a; + long long W_size = 2 * vocab_size * (vector_size + 1); // +1 to allocate space for bias + + /* Allocate space for word vectors and context word vectors, and correspodning gradsq */ + a = posix_memalign((void **)&W, 128, W_size * sizeof(real)); // Might perform better than malloc + if (W == NULL) { + fprintf(stderr, "Error allocating memory for W\n"); + exit(1); + } + a = posix_memalign((void **)&gradsq, 128, W_size * sizeof(real)); // Might perform better than malloc + if (gradsq == NULL) { + fprintf(stderr, "Error allocating memory for gradsq\n"); + free(W); + exit(1); + } + if (load_init_param) { + // Load existing parameters + fprintf(stderr, "\nLoading initial parameters from %s \n", init_param_file); + if (load_init_file(init_param_file, W, W_size)) { + 
free(W); + free(gradsq); + exit(1); + } + } else { + // Initialize new parameters + for (a = 0; a < W_size; ++a) { + W[a] = (rand() / (real)RAND_MAX - 0.5) / vector_size; + } + } + + if (load_init_gradsq) { + // Load existing squared gradients + fprintf(stderr, "\nLoading initial squared gradients from %s \n", init_gradsq_file); + if (load_init_file(init_gradsq_file, gradsq, W_size)) { + free(W); + free(gradsq); + exit(1); + } + } else { + // Initialize new squared gradients + for (a = 0; a < W_size; ++a) { + gradsq[a] = 1.0; // So initial value of eta is equal to initial learning rate + } + } +} + +static inline real check_nan(real update) { // static: a plain C99 inline definition provides no external definition, so a non-inlined call would not link + if (isnan(update) || isinf(update)) { // NaN or infinite updates are logged and replaced by 0 + fprintf(stderr,"\ncaught NaN in update"); + return 0.; + } else { + return update; + } +} + +int compare( const void* a, const void* b) { + // usage: qsort( a, 6, sizeof(real), compare ) a is array; 6 is array length + real real_a = * ( (const real*) a ); + real real_b = * ( (const real*) b ); + + // an easy expression for comparing + return (real_a > real_b) - (real_a < real_b); +} + +/* Train the GloVe model */ +void *glove_thread(void *vid) { + long long a, b ,l1, l2; + long long id = *(long long*)vid; + CREC cr; + real diff, fdiff, temp1, temp2, diff_def, diff_ortho, diff_proj; + FILE *fin; + + fin = fopen(input_file, "rb"); + if (fin == NULL) { + // TODO: exit all the threads or somehow mark that glove failed + log_file_loading_error("input file", input_file); + pthread_exit(NULL); + } + fseeko(fin, (num_lines / num_threads * id) * (sizeof(CREC)), SEEK_SET); //Threads spaced roughly equally throughout file + cost[id] = 0, def_cost[id] = 0, ortho_cost[id] = 0, proj_cost[id] = 0, context_ortho_cost[id] = 0; + + real* W_updates1 = (real*)malloc(vector_size * sizeof(real)); + if (NULL == W_updates1){ + fclose(fin); + pthread_exit(NULL); + } + real* W_updates2 = (real*)malloc(vector_size * sizeof(real)); + if (NULL == W_updates2){ + fclose(fin); + free(W_updates1); + pthread_exit(NULL); + } + + long long
fweight_increased = 0, fweight_decreased = 0; + real fweight_total = 0, fweight_max = -1, fweight_min = 1; + for (a = 0; a < lines_per_thread[id]; a++) { + fread(&cr, sizeof(CREC), 1, fin); + if (feof(fin)) break; + if (cr.word1 < 1 || cr.word2 < 1) { continue; } + + /* Get location of words in W & gradsq */ + l1 = (cr.word1 - 1LL) * (vector_size + 1); // cr word indices start at 1 + l2 = ((cr.word2 - 1LL) + vocab_size) * (vector_size + 1); // shift by vocab_size to get separate vectors for context words + + /* Calculate cost, save diff for gradients */ + diff = 0; + for (b = 0; b < vector_size; b++) diff += W[b + l1] * W[b + l2]; // dot product of word and context word vector + diff += W[vector_size + l1] + W[vector_size + l2] - log(cr.val); // add separate bias for each word + fdiff = (cr.val > x_max) ? diff : pow(cr.val / x_max, alpha) * diff; // multiply weighting function (f) with diff + + // Bias-aware co-occurrence weight schedule + real word1_bias_difference = bias_word_value[cr.word1 - 1LL] - bias_def_value[cr.word1 - 1LL]; + real word2_bias_difference = context_bias_word_value[cr.word2 - 1LL] - context_bias_def_value[cr.word2 - 1LL]; + real fweight = 1, difference_thresh = 0.08; + + if (fabs(word1_bias_difference) > difference_thresh || fabs(word2_bias_difference) > difference_thresh) { + real sign_word_bias_1 = bias_word_value[cr.word1 - 1LL] > 0 ? 1 : -1; + real sign_word_bias_2 = context_bias_word_value[cr.word2 - 1LL] > 0 ? 1 : -1; + real max_difference = fabs(word1_bias_difference) > fabs(word2_bias_difference) ? fabs(word1_bias_difference) : fabs(word2_bias_difference); + fweight = 1 - sign_word_bias_1 * sign_word_bias_2 * max_difference * 0.4; + } + + fdiff *= fweight; + if (fweight > 1) fweight_increased++; + if (fweight < 1) fweight_decreased++; + fweight_total += fweight; + if (fweight > fweight_max) fweight_max = fweight; + if (fweight < fweight_min) fweight_min = fweight; + + // Check for NaN and inf() in the diffs. 
+ if (isnan(diff) || isnan(fdiff) || isinf(diff) || isinf(fdiff)) { + fprintf(stderr,"Caught NaN in diff for kdiff for thread. Skipping update\n"); + fprintf(stderr, "l1 %lld, l2 %lld\n", l1, l2); + continue; + } + cost[id] += 0.5 * fdiff * diff; // weighted squared error + + /* Adaptive gradient updates*/ + fdiff *= eta; + for(b = 0; b < vector_size; b++) { + // learning rate times gradient for word vectors + temp1 = fdiff * W[b + l2]; + temp2 = fdiff * W[b + l1]; + // adaptive updates + W[b + l1] -= temp1 / sqrt(gradsq[b + l1]); + W[b + l2] -= temp2 / sqrt(gradsq[b + l2]); + gradsq[b + l1] += temp1 * temp1; + gradsq[b + l2] += temp2 * temp2; + + // Clip values to avoid numerical difficulties + if (W[b + l1] > 1) W[b + l1] = 1; + if (W[b + l1] < -1) W[b + l1] = -1; + if (W[b + l2] > 1) W[b + l2] = 1; + if (W[b + l2] < -1) W[b + l2] = -1; + } + + // updates for bias terms + W[vector_size + l1] -= fdiff / sqrt(gradsq[vector_size + l1]); + W[vector_size + l2] -= fdiff / sqrt(gradsq[vector_size + l2]); + fdiff *= fdiff; + gradsq[vector_size + l1] += fdiff; + gradsq[vector_size + l2] += fdiff; + } // Finish original GloVe cost updates + + // Print out seed word IDs + if (id == 0) { + fprintf(stderr, "%d girls words (id): ", girl_added); + for (int c = 0; c < girl_added; c++) { + fprintf(stderr, "%lld, ", girl_candidate_words[c]); + } + fprintf(stderr, "\n"); + + fprintf(stderr, "%d boys words (id): ", boy_added); + for (int c = 0; c < boy_added; c++) { + fprintf(stderr, "%lld, ", boy_candidate_words[c]); + } + fprintf(stderr, "\n"); + } + + real debias_direction[vector_size], context_debias_direction[vector_size]; + for (int c = 0; c < vector_size; c++) { + debias_direction[c] = 0; + context_debias_direction[c] = 0; + + // Use candidates words to approximate bias direction + for (int d = 0; d < boy_added; d++) { + debias_direction[c] += W[c + boy_candidate_words[d] * (vector_size + 1)] / (boy_added + 1); + context_debias_direction[c] += W[c + (boy_candidate_words[d] + 
vocab_size) * (vector_size + 1)] / (boy_added + 1); + } + debias_direction[c] += W[c + SEED_WORD_1 * (vector_size + 1)] / (boy_added + 1); + context_debias_direction[c] += W[c + (SEED_WORD_1 + vocab_size) * (vector_size + 1)] / (boy_added + 1); + for (int d = 0; d < girl_added; d++) { + debias_direction[c] -= W[c + girl_candidate_words[d] * (vector_size + 1)] / (girl_added + 1); + context_debias_direction[c] -= W[c + (girl_candidate_words[d] + vocab_size) * (vector_size + 1)] / (girl_added + 1); + } + debias_direction[c] -= W[c + SEED_WORD_2 * (vector_size + 1)] / (girl_added + 1); + context_debias_direction[c] -= W[c + (SEED_WORD_2 + vocab_size) * (vector_size + 1)] / (girl_added + 1); + + } + + // other loss updates were here + def_cost[id] = 0; + ortho_cost[id] = 0; + context_ortho_cost[id] = 0; + proj_cost[id] = 0; + + real def_loss = 0, ortho_loss = 0, context_ortho_loss = 0, proj_loss = 0; + real s_dot_s = 0, w_dot_s = 0, s_dot_g = 0, g_dot_g = 0, w_dot_g = 0; + real proj_w_s[vector_size], proj_s_g[vector_size], proj_w_g[vector_size]; // projection of w onto s etc. + real proj_difference[vector_size], context_proj_difference[vector_size]; + real context_s_dot_s = 0, context_w_dot_s = 0, context_s_dot_g = 0, context_g_dot_g = 0, context_w_dot_g = 0; + real context_proj_w_s[vector_size], context_proj_s_g[vector_size], context_proj_w_g[vector_size]; // projection of w onto s etc. 
+ real temp; + + // Store the products for computing gradients later + real debias_direction_products[vector_size*vector_size], context_debias_direction_products[vector_size*vector_size]; + for (b = 0; b < vector_size; b++) { + for (int j = 0; j < vector_size; j++) { + debias_direction_products[b * vector_size + j] = debias_direction[b] * debias_direction[j]; + context_debias_direction_products[b * vector_size + j] = context_debias_direction[b] * context_debias_direction[j]; + } + } + + + // Optimize additional losses + for (long long word_id = 0; word_id < vocab_size; word_id++) { + l1 = word_id * (vector_size + 1); + l2 = (word_id + vocab_size) * (vector_size + 1); // shift by vocab_size to get separate vectors for context words + + // content_len indicates the number of words in the definition + int content_len = word_to_def[word_id][0]; + int context_content_len = word_to_def[word_id][0]; + + // Begin computing other losses and update accordingly + real def_content_sum[vector_size], context_def_content_sum[vector_size]; + + // Initialize all values to zeros + for (b = 0; b < vector_size; b++) { + def_content_sum[b] = 0; + context_def_content_sum[b] = 0; + } + + // Getting s(w) for focal word if it has definitions + for (int l = 0; l < content_len; l++) { + long long content_id = word_to_def[word_id][l+1] * (vector_size + 1); + for (b = 0; b < vector_size; b++) { + def_content_sum[b] += W[b + content_id] / content_len; + } + } + + // Getting s(w) for context word if it has definitions + for (int l = 0; l < context_content_len; l++) { + long long context_content_id = (word_to_def[word_id][l+1] + vocab_size) * (vector_size + 1); + for (b = 0; b < vector_size; b++) { + context_def_content_sum[b] += W[b + context_content_id]/ context_content_len; + } + } + + def_loss = 0; ortho_loss = 0; context_ortho_loss = 0; proj_loss = 0; + s_dot_s = 0; w_dot_s = 0; s_dot_g = 0; g_dot_g = 0; w_dot_g = 0; + context_s_dot_s = 0; context_w_dot_s = 0; context_s_dot_g = 0; 
context_g_dot_g = 0; context_w_dot_g = 0; + + if (content_len > 0) { + for (b = 0; b < vector_size; b++) { + temp = (W[b + l1] - def_content_sum[b]) > 0 ? (W[b + l1] - def_content_sum[b]) : -(W[b + l1] - def_content_sum[b]); + if (!isnan(temp) && !isinf(temp)) { + def_loss += temp; + // Update for definition loss + if (use_def_loss) { + diff_def = (W[b + l1] - def_content_sum[b]) > 0 ? 1 : -1; + W[b + l1] -= diff_def * eta * lambda / sqrt(gradsq[b + l1]); + } + } else { + fprintf(stderr, "Caught nan or inf for definition loss update l1.\n"); + continue; + } + + s_dot_s += def_content_sum[b] * def_content_sum[b]; + w_dot_s += W[b + l1] * def_content_sum[b]; + s_dot_g += def_content_sum[b] * debias_direction[b]; + g_dot_g += debias_direction[b] * debias_direction[b]; + w_dot_g += W[b + l1] * debias_direction[b]; + } + + for (b = 0; b < vector_size; b++) { + proj_w_s[b] = W[b + l1] - w_dot_s / s_dot_s * def_content_sum[b]; + if (!isnan(proj_w_s[b]) && !isinf(proj_w_s[b])) { + ortho_loss += proj_w_s[b] * W[b + l1]; + } else { + fprintf(stderr, "Caught nan or inf for ortho loss l1.\n"); + continue; + } + + proj_w_g[b] = w_dot_g / g_dot_g * debias_direction[b]; + proj_s_g[b] = s_dot_g / g_dot_g * debias_direction[b]; + proj_difference[b] = (proj_w_g[b] - proj_s_g[b]) > 0 ? g_dot_g : -g_dot_g; + + } + + for (b = 0; b < vector_size; b++) { + temp = (proj_w_g[b] - proj_s_g[b]) > 0 ? 
(proj_w_g[b] - proj_s_g[b]) : -((proj_w_g[b] - proj_s_g[b])); + if (!isnan(temp) && !isinf(temp)) { + proj_loss += temp; + // Update for projection loss + if (use_proj_loss) { + diff_proj = 0; + for (int j = 0; j < vector_size; j++) { + diff_proj += debias_direction_products[b * vector_size + j] / proj_difference[j]; + } + + W[b + l1] -= diff_proj * eta * gamma_ / sqrt(gradsq[b + l1]); + if (isnan(W[b + l1]) || isinf(W[b + l1])) { + fprintf(stderr, "Projection loss update for l1 caught nan or inf\n"); + continue; + } + if (W[b + l1] > 1) W[b + l1] = 1; + if (W[b + l1] < -1) W[b + l1] = -1; + } + } else { + fprintf(stderr, "Caught nan or inf for projection loss update l1.\n"); + continue; + } + } + + // Update for ortho loss + if (use_ortho_loss && content_len > 0) { + for (b = 0; b < vector_size; b++) { + diff_ortho = 2 * ortho_loss * proj_w_s[b]; + if (!isnan(diff_ortho) && !isinf(diff_ortho)) { + W[b + l1] -= diff_ortho * eta * beta / sqrt(gradsq[b + l1]); + } else { + fprintf(stderr, "Caught nan or inf for ortho loss update l1.\n"); + continue; + } + } + } + def_cost[id] += def_loss; + ortho_cost[id] += pow(ortho_loss, 2); + proj_cost[id] += proj_loss; + } + else { + // No definiton, update for proj_loss + if (use_proj_loss) { + for (b = 0; b < vector_size; b++) { + g_dot_g += debias_direction[b] * debias_direction[b]; + w_dot_g += W[b + l1] * debias_direction[b]; + } + + for (b = 0; b < vector_size; b++) { + proj_w_g[b] = w_dot_g / g_dot_g * debias_direction[b]; + proj_w_g[b] = proj_w_g[b] > 0 ? 
g_dot_g : - g_dot_g; + } + + + for (b = 0; b < vector_size; b++) { + diff_proj = 0; + for (int j = 0; j < vector_size; j++) { + diff_proj += debias_direction_products[b * vector_size + j] / proj_w_g[j]; + } + W[b + l1] -= diff_proj * eta * gamma_ / sqrt(gradsq[b + l1]); + if (W[b + l1] > 1) W[b + l1] = 1; + if (W[b + l1] < -1) W[b + l1] = -1; + } + } + + def_cost[id] += def_loss; + ortho_cost[id] += pow(ortho_loss, 2); + proj_cost[id] += proj_loss; + } + + // Now for context word vectors + if (context_content_len > 0) { + for (b = 0; b < vector_size; b++) { + temp = (W[b + l2] - context_def_content_sum[b]) > 0 ? (W[b + l2] - context_def_content_sum[b]) : -(W[b + l2] - context_def_content_sum[b]); + if (!isnan(temp) && !isinf(temp)) { + def_loss += temp; + // Update for definition loss + if (use_def_loss) { + diff_def = (W[b + l2] - context_def_content_sum[b]) > 0 ? 1 : -1; + W[b + l2] -= diff_def * eta * lambda / sqrt(gradsq[b + l2]); + } + } else { + fprintf(stderr, "Caught nan or inf for definition loss update l2.\n"); + continue; + } + + context_s_dot_s += context_def_content_sum[b] * context_def_content_sum[b]; + context_w_dot_s += W[b + l2] * context_def_content_sum[b]; + context_s_dot_g += context_def_content_sum[b] * context_debias_direction[b]; + context_g_dot_g += context_debias_direction[b] * context_debias_direction[b]; + context_w_dot_g += W[b + l2] * context_debias_direction[b]; + } + + for (b = 0; b < vector_size; b++) { + context_proj_w_s[b] = W[b + l2] - context_w_dot_s / context_s_dot_s * context_def_content_sum[b]; + if (!isnan(context_proj_w_s[b]) && !isinf(context_proj_w_s[b])) { + context_ortho_loss += context_proj_w_s[b] * W[b + l2]; // for computing gradients later + } else { + fprintf(stderr, "Caught nan or inf for ortho loss l2.\n"); + continue; + } + + // fprintf(stderr, "debugging: context_proj_w_s at %d is %lf\n", b, context_proj_w_s[b]); + context_proj_w_g[b] = context_w_dot_g / context_g_dot_g * context_debias_direction[b]; + 
context_proj_s_g[b] = context_s_dot_g / context_g_dot_g * context_debias_direction[b]; + context_proj_difference[b] = (context_proj_w_g[b] - context_proj_s_g[b]) > 0 ? context_g_dot_g : -context_g_dot_g; + + } + + for (b = 0; b < vector_size; b++) { + temp = (context_proj_w_g[b] - context_proj_s_g[b]) > 0 ? (context_proj_w_g[b] - context_proj_s_g[b]) : -((context_proj_w_g[b] - context_proj_s_g[b])); + if (!isnan(temp) && !isinf(temp)) { + proj_loss += temp; + // Update for projection loss + if (use_proj_loss) { + diff_proj = 0; + for (int j = 0; j < vector_size; j++) { + diff_proj += context_debias_direction_products[b * vector_size + j] / context_proj_difference[j]; + } + + W[b + l2] -= diff_proj * eta * gamma_ / sqrt(gradsq[b + l2]); + if (W[b + l2] > 1) W[b + l2] = 1; + if (W[b + l2] < -1) W[b + l2] = -1; + } + } else { + fprintf(stderr, "Caught nan or inf for projection loss update l2.\n"); + continue; + } + } + + // Update for ortho loss + if (use_ortho_loss && context_content_len > 0) { + for (b = 0; b < vector_size; b++) { + diff_ortho = 2 * context_ortho_loss * context_proj_w_s[b]; + if (!isnan(diff_ortho) && !isinf(diff_ortho)) { + W[b + l2] -= diff_ortho * eta * beta / sqrt(gradsq[b + l2]); + } else { + fprintf(stderr, "Caught nan or inf for ortho loss update l2.\n"); + continue; + } + } + } + + def_cost[id] += def_loss; + context_ortho_cost[id] += pow(context_ortho_loss, 2); + proj_cost[id] += proj_loss; + } + else { + // No definiton, update for proj_loss + if (use_proj_loss) { + for (b = 0; b < vector_size; b++) { + context_g_dot_g += debias_direction[b] * debias_direction[b]; + context_w_dot_g += W[b + l1] * debias_direction[b]; + } + + for (b = 0; b < vector_size; b++) { + context_proj_w_g[b] = w_dot_g / g_dot_g * debias_direction[b]; + context_proj_w_g[b] = proj_w_g[b] > 0 ? 
g_dot_g : - g_dot_g; + } + + for (b = 0; b < vector_size; b++) { + diff_proj = 0; + for (int j = 0; j < vector_size; j++) { + diff_proj += context_debias_direction_products[b * vector_size + j] / context_proj_w_g[j]; + } + + W[b + l1] -= diff_proj * eta * gamma_ / sqrt(gradsq[b + l1]); + if (W[b + l1] > 1) W[b + l1] = 1; + if (W[b + l1] < -1) W[b + l1] = -1; + } + } + def_cost[id] += def_loss; + ortho_cost[id] += pow(context_ortho_loss, 2); + proj_cost[id] += proj_loss; + } + } + + free(W_updates1); + free(W_updates2); + + fclose(fin); + pthread_exit(NULL); +} + +/* Save params to file */ +int save_params(int nb_iter) { + /* + * nb_iter is the number of iteration (= a full pass through the cooccurrence matrix). + * nb_iter > 0 => checkpointing the intermediate parameters, so nb_iter is in the filename of output file. + * nb_iter == 0 => checkpointing the initial parameters + * else => saving the final paramters, so nb_iter is ignored. + */ + + long long a, b; + char format[20]; + char output_file[MAX_STRING_LENGTH+20], output_file_gsq[MAX_STRING_LENGTH+20]; + char *word = malloc(sizeof(char) * MAX_STRING_LENGTH + 1); + if (NULL == word) { + return 1; + } + FILE *fid, *fout; + FILE *fgs = NULL; + + if (use_binary > 0 || nb_iter == 0) { + // Save parameters in binary file + // note: always save initial parameters in binary, as the reading code expects binary + if (nb_iter < 0) + sprintf(output_file,"%s.bin",save_W_file); + else + sprintf(output_file,"%s.%03d.bin",save_W_file,nb_iter); + + fout = fopen(output_file,"wb"); + if (fout == NULL) {log_file_loading_error("weights file", save_W_file); free(word); return 1;} + for (a = 0; a < 2 * vocab_size * (vector_size + 1); a++) fwrite(&W[a], sizeof(real), 1,fout); + fclose(fout); + if (save_gradsq > 0) { + if (nb_iter < 0) + sprintf(output_file_gsq,"%s.bin",save_gradsq_file); + else + sprintf(output_file_gsq,"%s.%03d.bin",save_gradsq_file,nb_iter); + + fgs = fopen(output_file_gsq,"wb"); + if (fgs == NULL) 
{log_file_loading_error("gradsq file", save_gradsq_file); free(word); return 1;} + for (a = 0; a < 2 * vocab_size * (vector_size + 1); a++) fwrite(&gradsq[a], sizeof(real), 1,fgs); + fclose(fgs); + } + } + if (use_binary != 1) { // Save parameters in text file + if (nb_iter < 0) + sprintf(output_file,"%s.txt",save_W_file); + else + sprintf(output_file,"%s.%03d.txt",save_W_file,nb_iter); + if (save_gradsq > 0) { + if (nb_iter < 0) + sprintf(output_file_gsq,"%s.txt",save_gradsq_file); + else + sprintf(output_file_gsq,"%s.%03d.txt",save_gradsq_file,nb_iter); + + fgs = fopen(output_file_gsq,"wb"); + if (fgs == NULL) {log_file_loading_error("gradsq file", save_gradsq_file); free(word); return 1;} + } + fout = fopen(output_file,"wb"); + if (fout == NULL) {log_file_loading_error("weights file", save_W_file); free(word); return 1;} + fid = fopen(vocab_file, "r"); + sprintf(format,"%%%ds",MAX_STRING_LENGTH); + if (fid == NULL) {log_file_loading_error("vocab file", vocab_file); free(word); fclose(fout); return 1;} + if (write_header) fprintf(fout, "%lld %d\n", vocab_size, vector_size); + for (a = 0; a < vocab_size; a++) { + if (fscanf(fid,format,word) == 0) {free(word); fclose(fid); fclose(fout); return 1;} + // input vocab cannot contain special <unk> keyword + if (strcmp(word, "<unk>") == 0) {free(word); fclose(fid); fclose(fout); return 1;} + fprintf(fout, "%s",word); + if (model == 0) { // Save all parameters (including bias) + for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", W[a * (vector_size + 1) + b]); + for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", W[(vocab_size + a) * (vector_size + 1) + b]); + } + if (model == 1) // Save only "word" vectors (without bias) + for (b = 0; b < vector_size; b++) fprintf(fout," %lf", W[a * (vector_size + 1) + b]); + if (model == 2) // Save "word + context word" vectors (without bias) + for (b = 0; b < vector_size; b++) fprintf(fout," %lf", W[a * (vector_size + 1) + b] + W[(vocab_size + a) * (vector_size + 1) + 
b]); + fprintf(fout,"\n"); + if (save_gradsq > 0) { // Save gradsq + fprintf(fgs, "%s",word); + for (b = 0; b < (vector_size + 1); b++) fprintf(fgs," %lf", gradsq[a * (vector_size + 1) + b]); + for (b = 0; b < (vector_size + 1); b++) fprintf(fgs," %lf", gradsq[(vocab_size + a) * (vector_size + 1) + b]); + fprintf(fgs,"\n"); + } + if (fscanf(fid,format,word) == 0) { + // Eat irrelevant frequency entry + fclose(fout); + fclose(fid); + free(word); + return 1; + } + } + + if (use_unk_vec) { + real* unk_vec = (real*)calloc((vector_size + 1), sizeof(real)); + real* unk_context = (real*)calloc((vector_size + 1), sizeof(real)); + strcpy(word, "<unk>"); + + long long num_rare_words = vocab_size < 100 ? vocab_size : 100; + + for (a = vocab_size - num_rare_words; a < vocab_size; a++) { + for (b = 0; b < (vector_size + 1); b++) { + unk_vec[b] += W[a * (vector_size + 1) + b] / num_rare_words; + unk_context[b] += W[(vocab_size + a) * (vector_size + 1) + b] / num_rare_words; + } + } + + fprintf(fout, "%s",word); + if (model == 0) { // Save all parameters (including bias) + for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", unk_vec[b]); + for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", unk_context[b]); + } + if (model == 1) // Save only "word" vectors (without bias) + for (b = 0; b < vector_size; b++) fprintf(fout," %lf", unk_vec[b]); + if (model == 2) // Save "word + context word" vectors (without bias) + for (b = 0; b < vector_size; b++) fprintf(fout," %lf", unk_vec[b] + unk_context[b]); + fprintf(fout,"\n"); + + free(unk_vec); + free(unk_context); + } + + fclose(fid); + fclose(fout); + if (save_gradsq > 0) fclose(fgs); + } + free(word); + return 0; +} + +/* Train model */ +int train_glove() { + long long a, file_size; + int save_params_return_code; + int b; + FILE *fin, *fdef; + real total_cost = 0, total_def_cost = 0, total_ortho_cost = 0, total_proj_cost = 0, total_context_ortho_cost = 0; + + fprintf(stderr, "TRAINING MODEL\n"); + + fin = 
fopen(input_file, "rb"); + if (fin == NULL) {log_file_loading_error("cooccurrence file", input_file); return 1;} + fseeko(fin, 0, SEEK_END); + file_size = ftello(fin); + num_lines = file_size/(sizeof(CREC)); // Assuming the file isn't corrupt and consists only of CREC's + fclose(fin); + fprintf(stderr,"Read %lld lines.\n", num_lines); + + fdef = fopen(definition_file, "rb"); + fprintf(stderr, "Opened definition file\n"); + if (fdef == NULL) {log_file_loading_error("definition file", definition_file); return 1;} + + fprintf(stderr,"Building definition lists...\n"); + fread(&def_word_num, sizeof(int), 1, fdef); + word_to_def = (int **)malloc(vocab_size * sizeof(int *)); + int curr_id, curr_size; + int def_word_ids[def_word_num]; //for free memory later + + for (a = 0; a < def_word_num; a++) { + if (feof(fdef)) { + fprintf(stderr, "EOF reached before data fully loaded in %s.\n", definition_file); + fclose(fdef); + return -1; + } + fread(&curr_id, sizeof(int), 1, fdef); + fread(&curr_size, sizeof(int), 1, fdef); + word_to_def[curr_id] =(int *)malloc(sizeof(int) * (curr_size+1)); + def_word_ids[a] = curr_id; + word_to_def[curr_id][0] = curr_size; + for (int i = 0; i < curr_size; i++){ + fread(&(word_to_def[curr_id][i+1]), sizeof(int), 1, fdef); + } + } + fclose(fdef); + + if (verbose > 1) fprintf(stderr,"Initializing parameters..."); + initialize_parameters(); + if (verbose > 1) fprintf(stderr,"done.\n"); + if (save_init_param) { + if (verbose > 1) fprintf(stderr,"Saving initial parameters... 
"); + save_params_return_code = save_params(0); + if (save_params_return_code != 0) + return save_params_return_code; + if (verbose > 1) fprintf(stderr,"done.\n"); + } + if (verbose > 0) fprintf(stderr,"vector size: %d\n", vector_size); + if (verbose > 0) fprintf(stderr,"vocab size: %lld\n", vocab_size); + if (verbose > 0) fprintf(stderr,"x_max: %lf\n", x_max); + if (verbose > 0) fprintf(stderr,"alpha: %lf\n", alpha); + if (verbose > 0) fprintf(stderr,"use_def_loss: %d\n", use_def_loss); + if (verbose > 0) fprintf(stderr,"use_ortho_loss: %d\n", use_ortho_loss); + if (verbose > 0) fprintf(stderr,"use_proj_loss: %d\n", use_proj_loss); + if (verbose > 0) fprintf(stderr,"lambda: %0.9lf\n", lambda); + if (verbose > 0) fprintf(stderr,"beta: %0.9lf\n", beta); + if (verbose > 0) fprintf(stderr,"gamma: %0.9lf\n", gamma_); + if (verbose > 0) fprintf(stderr,"MALE index: %d\n", SEED_WORD_1); + if (verbose > 0) fprintf(stderr,"FEMALE index: %d\n", SEED_WORD_2); + + bias_word_value = (real *)malloc(sizeof(real) * (vocab_size)); + bias_def_value = (real *)malloc(sizeof(real) * (vocab_size)); + memset(bias_word_value, 0, sizeof(real) * (vocab_size)); + memset(bias_def_value, 0, sizeof(real) * (vocab_size)); + + context_bias_word_value = (real *)malloc(sizeof(real) * (vocab_size)); + context_bias_def_value = (real *)malloc(sizeof(real) * (vocab_size)); + memset(context_bias_word_value, 0, sizeof(real) * (vocab_size)); + memset(context_bias_def_value, 0, sizeof(real) * (vocab_size)); + + bias_word_value_temp = (real *)malloc(sizeof(real) * (cap)); + bias_def_value_temp = (real *)malloc(sizeof(real) * (cap)); + memset(bias_word_value_temp, 0, sizeof(real) * (cap)); + memset(bias_def_value_temp, 0, sizeof(real) * (cap)); + + context_bias_word_value_temp = (real *)malloc(sizeof(real) * (cap)); + context_bias_def_value_temp = (real *)malloc(sizeof(real) * (cap)); + memset(context_bias_word_value_temp, 0, sizeof(real) * (cap)); + memset(context_bias_def_value_temp, 0, sizeof(real) * 
(cap)); + + boy_candidate_words = (long long *)malloc(sizeof(long long) * (seed_1_top_num)); + girl_candidate_words = (long long *)malloc(sizeof(long long) * (seed_2_top_num)); + + pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t)); + lines_per_thread = (long long *) malloc(num_threads * sizeof(long long)); + + time_t rawtime; + struct tm *info; + char time_buffer[80]; + // Lock-free asynchronous SGD + for (b = 0; b < num_iter; b++) { // b counts the (40) iterations + // Get s(w) for boy and girl + int boy_def_len = word_to_def[SEED_WORD_1][0]; // original DD-GloVe: HARD-CODED to use dominant gloss only, using " ... = 11" + int girl_def_len = word_to_def[SEED_WORD_2][0]; // if not capped: word_to_def[SEED_WORD_2][0] + real boy_def_sum[vector_size], girl_def_sum[vector_size]; + long long content_id; + // Initialize all values to zeros + for (int c = 0; c < vector_size; c++) { + boy_def_sum[c] = 0; + girl_def_sum[c] = 0; + } + for (int l = 0; l < boy_def_len; l++) { + content_id = word_to_def[SEED_WORD_1][l+1] * (vector_size + 1); + for (int c = 0; c < vector_size; c++) { + boy_def_sum[c] += W[c + content_id] / boy_def_len; + } + } + for (int l = 0; l < girl_def_len; l++) { + content_id = word_to_def[SEED_WORD_2][l+1] * (vector_size + 1); + for (int c = 0; c < vector_size; c++) { + girl_def_sum[c] += W[c + content_id] / girl_def_len; + } + } + // Get s(w) for context boy and girl + real context_boy_def_sum[vector_size], context_girl_def_sum[vector_size]; + // Initialize all values to zeros + for (int c = 0; c < vector_size; c++) { + context_boy_def_sum[c] = 0; + context_girl_def_sum[c] = 0; + } + for (int l = 0; l < boy_def_len; l++) { + content_id = (word_to_def[SEED_WORD_1][l+1] + vocab_size) * (vector_size + 1); + for (int c = 0; c < vector_size; c++) { + context_boy_def_sum[c] += W[c + content_id] / boy_def_len; + } + } + for (int l = 0; l < girl_def_len; l++) { + content_id = (word_to_def[SEED_WORD_2][l+1] + vocab_size) * (vector_size + 1); + 
for (int c = 0; c < vector_size; c++) { + context_girl_def_sum[c] += W[c + content_id] / girl_def_len; + } + } + + + // Get norm of boy, girl and their definition sum + real boy_norm = 0, girl_norm = 0, boy_def_norm = 0, girl_def_norm = 0; + real context_boy_norm = 0, context_girl_norm = 0, context_boy_def_norm = 0, context_girl_def_norm = 0; + for (int c = 0; c < vector_size; c++) { + boy_norm += pow(W[c + SEED_WORD_1 * (vector_size + 1)], 2); + girl_norm += pow(W[c + SEED_WORD_2 * (vector_size + 1)], 2); + boy_def_norm += pow(boy_def_sum[c], 2); + girl_def_norm += pow(girl_def_sum[c], 2); + context_boy_norm += pow(W[c + (SEED_WORD_1 + vocab_size) * (vector_size + 1)], 2); + context_girl_norm += pow(W[c + (SEED_WORD_2 + vocab_size) * (vector_size + 1)], 2); + context_boy_def_norm += pow(context_boy_def_sum[c], 2); + context_girl_def_norm += pow(context_girl_def_sum[c], 2); + } + boy_norm = sqrt(boy_norm); + girl_norm = sqrt(girl_norm); + boy_def_norm = sqrt(boy_def_norm); + girl_def_norm = sqrt(girl_def_norm); + context_boy_norm = sqrt(context_boy_norm); + context_girl_norm = sqrt(context_girl_norm); + context_boy_def_norm = sqrt(context_boy_def_norm); + context_girl_def_norm = sqrt(context_girl_def_norm); + + // Iterate through all words to get bias word value, bias def value, and same for context words + for (long long word_id = 0; word_id < vocab_size; word_id++){ + real word_bias = 0, context_word_bias = 0, def_bias = 0, context_def_bias = 0; + + // Find word bias + real word_dot_boy = 0, word_dot_girl = 0, word_norm = 0; + for (int c = 0; c < vector_size; c++) { + real curr = W[c + word_id * (vector_size + 1)]; + word_dot_boy += curr * W[c + SEED_WORD_1 * (vector_size + 1)]; + word_dot_girl += curr * W[c + SEED_WORD_2 * (vector_size + 1)]; + word_norm += pow(curr, 2); + } + word_norm = sqrt(word_norm); + word_bias = word_dot_boy / (word_norm * boy_norm) - word_dot_girl / (word_norm * girl_norm); + bias_word_value[word_id] = word_bias; + if (word_id < cap && 
((b < 5) || (((b + 1) % 10) == 0))) bias_word_value_temp[word_id] = word_bias; //ADJUST HERE + + real context_word_dot_boy = 0, context_word_dot_girl = 0, context_word_norm = 0; + for (int c = 0; c < vector_size; c++) { + real curr = W[c + (word_id + vocab_size) * (vector_size + 1)]; + context_word_dot_boy += curr * W[c + (SEED_WORD_1 + vocab_size) * (vector_size + 1)]; + context_word_dot_girl += curr * W[c + (SEED_WORD_2 + vocab_size) * (vector_size + 1)]; + context_word_norm += pow(curr, 2); + } + context_word_norm = sqrt(context_word_norm); + context_word_bias = context_word_dot_boy / (context_word_norm * context_boy_norm) - + context_word_dot_girl / (context_word_norm * context_girl_norm); + context_bias_word_value[word_id] = context_word_bias; + if (word_id < cap && ((b < 5) || (((b + 1) % 10) == 0))) context_bias_word_value_temp[word_id] = context_word_bias; //ADJUST HERE + + // Find definition bias + int content_len = word_to_def[word_id][0]; + real def_content_sum[vector_size], context_def_content_sum[vector_size]; + for (int c = 0; c < vector_size; c++) { + def_content_sum[c] = 0; + context_def_content_sum[c] = 0; + } + // s(w) for word + for (int l = 0; l < content_len; l++) { + long long content_id = word_to_def[word_id][l+1] * (vector_size + 1); + for (int c = 0; c < vector_size; c++) { + def_content_sum[c] += W[c + content_id]/ content_len; + } + } + // s(w) for context word + for (int l = 0; l < content_len; l++) { + long long content_id = (word_to_def[word_id][l+1] + vocab_size) * (vector_size + 1); + for (int c = 0; c < vector_size; c++) { + context_def_content_sum[c] += W[c + content_id] / content_len; + } + } + + // If no definition, 0 bias + if (content_len == 0) { + bias_def_value[word_id] = 0; + context_bias_def_value[word_id] = 0; + if (word_id < cap && ((b < 5) || (((b + 1) % 10) == 0))) bias_def_value_temp[word_id] = 0; // ADJUST HERE + if (word_id < cap && ((b < 5) || (((b + 1) % 10) == 0))) context_bias_def_value_temp[word_id] = 0; // 
ADJUST HERE + } + else { + real def_dot_boy = 0, def_dot_girl = 0, def_norm = 0; + for (int c = 0; c < vector_size; c++) { + def_dot_boy += def_content_sum[c] * boy_def_sum[c]; + def_dot_girl += def_content_sum[c] * girl_def_sum[c]; + def_norm += pow(def_content_sum[c], 2); + } + def_norm = sqrt(def_norm); + def_bias = def_dot_boy / (def_norm * boy_def_norm) - def_dot_girl / (def_norm * girl_def_norm); + bias_def_value[word_id] = def_bias; + if (word_id < cap && ((b < 5) || (((b + 1) % 10) == 0))) bias_def_value_temp[word_id] = def_bias; // ADJUST HERE + + real context_def_dot_boy = 0, context_def_dot_girl = 0, context_def_norm = 0; + for (int c = 0; c < vector_size; c++) { + context_def_dot_boy += context_def_content_sum[c] * context_boy_def_sum[c]; + context_def_dot_girl += context_def_content_sum[c] * context_girl_def_sum[c]; + context_def_norm += pow(context_def_content_sum[c], 2); + } + context_def_norm = sqrt(context_def_norm); + context_def_bias = context_def_dot_boy / (context_def_norm * context_boy_def_norm) - + context_def_dot_girl / (context_def_norm * context_girl_def_norm); + context_bias_def_value[word_id] = context_def_bias; + if (word_id < cap && ((b < 5) || (((b + 1) % 10) == 0))) context_bias_def_value_temp[word_id] = context_def_bias; // ADJUST HERE + } + + + } // End of iterating all vocab to find biases + + // Find most attribute-specific words + if ((b < 5) || (((b + 1) % 10) == 0)) { // ADJUST HERE how often seed words are updated. race: (b < 5) || (((b + 1) % 10) == 0). gender: b == 0 + // copies (temp) are used because original has to remain in fixed order for access via index + qsort(bias_word_value_temp, cap, sizeof(real), compare); // bias_word_value_temp is only cap long! 
+ qsort(context_bias_word_value_temp, cap, sizeof(real), compare); // sorting in ascending order + qsort(bias_def_value_temp, cap, sizeof(real), compare); + qsort(context_bias_def_value_temp, cap, sizeof(real), compare); + + boy_added = 0, girl_added = 0; + for (long long word_id = 0; word_id < cap; word_id++) { // for all except one words in vocab since cap = 399999 + if (bias_def_value[word_id] < bias_def_value_temp[seed_2_top_num]){ // def bias of looked at word < nh smallest def bias overall (girl biases should have large negative numbers). should be the case for max. n words. + girl_candidate_words[girl_added] = word_id; + girl_added++; + + /* Debugging print statements + fprintf(stderr, "girl word added\n"); + fprintf(stderr, "bias_def_value_temp[seed_2_top_num]: %f\n", bias_def_value_temp[seed_2_top_num]); + fprintf(stderr, "bias_def_value[word_id]: %f\n", bias_def_value[word_id]);*/ + } + else if (bias_def_value[word_id] > bias_def_value_temp[cap - seed_1_top_num - 1]) { + boy_candidate_words[boy_added] = word_id; + boy_added++; + + /* Debugging print statements + fprintf(stderr, "boy word added\n"); + fprintf(stderr, "bias_def_value_temp[cap - seed_1_top_num - 1]: %f\n", bias_def_value_temp[cap - seed_1_top_num - 1]); + fprintf(stderr, "bias_def_value[word_id]: %f\n", bias_def_value[word_id]);*/ + } + + } + } + + total_cost = 0; + total_def_cost = 0; + total_ortho_cost = 0; + total_context_ortho_cost = 0; + total_proj_cost = 0; + for (a = 0; a < num_threads - 1; a++) lines_per_thread[a] = num_lines / num_threads; + lines_per_thread[a] = num_lines / num_threads + num_lines % num_threads; + long long *thread_ids = (long long*)malloc(sizeof(long long) * num_threads); + for (a = 0; a < num_threads; a++) thread_ids[a] = a; + for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, glove_thread, (void *)&thread_ids[a]); + for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL); + for (a = 0; a < num_threads; a++) { + total_cost += cost[a]; + 
total_def_cost += def_cost[a]; + total_ortho_cost += ortho_cost[a]; + total_context_ortho_cost += context_ortho_cost[a]; + total_proj_cost += proj_cost[a]; + } + free(thread_ids); + + time(&rawtime); + info = localtime(&rawtime); + strftime(time_buffer,80,"%x - %I:%M.%S%p", info); + fprintf(stderr, "%s, iter: %03d, glove_cost: %lf, def_cost: %lf, ortho_cost: %lf, context_ortho_cost: %lf, proj_cost: %lf\n\n\n", time_buffer, + b+1, total_cost/num_lines, total_def_cost/(vocab_size * num_threads), total_ortho_cost/(vocab_size * num_threads), + total_context_ortho_cost/(vocab_size * num_threads), total_proj_cost/(vocab_size * num_threads)); + + + if (checkpoint_every > 0 && (b + 1) % checkpoint_every == 0) { + fprintf(stderr," saving intermediate parameters for iter %03d...", b+1); + save_params_return_code = save_params(b+1); + if (save_params_return_code != 0) { + free(pt); + free(lines_per_thread); + return save_params_return_code; + } + fprintf(stderr,"done.\n"); + } + } + free(pt); + free(lines_per_thread); + + for (a = 0; a < def_word_num; a++) { + free(word_to_def[def_word_ids[a]]); + } + free(word_to_def); + free(bias_word_value); + free(bias_def_value); + free(context_bias_word_value); + free(context_bias_def_value); + free(boy_candidate_words); + free(girl_candidate_words); + + return save_params(-1); +} + +int main(int argc, char **argv) { + int i; + FILE *fid; + int result = 0; + + if (argc == 1) { + printf("GloVe: Global Vectors for Word Representation, v0.2\n"); + printf("Author: Jeffrey Pennington (jpennin@stanford.edu)\n\n"); + printf("Usage options:\n"); + printf("\t-verbose <int>\n"); + printf("\t\tSet verbosity: 0, 1, or 2 (default)\n"); + printf("\t-write-header <int>\n"); + printf("\t\tIf 1, write vocab_size/vector_size as first line. 
Do nothing if 0 (default).\n"); + printf("\t-vector-size <int>\n"); + printf("\t\tDimension of word vector representations (excluding bias term); default 50\n"); + printf("\t-threads <int>\n"); + printf("\t\tNumber of threads; default 8\n"); + printf("\t-iter <int>\n"); + printf("\t\tNumber of training iterations; default 25\n"); + printf("\t-eta <float>\n"); + printf("\t\tInitial learning rate; default 0.05\n"); + printf("\t-alpha <float>\n"); + printf("\t\tParameter in exponent of weighting function; default 0.75\n"); + printf("\t-x-max <float>\n"); + printf("\t\tParameter specifying cutoff in weighting function; default 100.0\n"); + printf("\t-grad-clip\n"); + printf("\t\tGradient components clipping parameter. Values will be clipped to [-grad-clip, grad-clip] interval\n"); + printf("\t-binary <int>\n"); + printf("\t\tSave output in binary format (0: text, 1: binary, 2: both); default 0\n"); + printf("\t-model <int>\n"); + printf("\t\tModel for word vector output (for text output only); default 2\n"); + printf("\t\t 0: output all data, for both word and context word vectors, including bias terms\n"); + printf("\t\t 1: output word vectors, excluding bias terms\n"); + printf("\t\t 2: output word vectors + context word vectors, excluding bias terms\n"); + printf("\t-input-file <file>\n"); + printf("\t\tBinary input file of shuffled cooccurrence data (produced by 'cooccur' and 'shuffle'); default cooccurrence.shuf.bin\n"); + printf("\t-vocab-file <file>\n"); + printf("\t\tFile containing vocabulary (truncated unigram counts, produced by 'vocab_count'); default vocab.txt\n"); + printf("\t-save-file <file>\n"); + printf("\t\tFilename, excluding extension, for word vector output; default vectors\n"); + printf("\t-gradsq-file <file>\n"); + printf("\t\tFilename, excluding extension, for squared gradient output; default gradsq\n"); + printf("\t-save-gradsq <int>\n"); + printf("\t\tSave accumulated squared gradients; default 0 (off); ignored if gradsq-file is 
specified\n"); + printf("\t-checkpoint-every <int>\n"); + printf("\t\tCheckpoint a model every <int> iterations; default 0 (off)\n"); + printf("\t-load-init-param <int>\n"); + printf("\t\tLoad initial parameters from -init-param-file; default 0 (false)\n"); + printf("\t-save-init-param <int>\n"); + printf("\t\tSave initial parameters (i.e., checkpoint the model before any training); default 0 (false)\n"); + printf("\t-init-param-file <file>\n"); + printf("\t\tBinary initial parameters file to be loaded if -load-init-params is 1; (default is to look for vectors.000.bin)\n"); + printf("\t-load-init-gradsq <int>\n"); + printf("\t\tLoad initial squared gradients from -init-gradsq-file; default 0 (false)\n"); + printf("\t-init-gradsq-file <file>\n"); + printf("\t\tBinary initial squared gradients file to be loaded if -load-init-gradsq is 1; (default is to look for gradsq.000.bin)\n"); + printf("\t-seed <int>\n"); + printf("\t\tRandom seed to use. If not set, will be randomized using current time."); + printf("\nExample usage:\n"); + printf("./glove -input-file cooccurrence.shuf.bin -vocab-file vocab.txt -save-file vectors -gradsq-file gradsq -verbose 2 -vector-size 100 -threads 16 -alpha 0.75 -x-max 100.0 -eta 0.05 -binary 2 -model 2\n\n"); + result = 0; + } else { + if ((i = find_arg((char *)"-write-header", argc, argv)) > 0) write_header = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-verbose", argc, argv)) > 0) verbose = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-vector-size", argc, argv)) > 0) vector_size = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-iter", argc, argv)) > 0) num_iter = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-threads", argc, argv)) > 0) num_threads = atoi(argv[i + 1]); + cost = malloc(sizeof(real) * num_threads); + def_cost = malloc(sizeof(real) * num_threads); + ortho_cost = malloc(sizeof(real) * num_threads); + context_ortho_cost = malloc(sizeof(real) * num_threads); + proj_cost = malloc(sizeof(real) * num_threads); + if ((i 
= find_arg((char *)"-alpha", argc, argv)) > 0) alpha = atof(argv[i + 1]); + if ((i = find_arg((char *)"-x-max", argc, argv)) > 0) x_max = atof(argv[i + 1]); + if ((i = find_arg((char *)"-eta", argc, argv)) > 0) eta = atof(argv[i + 1]); + if ((i = find_arg((char *)"-grad-clip", argc, argv)) > 0) grad_clip_value = atof(argv[i + 1]); + if ((i = find_arg((char *)"-binary", argc, argv)) > 0) use_binary = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-model", argc, argv)) > 0) model = atoi(argv[i + 1]); + if (model != 0 && model != 1) model = 2; + if ((i = find_arg((char *)"-save-gradsq", argc, argv)) > 0) save_gradsq = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-vocab-file", argc, argv)) > 0) strcpy(vocab_file, argv[i + 1]); + else strcpy(vocab_file, (char *)"vocab.txt"); + if ((i = find_arg((char *)"-save-file", argc, argv)) > 0) strcpy(save_W_file, argv[i + 1]); + else strcpy(save_W_file, (char *)"vectors"); + if ((i = find_arg((char *)"-gradsq-file", argc, argv)) > 0) { + strcpy(save_gradsq_file, argv[i + 1]); + save_gradsq = 1; + } + else if (save_gradsq > 0) strcpy(save_gradsq_file, (char *)"gradsq"); + if ((i = find_arg((char *)"-input-file", argc, argv)) > 0) strcpy(input_file, argv[i + 1]); + else strcpy(input_file, (char *)"cooccurrence.shuf.bin"); + if ((i = find_arg((char *)"-checkpoint-every", argc, argv)) > 0) checkpoint_every = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-init-param-file", argc, argv)) > 0) strcpy(init_param_file, argv[i + 1]); + else strcpy(init_param_file, (char *)"vectors.000.bin"); + if ((i = find_arg((char *)"-load-init-param", argc, argv)) > 0) load_init_param = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-save-init-param", argc, argv)) > 0) save_init_param = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-init-gradsq-file", argc, argv)) > 0) strcpy(init_gradsq_file, argv[i + 1]); + else strcpy(init_gradsq_file, (char *)"gradsq.000.bin"); + if ((i = find_arg((char *)"-load-init-gradsq", argc, argv)) > 0) 
load_init_gradsq = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-seed", argc, argv)) > 0) seed = atoi(argv[i + 1]); + + strcpy(definition_file, "/home/students/reichelt/ba/bias-mitigation-ba/data/embeddings/glove/dd-glove/definitions.dat"); // change file name here + + if ((i = find_arg((char *)"-use-def-loss", argc, argv)) > 0) use_def_loss = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-use-ortho-loss", argc, argv)) > 0) use_ortho_loss = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-use-proj-loss", argc, argv)) > 0) use_proj_loss = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-lambda", argc, argv)) > 0) lambda = atof(argv[i + 1]); + if ((i = find_arg((char *)"-beta", argc, argv)) > 0) beta = atof(argv[i + 1]); + if ((i = find_arg((char *)"-gamma", argc, argv)) > 0) gamma_ = atof(argv[i + 1]); + + vocab_size = 0; + fid = fopen(vocab_file, "r"); + if (fid == NULL) {log_file_loading_error("vocab file", vocab_file); free(cost); return 1;} + while ((i = getc(fid)) != EOF) if (i == '\n') vocab_size++; // Count number of entries in vocab_file + fclose(fid); + if (vocab_size == 0) {fprintf(stderr, "Unable to find any vocab entries in vocab file %s.\n", vocab_file); free(cost); return 1;} + result = train_glove(); + free(cost); + free(def_cost); + free(ortho_cost); + free(context_ortho_cost); + free(proj_cost); + } + free(W); + free(gradsq); + + return result; +} diff --git a/DD-GloVe/src/shuffle.c b/DD-GloVe/src/shuffle.c new file mode 100755 index 0000000000000000000000000000000000000000..e2dee816eed59b2dd7dbc2c8131197fe16b4b53b --- /dev/null +++ b/DD-GloVe/src/shuffle.c @@ -0,0 +1,210 @@ +// Tool to shuffle entries of word-word cooccurrence files +// +// Copyright (c) 2014 The Board of Trustees of +// The Leland Stanford Junior University. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +// For more information, bug reports, fixes, contact: +// Jeffrey Pennington (jpennin@stanford.edu) +// GlobalVectors@googlegroups.com +// http://nlp.stanford.edu/projects/glove/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "common.h" + + +static const long LRAND_MAX = ((long) RAND_MAX + 2) * (long)RAND_MAX; + +int verbose = 2; // 0, 1, or 2 +int seed = 0; +long long array_size = 2000000; // size of chunks to shuffle individually +char *file_head; // temporary file string +real memory_limit = 2.0; // soft limit, in gigabytes + +/* Generate uniformly distributed random long ints */ +static long rand_long(long n) { + long limit = LRAND_MAX - LRAND_MAX % n; + long rnd; + do { + rnd = ((long)RAND_MAX + 1) * (long)rand() + (long)rand(); + } while (rnd >= limit); + return rnd % n; +} + +/* Write contents of array to binary file */ +int write_chunk(CREC *array, long size, FILE *fout) { + long i = 0; + for (i = 0; i < size; i++) fwrite(&array[i], sizeof(CREC), 1, fout); + return 0; +} + +/* Fisher-Yates shuffle */ +void shuffle(CREC *array, long n) { + long i, j; + CREC tmp; + for (i = n - 1; i > 0; i--) { + j = rand_long(i + 1); + tmp = array[j]; + array[j] = array[i]; + array[i] = tmp; + } +} + +/* Merge shuffled temporary files; doesn't necessarily produce a perfect shuffle, but good enough */ +int shuffle_merge(int num) { + long i, j, k, l = 0; + int fidcounter = 0; + CREC *array; + char filename[MAX_STRING_LENGTH]; + FILE **fid, *fout = stdout; + + array = malloc(sizeof(CREC) * array_size); + 
fid = calloc(num, sizeof(FILE)); + for (fidcounter = 0; fidcounter < num; fidcounter++) { //num = number of temporary files to merge + sprintf(filename,"%s_%04d.bin",file_head, fidcounter); + fid[fidcounter] = fopen(filename, "rb"); + if (fid[fidcounter] == NULL) { + log_file_loading_error("temp file", filename); + free(array); + free_fid(fid, num); + return 1; + } + } + if (verbose > 0) fprintf(stderr, "Merging temp files: processed %ld lines.", l); + + while (1) { //Loop until EOF in all files + i = 0; + //Read at most array_size values into array, roughly array_size/num from each temp file + for (j = 0; j < num; j++) { + if (feof(fid[j])) continue; + for (k = 0; k < array_size / num; k++){ + fread(&array[i], sizeof(CREC), 1, fid[j]); + if (feof(fid[j])) break; + i++; + } + } + if (i == 0) break; + l += i; + shuffle(array, i-1); // Shuffles lines between temp files + write_chunk(array,i,fout); + if (verbose > 0) fprintf(stderr, "\033[31G%ld lines.", l); + } + fprintf(stderr, "\033[0GMerging temp files: processed %ld lines.", l); + for (fidcounter = 0; fidcounter < num; fidcounter++) { + fclose(fid[fidcounter]); + sprintf(filename,"%s_%04d.bin",file_head, fidcounter); + remove(filename); + } + fprintf(stderr, "\n\n"); + free(array); + free(fid); + return 0; +} + +/* Shuffle large input stream by splitting into chunks */ +int shuffle_by_chunks() { + if (seed == 0) { + seed = time(0); + } + fprintf(stderr, "Using random seed %d\n", seed); + srand(seed); + long i = 0, l = 0; + int fidcounter = 0; + char filename[MAX_STRING_LENGTH]; + CREC *array; + FILE *fin = stdin, *fid; + array = malloc(sizeof(CREC) * array_size); + + fprintf(stderr,"SHUFFLING COOCCURRENCES\n"); + if (verbose > 0) fprintf(stderr,"array size: %lld\n", array_size); + sprintf(filename,"%s_%04d.bin",file_head, fidcounter); + fid = fopen(filename,"w"); + if (fid == NULL) { + log_file_loading_error("file", filename); + free(array); + return 1; + } + if (verbose > 1) fprintf(stderr, "Shuffling by chunks: 
processed 0 lines."); + + while (1) { //Continue until EOF + if (i >= array_size) {// If array is full, shuffle it and save to temporary file + shuffle(array, i-2); + l += i; + if (verbose > 1) fprintf(stderr, "\033[22Gprocessed %ld lines.", l); + write_chunk(array,i,fid); + fclose(fid); + fidcounter++; + sprintf(filename,"%s_%04d.bin",file_head, fidcounter); + fid = fopen(filename,"w"); + if (fid == NULL) { + log_file_loading_error("file", filename); + free(array); + return 1; + } + i = 0; + } + fread(&array[i], sizeof(CREC), 1, fin); + if (feof(fin)) break; + i++; + } + shuffle(array, i-2); //Last chunk may be smaller than array_size + write_chunk(array,i,fid); + l += i; + if (verbose > 1) fprintf(stderr, "\033[22Gprocessed %ld lines.\n", l); + if (verbose > 1) fprintf(stderr, "Wrote %d temporary file(s).\n", fidcounter + 1); + fclose(fid); + free(array); + return shuffle_merge(fidcounter + 1); // Merge and shuffle together temporary files +} + +int main(int argc, char **argv) { + int i; + + if (argc == 2 && + (!scmp(argv[1], "-h") || !scmp(argv[1], "-help") || !scmp(argv[1], "--help"))) { + printf("Tool to shuffle entries of word-word cooccurrence files\n"); + printf("Author: Jeffrey Pennington (jpennin@stanford.edu)\n\n"); + printf("Usage options:\n"); + printf("\t-verbose <int>\n"); + printf("\t\tSet verbosity: 0, 1, or 2 (default)\n"); + printf("\t-memory <float>\n"); + printf("\t\tSoft limit for memory consumption, in GB; default 4.0\n"); + printf("\t-array-size <int>\n"); + printf("\t\tLimit to length <int> the buffer which stores chunks of data to shuffle before writing to disk. \n\t\tThis value overrides that which is automatically produced by '-memory'.\n"); + printf("\t-temp-file <file>\n"); + printf("\t\tFilename, excluding extension, for temporary files; default temp_shuffle\n"); + printf("\t-seed <int>\n"); + printf("\t\tRandom seed to use. 
If not set, will be randomized using current time."); + printf("\nExample usage: (assuming 'cooccurrence.bin' has been produced by 'coccur')\n"); + printf("./shuffle -verbose 2 -memory 8.0 < cooccurrence.bin > cooccurrence.shuf.bin\n"); + return 0; + } + + file_head = malloc(sizeof(char) * MAX_STRING_LENGTH); + if ((i = find_arg((char *)"-verbose", argc, argv)) > 0) verbose = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-temp-file", argc, argv)) > 0) strcpy(file_head, argv[i + 1]); + else strcpy(file_head, (char *)"temp_shuffle"); + if ((i = find_arg((char *)"-memory", argc, argv)) > 0) memory_limit = atof(argv[i + 1]); + array_size = (long long) (0.95 * (real)memory_limit * 1073741824/(sizeof(CREC))); + if ((i = find_arg((char *)"-array-size", argc, argv)) > 0) array_size = atoll(argv[i + 1]); + if ((i = find_arg((char *)"-seed", argc, argv)) > 0) seed = atoi(argv[i + 1]); + const int returned_value = shuffle_by_chunks(); + free(file_head); + return returned_value; +} + diff --git a/DD-GloVe/src/vocab_count.c b/DD-GloVe/src/vocab_count.c new file mode 100755 index 0000000000000000000000000000000000000000..723afa0678d8c7a66734720da690d87163077300 --- /dev/null +++ b/DD-GloVe/src/vocab_count.c @@ -0,0 +1,172 @@ +// Tool to extract unigram counts +// +// GloVe: Global Vectors for Word Representation +// Copyright (c) 2014 The Board of Trustees of +// The Leland Stanford Junior University. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// +// For more information, bug reports, fixes, contact: +// Jeffrey Pennington (jpennin@stanford.edu) +// Christopher Manning (manning@cs.stanford.edu) +// https://github.com/stanfordnlp/GloVe/ +// GlobalVectors@googlegroups.com +// http://nlp.stanford.edu/projects/glove/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "common.h" + +typedef struct vocabulary { + char *word; + long long count; +} VOCAB; + +int verbose = 2; // 0, 1, or 2 +long long min_count = 1; // min occurrences for inclusion in vocab +long long max_vocab = 0; // max_vocab = 0 for no limit + + +/* Vocab frequency comparison; break ties alphabetically */ +int CompareVocabTie(const void *a, const void *b) { + long long c; + if ( (c = ((VOCAB *) b)->count - ((VOCAB *) a)->count) != 0) return ( c > 0 ? 1 : -1 ); + else return (scmp(((VOCAB *) a)->word,((VOCAB *) b)->word)); + +} + +/* Vocab frequency comparison; no tie-breaker */ +int CompareVocab(const void *a, const void *b) { + long long c; + if ( (c = ((VOCAB *) b)->count - ((VOCAB *) a)->count) != 0) return ( c > 0 ? 
1 : -1 ); + else return 0; +} + +/* Search hash table for given string, insert if not found */ +void hashinsert(HASHREC **ht, char *w) { + HASHREC *htmp, *hprv; + unsigned int hval = HASHFN(w, TSIZE, SEED); + + for (hprv = NULL, htmp = ht[hval]; htmp != NULL && scmp(htmp->word, w) != 0; hprv = htmp, htmp = htmp->next); + if (htmp == NULL) { + htmp = (HASHREC *) malloc( sizeof(HASHREC) ); + htmp->word = (char *) malloc( strlen(w) + 1 ); + strcpy(htmp->word, w); + htmp->num = 1; + htmp->next = NULL; + if ( hprv==NULL ) + ht[hval] = htmp; + else + hprv->next = htmp; + } + else { + /* new records are not moved to front */ + htmp->num++; + if (hprv != NULL) { + /* move to front on access */ + hprv->next = htmp->next; + htmp->next = ht[hval]; + ht[hval] = htmp; + } + } + return; +} + +int get_counts() { + long long i = 0, j = 0, vocab_size = 12500; + // char format[20]; + char str[MAX_STRING_LENGTH + 1]; + HASHREC **vocab_hash = inithashtable(); + HASHREC *htmp; + VOCAB *vocab; + FILE *fid = stdin; + + fprintf(stderr, "BUILDING VOCABULARY\n"); + if (verbose > 1) fprintf(stderr, "Processed %lld tokens.", i); + // sprintf(format,"%%%ds",MAX_STRING_LENGTH); + while ( ! feof(fid)) { + // Insert all tokens into hashtable + int nl = get_word(str, fid); + if (nl) continue; // just a newline marker or feof + if (strcmp(str, "<unk>") == 0) { + fprintf(stderr, "\nError, <unk> vector found in corpus.\nPlease remove <unk>s from your corpus (e.g. 
cat text8 | sed -e 's/<unk>/<raw_unk>/g' > text8.new)"); + free_table(vocab_hash); + return 1; + } + hashinsert(vocab_hash, str); + if (((++i)%100000) == 0) if (verbose > 1) fprintf(stderr,"\033[11G%lld tokens.", i); + } + if (verbose > 1) fprintf(stderr, "\033[0GProcessed %lld tokens.\n", i); + vocab = malloc(sizeof(VOCAB) * vocab_size); + for (i = 0; i < TSIZE; i++) { // Migrate vocab to array + htmp = vocab_hash[i]; + while (htmp != NULL) { + vocab[j].word = htmp->word; + vocab[j].count = htmp->num; + j++; + if (j>=vocab_size) { + vocab_size += 2500; + vocab = (VOCAB *)realloc(vocab, sizeof(VOCAB) * vocab_size); + } + htmp = htmp->next; + } + } + if (verbose > 1) fprintf(stderr, "Counted %lld unique words.\n", j); + if (max_vocab > 0 && max_vocab < j) + // If the vocabulary exceeds limit, first sort full vocab by frequency without alphabetical tie-breaks. + // This results in pseudo-random ordering for words with same frequency, so that when truncated, the words span whole alphabet + qsort(vocab, j, sizeof(VOCAB), CompareVocab); + else max_vocab = j; + qsort(vocab, max_vocab, sizeof(VOCAB), CompareVocabTie); //After (possibly) truncating, sort (possibly again), breaking ties alphabetically + + for (i = 0; i < max_vocab; i++) { + if (vocab[i].count < min_count) { // If a minimum frequency cutoff exists, truncate vocabulary + if (verbose > 0) fprintf(stderr, "Truncating vocabulary at min count %lld.\n",min_count); + break; + } + printf("%s %lld\n",vocab[i].word,vocab[i].count); + } + + if (i == max_vocab && max_vocab < j) if (verbose > 0) fprintf(stderr, "Truncating vocabulary at size %lld.\n", max_vocab); + fprintf(stderr, "Using vocabulary of size %lld.\n\n", i); + free_table(vocab_hash); + free(vocab); + return 0; +} + +int main(int argc, char **argv) { + if (argc == 2 && + (!scmp(argv[1], "-h") || !scmp(argv[1], "-help") || !scmp(argv[1], "--help"))) { + printf("Simple tool to extract unigram counts\n"); + printf("Author: Jeffrey Pennington 
(jpennin@stanford.edu)\n\n"); + printf("Usage options:\n"); + printf("\t-verbose <int>\n"); + printf("\t\tSet verbosity: 0, 1, or 2 (default)\n"); + printf("\t-max-vocab <int>\n"); + printf("\t\tUpper bound on vocabulary size, i.e. keep the <int> most frequent words. The minimum frequency words are randomly sampled so as to obtain an even distribution over the alphabet.\n"); + printf("\t-min-count <int>\n"); + printf("\t\tLower limit such that words which occur fewer than <int> times are discarded.\n"); + printf("\nExample usage:\n"); + printf("./vocab_count -verbose 2 -max-vocab 100000 -min-count 10 < corpus.txt > vocab.txt\n"); + return 0; + } + + int i; + if ((i = find_arg((char *)"-verbose", argc, argv)) > 0) verbose = atoi(argv[i + 1]); + if ((i = find_arg((char *)"-max-vocab", argc, argv)) > 0) max_vocab = atoll(argv[i + 1]); + if ((i = find_arg((char *)"-min-count", argc, argv)) > 0) min_count = atoll(argv[i + 1]); + return get_counts(); +} +