diff --git a/results/trial_with_lemma.log b/results/trial_with_lemma.log new file mode 100644 index 0000000000000000000000000000000000000000..066dba2a10f69840458dab5bac537e05e096742e --- /dev/null +++ b/results/trial_with_lemma.log @@ -0,0 +1,276 @@ +[ INFO ] Configuration - Loading /eval.properties FROM /home/students/zimmermann/Courses/ws17/fsem/absinth/WSI-Evaluator/config/eval.properties +[ INFO ] WSIEvaluator - Run started at 18:54 +[ INFO ] Dataset - Loading the datasets/trial/ +[ INFO ] Dataset - Loading the topics data datasets/trial/ +[ INFO ] Dataset - Loading the subtopics data +[ INFO ] Dataset - Loading the snippets data +[ INFO ] Dataset - Loading the relations data +[ INFO ] Dataset - Loading the relations data +[ INFO ] WSIEvaluator - +=================================================== +Starting the evaluation +=================================================== +[ INFO ] WSIEvaluator - +=================================================== +subtopic-recall@K default: 100 +=================================================== +[ WARN ] WSIEvaluator - +============= Query 1 : "soul food" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 2 snippet clusters: ==================== +The cluster 1 contains the snippets: [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 15, 16, 19, 20, 26, 28, 30, 32, 33, 35, 39, 45, 46, 47, 48, 49, 50, 59, 60, 61, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 84, 85, 86, 87, 94, 95, 96, 97] +The cluster 2 contains the snippets: [9, 11, 17, 18, 21, 22, 23, 24, 25, 27, 31, 34, 36, 37, 38, 40, 41, 42, 43, 44, 51, 52, 53, 54, 55, 56, 57, 58, 62, 63, 64, 65, 68, 69, 81, 82, 83, 88, 90, 91, 92, 93, 98, 99, 100] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [1, 9, 2, 11, 3, 17, 4, 18, 5, 21, 6, 22, 7, 23, 8, 24, 10, 25, 12, 27, 13, 31, 14, 34, 15, 36, 16, 37, 19, 38, 20, 40, 26, 41, 28, 42, 30, 43, 32, 44, 33, 51, 35, 52, 39, 53, 45, 54, 46, 55, 47, 56, 48, 57, 49, 58, 50, 62, 59, 63, 60, 64, 61, 65, 66, 68, 67, 69, 70, 81, 71, 82, 72, 83, 73, 88, 74, 90, 75, 91, 76, 92, 77, 93, 78, 98, 79, 99, 80, 100, 84, 85, 86, 87, 94, 95, 96, 97, 29, 89] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "soul food" =============== +============ Value of Rand Index = 0.5 ================================= +============ Partial average Rand Index = 0.5 ====== +============ Value of Adjusted Rand Index = 0.011326779883708487 ================================= +============ Partial average of Adjusted Rand Index = 0.011326779883708487 ================================= +============ Value of Jaccard Index = 0.3743680485338726 ================================= +============ Partial average of Jaccard Index = 0.3743680485338726 ====== +============ Precision = 0.7857142857142857, Recall = 0.8279569892473119, F1 = 0.806282722513089 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 2 ============ +============ average size of the created clusters: 49.0 ============ +=========================================================================== + +[ WARN ] WSIEvaluator - +============= Query 2 : "the block" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 3 snippet clusters: ==================== +The cluster 1 contains the snippets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 60, 61, 62, 63, 64, 65, 66, 68, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 97, 98, 99, 100] +The cluster 3 contains the snippets: [22, 33, 37, 39, 56, 57, 67, 69, 77, 96] +The cluster 2 contains the snippets: [12, 40, 59] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [1, 12, 22, 2, 40, 33, 3, 59, 37, 4, 39, 5, 56, 6, 57, 7, 67, 8, 69, 9, 77, 11, 96, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 60, 61, 62, 63, 64, 65, 66, 68, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 97, 98, 99, 100, 10] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "the block" =============== +============ Value of Rand Index = 0.518989898989899 ================================= +============ Partial average Rand Index = 0.5094949494949494 ====== +============ Value of Adjusted Rand Index = -0.0317214098770107 ================================= +============ Partial average of Adjusted Rand Index = -0.010197314996651106 ================================= +============ Value of Jaccard Index = 0.46470323741007197 ================================= +============ Partial average of Jaccard Index = 0.4195356429719723 ====== +============ Precision = 0.1919191919191919, Recall = 0.7037037037037037, F1 = 0.30158730158730157 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 3 ============ +============ average size of the created clusters: 33.0 ============ +=========================================================================== + +[ WARN ] WSIEvaluator - +============= Query 3 : "stephen king" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 1 snippet clusters: ==================== +The cluster 1 contains the snippets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "stephen king" =============== +============ Value of Rand Index = 0.98 ================================= +============ Partial average Rand Index = 0.6663299663299663 ====== +============ Value of Adjusted Rand Index = 0.0 ================================= +============ Partial average of Adjusted Rand Index = -0.006798209997767404 ================================= +============ Value of Jaccard Index = 0.98 ================================= +============ Partial average of Jaccard Index = 0.6063570953146482 ====== +============ Precision = 0.99, Recall = 1.0, F1 = 0.9949748743718593 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 1 ============ +============ average size of the created clusters: 100.0 ============ +=========================================================================== + +[ WARN ] WSIEvaluator - +============= Query 4 : "cool water" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 1 snippet clusters: ==================== +The cluster 1 contains the snippets: [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 3, 10, 28, 40, 55, 56, 82] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "cool water" =============== +============ Value of Rand Index = 0.585050505050505 ================================= +============ Partial average Rand Index = 0.6460101010101009 ====== +============ Value of Adjusted Rand Index = 0.020819043799792002 ================================= +============ Partial average of Adjusted Rand Index = 1.0610345162244753E-4 ================================= +============ Value of Jaccard Index = 0.5601713062098501 ================================= +============ Partial average of Jaccard Index = 0.5948106480384487 ====== +============ Precision = 0.21505376344086022, Recall = 0.8, F1 = 0.3389830508474576 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 1 ============ +============ average size of the created clusters: 93.0 ============ +=========================================================================== + +[ INFO ] WSIEvaluator - + +=========== Final average value of S-recall@K: ============= +1 0.3958 +2 0.6250 +3 0.7083 +4 0.7083 +5 0.7708 +6 0.7708 +7 0.8542 +8 0.9167 +9 0.9167 +10 0.9167 +11 0.9167 +12 0.9167 +13 1.0000 +14 1.0000 +15 1.0000 +16 1.0000 +17 1.0000 +18 1.0000 +19 1.0000 +20 1.0000 +21 1.0000 +22 1.0000 +23 1.0000 +24 1.0000 +25 1.0000 +26 1.0000 +27 1.0000 +28 1.0000 +29 1.0000 +30 1.0000 +31 1.0000 +32 1.0000 +33 1.0000 +34 1.0000 +35 1.0000 +36 1.0000 +37 1.0000 +38 1.0000 +39 1.0000 +40 1.0000 +41 1.0000 +42 1.0000 +43 1.0000 +44 1.0000 +45 1.0000 +46 1.0000 +47 1.0000 +48 1.0000 +49 1.0000 +50 1.0000 +51 1.0000 +52 1.0000 +53 1.0000 +54 1.0000 +55 1.0000 +56 1.0000 +57 1.0000 +58 1.0000 +59 1.0000 +60 1.0000 +61 1.0000 +62 1.0000 +63 1.0000 +64 1.0000 +65 1.0000 +66 1.0000 +67 1.0000 +68 1.0000 +69 1.0000 +70 1.0000 +71 1.0000 +72 1.0000 +73 1.0000 +74 1.0000 +75 1.0000 +76 1.0000 +77 1.0000 +78 1.0000 +79 1.0000 +80 1.0000 +81 1.0000 +82 1.0000 +83 1.0000 +84 1.0000 +85 1.0000 +86 1.0000 +87 1.0000 +88 1.0000 +89 1.0000 +90 1.0000 +91 1.0000 +92 1.0000 +93 1.0000 +94 1.0000 +95 1.0000 +96 1.0000 +97 1.0000 +98 1.0000 +99 1.0000 +100 1.0000 + +=========== Final average value of S-precision@r: ============= +0.4000 0.8333 +0.4500 0.5833 +0.5000 0.4111 +0.5500 0.3958 +0.6000 0.3655 +0.6500 0.3140 +0.7000 0.3017 +0.7500 0.2709 +0.8000 0.2460 +0.8500 0.2253 +0.9000 0.2079 +0.9500 0.1930 +1.0000 0.0267 + +=========== Final average value of F1: ===================== +average F1 = 0.6593683761975361 + +=========== Final average value of Rand Index: ============= +average Rand Index = 0.6460101010101009 + +=========== Final average value of Adjusted Rand Index: ==== +average Adj Rand Index = 1.0610345162244753E-4 + +=========== Final average value of Jaccard Index: ========== +average Jaccard Index = 0.5948106480384487 + +================ Statistics: ==================================== +============ average number of created clusters: 1.75 +============ average cluster size: 68.75 + +[ WARN ] WSIEvaluator - Simulation started at: 18:54 and completed at 18:54 +Execution time: 1.026 sec diff --git a/results/trial_without_lemma.log b/results/trial_without_lemma.log new file mode 100644 index 0000000000000000000000000000000000000000..9f3644fb3d1734011ed7e8a43f2b5fe00da860f6 --- /dev/null +++ b/results/trial_without_lemma.log @@ -0,0 +1,279 @@ +[ INFO ] Configuration - Loading /eval.properties FROM /home/students/zimmermann/Courses/ws17/fsem/absinth/WSI-Evaluator/config/eval.properties +[ INFO ] WSIEvaluator - Run started at 17:56 +[ INFO ] Dataset - Loading the datasets/trial/ +[ INFO ] Dataset - Loading the topics data datasets/trial/ +[ INFO ] Dataset - Loading the subtopics data +[ INFO ] Dataset - Loading the snippets data +[ INFO ] Dataset - Loading the relations data +[ INFO ] Dataset - Loading the relations data +[ INFO ] WSIEvaluator - +=================================================== +Starting the evaluation +=================================================== +[ INFO ] WSIEvaluator - +=================================================== +subtopic-recall@K default: 100 +=================================================== +[ WARN ] WSIEvaluator - +============= Query 1 : "soul food" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 3 snippet clusters: ==================== +The cluster 1 contains the snippets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 22, 24, 25, 26, 30, 31, 33, 35, 39, 45, 46, 47, 48, 49, 50, 60, 61, 62, 63, 64, 65, 66, 67, 73, 74, 75, 76, 77, 78, 79, 80, 84, 85, 86, 87, 88, 90, 91, 93, 94, 95, 96, 97] +The cluster 2 contains the snippets: [21, 23, 27, 28, 34, 36, 37, 38, 40, 41, 42, 43, 44, 51, 52, 53, 54, 55, 56, 57, 58, 68, 69, 70, 71, 72, 81, 82, 83, 92, 98, 99, 100] +The cluster 3 contains the snippets: [15, 32] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [1, 21, 15, 2, 23, 32, 3, 27, 4, 28, 5, 34, 6, 36, 7, 37, 8, 38, 9, 40, 10, 41, 11, 42, 12, 43, 13, 44, 14, 51, 17, 52, 18, 53, 19, 54, 20, 55, 22, 56, 24, 57, 25, 58, 26, 68, 30, 69, 31, 70, 33, 71, 35, 72, 39, 81, 45, 82, 46, 83, 47, 92, 48, 98, 49, 99, 50, 100, 60, 61, 62, 63, 64, 65, 66, 67, 73, 74, 75, 76, 77, 78, 79, 80, 84, 85, 86, 87, 88, 90, 91, 93, 94, 95, 96, 97, 16, 29, 59, 89] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "soul food" =============== +============ Value of Rand Index = 0.5040404040404041 ================================= +============ Partial average Rand Index = 0.5040404040404041 ====== +============ Value of Adjusted Rand Index = 0.025197274115047952 ================================= +============ Partial average of Adjusted Rand Index = 0.025197274115047952 ================================= +============ Value of Jaccard Index = 0.3775354969574036 ================================= +============ Partial average of Jaccard Index = 0.3775354969574036 ====== +============ Precision = 0.8020833333333334, Recall = 0.8279569892473119, F1 = 0.8148148148148148 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 3 ============ +============ average size of the created clusters: 32.0 ============ +=========================================================================== + +[ WARN ] WSIEvaluator - +============= Query 2 : "the block" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 3 snippet clusters: ==================== +The cluster 1 contains the snippets: [2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 99, 100] +The cluster 2 contains the snippets: [12, 21, 22, 23, 29, 76, 77, 91, 95, 97, 98] +The cluster 3 contains the snippets: [39, 56, 57, 96] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [2, 12, 39, 3, 21, 56, 4, 22, 57, 5, 23, 96, 6, 29, 7, 76, 8, 77, 9, 91, 11, 95, 13, 97, 14, 98, 15, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 99, 100, 1, 10] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "the block" =============== +============ Value of Rand Index = 0.5921212121212122 ================================= +============ Partial average Rand Index = 0.5480808080808082 ====== +============ Value of Adjusted Rand Index = 0.13725322621998112 ================================= +============ Partial average of Adjusted Rand Index = 0.08122525016751454 ================================= +============ Value of Jaccard Index = 0.5132594021215043 ================================= +============ Partial average of Jaccard Index = 0.445397449539454 ====== +============ Precision = 0.20408163265306123, Recall = 0.7407407407407407, F1 = 0.32 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 3 ============ +============ average size of the created clusters: 32.666666666666664 ============ +=========================================================================== + +[ WARN ] WSIEvaluator - +============= Query 3 : "stephen king" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 3 snippet clusters: ==================== +The cluster 3 contains the snippets: [1, 2, 3, 4, 6, 8, 10, 11, 12, 14, 16, 17, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 35, 36, 37, 40, 41, 43, 44, 45, 46, 49, 50, 51, 52, 53, 55, 58, 59, 60, 62, 68, 70, 71, 72, 73, 74, 75, 77, 79, 80, 84, 86, 91, 92, 94, 95, 96, 97] +The cluster 1 contains the snippets: [5, 9, 13, 15, 18, 23, 33, 38, 39, 42, 47, 48, 54, 56, 57, 61, 63, 64, 65, 66, 67, 69, 76, 78, 81, 82, 85, 87, 88, 89, 90, 93, 98, 99, 100] +The cluster 2 contains the snippets: [7, 32, 34, 83] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [5, 7, 1, 9, 32, 2, 13, 34, 3, 15, 83, 4, 18, 6, 23, 8, 33, 10, 38, 11, 39, 12, 42, 14, 47, 16, 48, 17, 54, 19, 56, 20, 57, 21, 61, 22, 63, 24, 64, 25, 65, 26, 66, 27, 67, 28, 69, 29, 76, 30, 78, 31, 81, 35, 82, 36, 85, 37, 87, 40, 88, 41, 89, 43, 90, 44, 93, 45, 98, 46, 99, 49, 100, 50, 51, 52, 53, 55, 58, 59, 60, 62, 68, 70, 71, 72, 73, 74, 75, 77, 79, 80, 84, 86, 91, 92, 94, 95, 96, 97] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "stephen king" =============== +============ Value of Rand Index = 0.49737373737373736 ================================= +============ Partial average Rand Index = 0.5311784511784512 ====== +============ Value of Adjusted Rand Index = 0.011615896775833764 ================================= +============ Partial average of Adjusted Rand Index = 0.05802213237028762 ================================= +============ Value of Jaccard Index = 0.4906857727737973 ================================= +============ Partial average of Jaccard Index = 0.4604935572842351 ====== +============ Precision = 0.99, Recall = 1.0, F1 = 0.9949748743718593 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 3 ============ +============ average size of the created clusters: 33.333333333333336 ============ +=========================================================================== + +[ WARN ] WSIEvaluator - +============= Query 4 : "cool water" ============== + +[ INFO ] WSIEvaluator - +============== Cluster creation phase ================== + +[ WARN ] WSIEvaluator - ClusteredSnippets: + +================== 1 snippet clusters: ==================== +The cluster 1 contains the snippets: [1, 2, 4, 5, 6, 7, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 86, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] + +[ INFO ] WSIEvaluator - +================== Starting Evaluation ================== +Resulting list: [1, 2, 4, 5, 6, 7, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 86, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 3, 8, 9, 10, 12, 25, 26, 28, 40, 50, 55, 56, 82, 87] +[ WARN ] WSIEvaluator - +================ Results of Rand Index for the topic "cool water" =============== +============ Value of Rand Index = 0.5541414141414142 ================================= +============ Partial average Rand Index = 0.5369191919191919 ====== +============ Value of Adjusted Rand Index = 0.002291925310413701 ================================= +============ Partial average of Adjusted Rand Index = 0.04408958060531914 ================================= +============ Value of Jaccard Index = 0.5062639821029082 ================================= +============ Partial average of Jaccard Index = 0.4719361634889034 ====== +============ Precision = 0.23255813953488372, Recall = 0.8, F1 = 0.36036036036036034 ============ + +[ WARN ] WSIEvaluator - +================ Statistics: ==================================== +============ number of created clusters: 1 ============ +============ average size of the created clusters: 86.0 ============ +=========================================================================== + +[ INFO ] WSIEvaluator - + +=========== Final average value of S-recall@K: ============= +1 0.3958 +2 0.4792 +3 0.5625 +4 0.5625 +5 0.6250 +6 0.7083 +7 0.7708 +8 0.7708 +9 0.7708 +10 0.8542 +11 0.8542 +12 0.8542 +13 0.8542 +14 0.8542 +15 0.8542 +16 0.8542 +17 0.8542 +18 0.8542 +19 0.8542 +20 0.8542 +21 0.8542 +22 0.8542 +23 0.8542 +24 0.8542 +25 0.8542 +26 0.8542 +27 0.8542 +28 0.8542 +29 0.8542 +30 0.8542 +31 0.8542 +32 0.8542 +33 0.9167 +34 0.9167 +35 0.9167 +36 0.9167 +37 0.9167 +38 0.9167 +39 0.9167 +40 0.9167 +41 0.9167 +42 0.9167 +43 0.9167 +44 0.9167 +45 0.9167 +46 0.9167 +47 0.9167 +48 0.9167 +49 0.9167 +50 0.9167 +51 0.9167 +52 0.9167 +53 0.9167 +54 0.9167 +55 0.9167 +56 0.9167 +57 0.9167 +58 0.9167 +59 0.9167 +60 0.9167 +61 0.9167 +62 0.9167 +63 0.9167 +64 0.9167 +65 0.9167 +66 0.9167 +67 0.9167 +68 0.9167 +69 0.9167 +70 0.9167 +71 0.9167 +72 0.9167 +73 0.9167 +74 0.9167 +75 0.9167 +76 0.9167 +77 0.9167 +78 0.9167 +79 0.9167 +80 0.9167 +81 0.9167 +82 0.9167 +83 0.9167 +84 0.9167 +85 0.9167 +86 0.9167 +87 0.9167 +88 1.0000 +89 1.0000 +90 1.0000 +91 1.0000 +92 1.0000 +93 1.0000 +94 1.0000 +95 1.0000 +96 1.0000 +97 1.0000 +98 1.0000 +99 1.0000 +100 1.0000 + +=========== Final average value of S-precision@r: ============= +0.4000 0.6000 +0.4500 0.3730 +0.5000 0.3530 +0.5500 0.2951 +0.6000 0.2798 +0.6500 0.2473 +0.7000 0.1749 +0.7500 0.1277 +0.8000 0.1191 +0.8500 0.1118 +0.9000 0.1055 +0.9500 0.1000 +1.0000 0.0267 + +=========== Final average value of F1: ===================== +average F1 = 0.6706566012253643 + +=========== Final average value of Rand Index: ============= +average Rand Index = 0.5369191919191919 + +=========== Final average value of Adjusted Rand Index: ==== +average Adj Rand Index = 0.04408958060531914 + +=========== Final average value of Jaccard Index: ========== +average Jaccard Index = 0.4719361634889034 + +================ Statistics: ==================================== +============ average number of created clusters: 2.5 +============ average cluster size: 46.0 + +[ WARN ] WSIEvaluator - Simulation started at: 17:56 and completed at 17:56 +Execution time: 0.355 sec diff --git a/src/config.py b/src/config.py index cf97fedea72f3d4d53f9c6e1deb6f45f86a8098e..5e22df01c61feb62847707c886ace5225efc4d8a 100644 --- a/src/config.py +++ b/src/config.py @@ -6,8 +6,8 @@ Configuration file Choose paths for corpus, dataset and output. - The output directory should be empty when starting absinth. ''' -corpus = "/proj/absinth/wikipedia_reduced/" -dataset = "../WSI-Evaluator/datasets/MORESQUE/" +corpus = "/proj/absinth/wikipedia_shuffled2/" +dataset = "../WSI-Evaluator/datasets/dataset/" test = "../WSI-Evaluator/datasets/trial/" output = "../output/" @@ -16,7 +16,7 @@ Choose stop words and allowed pos-tags. - Stop words will not be considered for nodes. - Only tokens with allowed pos-tags will be considered. ''' -stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free', 'pp'] +stop_words = ['utc', "'s", 'new', 'p.', 'first', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free', 'pp', 'twitter', 'facebook', 'youtube', 'copyright', '®', '|'] allowed_tags = ['NN','NNS','JJ','JJS','JJR','NNP'] ''' @@ -43,5 +43,10 @@ max_weight = 0.9 Choose minimum number of neighbors and maximum median weight of the most frequent neighbors of a node for root hubs. - the threshold is calculated using the media of the same number of neighbors declared in min_neighbors. ''' -min_neighbors = 6 +min_neighbors = 5 threshold = 0.8 + +''' +Choose whether or not the tokens should be lemmatised. +''' +lemma = True