Skip to content
Snippets Groups Projects
Commit 8689b058 authored by toyota
Browse files

update data

parent 4a5a9846
No related branches found
No related tags found
No related merge requests found
File added
......@@ -29,24 +29,25 @@ Other values are default.
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.679597365162284
average F1 = 0.6822850886106397
=========== Final average value of Rand Index: =============
average Rand Index = 0.4274242424242424
average Rand Index = 0.42222222222222217
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.0477313870009419
average Adj Rand Index = 0.0774543476929948
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.24314621033143669
average Jaccard Index = 0.23433892522626892
================ Statistics: ====================================
============ average number of created clusters: 7.5
============ average cluster size: 17.361111111111114
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -121,7 +122,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintext_unigramm.bin")
model.load_model("/proj/toyota/wiki_model_unigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -28,24 +28,25 @@ Other values are default.
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.6798168213956604
average F1 = 0.6749557232429892
=========== Final average value of Rand Index: =============
average Rand Index = 0.40141414141414145
average Rand Index = 0.4215656565656566
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.02639210729751324
average Adj Rand Index = 0.04000866755660871
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.22710863255455066
average Jaccard Index = 0.24957193134807046
================ Statistics: ====================================
============ average number of created clusters: 7.5
============ average cluster size: 17.361111111111114
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -120,7 +121,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintexts_bigramm.bin")
model.load_model("/proj/toyota/wiki_model_bigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -29,25 +29,26 @@ Other values are default.
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.6822850886106397
average F1 = 0.689607949235083
=========== Final average value of Rand Index: =============
average Rand Index = 0.40095959595959596
average Rand Index = 0.4713636363636364
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.028802431941060965
average Adj Rand Index = 0.10025435637472
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.19490495257962206
average Jaccard Index = 0.2814881591205523
================ Statistics: ====================================
============ average number of created clusters: 7.0
============ average cluster size: 14.285714285714286
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -114,7 +115,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintext_unigramm.bin")
model.load_model("/proj/toyota/wiki_model_unigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -29,24 +29,25 @@ Other values are default.
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.6749557232429892
average F1 = 0.6856912156460447
=========== Final average value of Rand Index: =============
average Rand Index = 0.3984848484848485
average Rand Index = 0.4411616161616162
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.01963669755259982
average Adj Rand Index = 0.07822474956102966
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.20394359000967346
average Jaccard Index = 0.25975388676579275
================ Statistics: ====================================
============ average number of created clusters: 7.0
============ average cluster size: 14.285714285714286
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -113,7 +114,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintexts_bigramm.bin")
model.load_model("/proj/toyota/wiki_model_bigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -27,25 +27,26 @@ For the WSI purposes it uses the following methods:
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.5842727291855876
average F1 = 0.6609591661520388
=========== Final average value of Rand Index: =============
average Rand Index = 0.38616161616161615
average Rand Index = 0.4530808080808081
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.0010435679035448626
average Adj Rand Index = -0.02411648222615019
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.21713055494037742
average Jaccard Index = 0.3900090662960543
================ Statistics: ====================================
============ average number of created clusters: 6.75
============ average cluster size: 11.535416666666666
============ average number of created clusters: 3.75
============ average cluster size: 30.0
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -124,7 +125,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintext_unigramm.bin")
model.load_model("/proj/toyota/wiki_model_unigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -28,24 +28,25 @@ For the WSI purposes it uses the following methods:
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.6013936257538159
average F1 = 0.6656369246000418
=========== Final average value of Rand Index: =============
average Rand Index = 0.38823232323232326
average Rand Index = 0.5479292929292929
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = -0.0038108019555718464
average Adj Rand Index = -0.030051450442432036
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.21276973487337159
average Jaccard Index = 0.5311661990583127
================ Statistics: ====================================
============ average number of created clusters: 6.25
============ average cluster size: 12.740079365079364
============ average number of created clusters: 2.25
============ average cluster size: 54.16666666666667
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -124,7 +125,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintexts_bigramm.bin")
model.load_model("/proj/toyota/wiki_model_bigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -26,24 +26,25 @@ For the WSI purposes it uses the following methods:
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.6636355295257614
average F1 = 0.6663112571041688
=========== Final average value of Rand Index: =============
average Rand Index = 0.6465151515151515
average Rand Index = 0.6585858585858586
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.03561869550794858
average Adj Rand Index = 0.00638025509068816
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.6278794821842425
average Jaccard Index = 0.6496166607036896
================ Statistics: ====================================
============ average number of created clusters: 7.5
============ average cluster size: 15.050505050505052
============ average number of created clusters: 5.0
============ average cluster size: 21.904761904761905
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -110,7 +111,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintext_unigramm.bin")
model.load_model("/proj/toyota/wiki_model_unigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
......@@ -26,25 +26,25 @@ For the WSI purposes it uses the following methods:
------------------- EVALUATION -------------------
=========== Final average value of F1: =====================
average F1 = 0.6636355295257614
average F1 = 0.6738982099959293
=========== Final average value of Rand Index: =============
average Rand Index = 0.6323232323232324
average Rand Index = 0.6581818181818182
=========== Final average value of Adjusted Rand Index: ====
average Adj Rand Index = 0.014505707454839388
average Adj Rand Index = 0.02869241083779963
=========== Final average value of Jaccard Index: ==========
average Jaccard Index = 0.6153347420466695
average Jaccard Index = 0.6421053213743444
================ Statistics: ====================================
============ average number of created clusters: 7.75
============ average cluster size: 15.795454545454547
============ average number of created clusters: 6.0
============ average cluster size: 17.55952380952381
"""
import sys
sys.path.append("/home/students/toyota")
sys.path.append("/proj/toyota")
import sent2vec
from collections import defaultdict, deque
import re
......@@ -111,7 +111,7 @@ def preprocess_data(text):
# For every word in a sentence, make a vector representation with sent2vec; then make a compositional vector for every sentence as the sum of the BOW vectors:
def compos_sent2vec(prepr_data,len_vector):
model = sent2vec.Sent2vecModel()
model.load_model("/proj/toyota/plaintexts_bigramm.bin")
model.load_model("/proj/toyota/wiki_model_bigram.bin")
for value in prepr_data.values():
for paragr in value: #one snippet
par_list = [] # list with a snippet
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment