Commit fd4302d8 authored by nwarslan

first results

parent 6079442e
d000.s000.t001 ['U']
d000.s000.t004 man%1:18:08::
d000.s000.t006 say%2:32:08::
d000.s001.t001 peer%2:39:00::
d000.s001.t005 imbibition%1:04:00::
d000.s001.t006 companion%1:18:01::
d000.s001.t008 ['U']
d000.s001.t010 tear%1:04:00:: U
d000.s001.t011 eye%1:09:00::
d000.s002.t001 make%2:40:02::
d000.s002.t003 ready%3:00:00::
d000.s002.t004 answer%1:04:00::
d000.s002.t007 much%3:00:00::
d000.s002.t010 surprise%1:04:00::
d000.s002.t014 paroxysm%1:26:00::
d000.s002.t016 coughing%1:26:00::
d000.s003.t000 ['U']
d000.s003.t002 world%1:14:00::
d000.s003.t003 intoxicated%3:00:00::
d000.s003.t005 weirdo%1:18:02::
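These appear to be the first disambiguation results: each row pairs a token id (document.sentence.token) with the WordNet sense key chosen for it, or ['U'] where no candidate synset was found (see ambig_list.append(['U']) in map_words below).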
@@ -42,7 +42,8 @@ def open_mapping(filename):
def open_sense_keys(filename):
    """
    open wn30-17 sense key mapping
    returns a dictionary {(syn_id_30, pos): sense_key_17}
    """
    with open(filename, 'r') as infile:
        # columns: 0 = synset id (WN 3.0), 2 = sense key (WN 1.7), 3 = POS
        sense_keys = {(fields[0], fields[3]): fields[2]
                      for fields in (line.split() for line in infile)}
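A minimal usage sketch, not part of the commit (the file name and synset id are placeholders): given the column layout the comprehension assumes (column 0 = WN 3.0 synset id, column 3 = POS, column 2 = WN 1.7 sense key), a lookup would be:

# hypothetical file name and key, for illustration only
sense_keys = open_sense_keys('wn30-17.sensekeys')
key_17 = sense_keys.get(('00001740', 'n'))  # -> a sense key string such as 'entity%1:03:00::', or None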
@@ -52,11 +53,10 @@ def map_words(sentence):
"""
takes a list of ambig words
mapps them to potential synsets
returns a list of synset_id lists
returns a list of synset_id lists [ [[w1s1],[w1s2],[w1s3]] , [[w2s1],...] ,...]
"""
pos = ['n','v','a','r']
ambig_list = []
#split = False
def get_lem_id(token):
if token in lemmata_mapping.keys():
ambig_list.append(lemmata_mapping[token][1])
@@ -76,20 +76,17 @@ def map_words(sentence):
    for word in sentence:
        add = False
        # first try the exact 'lemma/POS' lookup
        add = get_lem_id(word[0] + '/' + word[1])
        if add: continue
        elif '-' in word[0]:
            # fall back to the parts of hyphenated compounds
            words = word[0].split('-')
            for w in words:
                add = get_node_id(w)
        else:
            add = get_node_id(word[0])
        if not add: ambig_list.append(['U'])
    return ambig_list
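A hedged usage sketch, not part of the commit: the (token, POS-tag) pair format is an assumption inferred from the word[0]+'/'+word[1] lookup, and the tag letters follow the pos list above; lemmata_mapping and get_node_id must already be defined at module level.

# hypothetical input, for illustration only
sentence = [('man', 'n'), ('glorx', 'n')]
candidates = map_words(sentence)
# one entry per word: a list of candidate synset ids, or ['U'] if nothing was found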
def embed(node_id):
@@ -97,23 +94,12 @@ def embed(node_id):
takes a node id (int)
returns it's embedding (array)
"""
#l1 = pos_embeddings
#l2 = lemmata_embeddings
embedding = np.concatenate((id_embeddings[node_id],pos_embeddings[node_id],lex_file_embeddings[node_id],lemmata_embeddings[node_id]), axis=0)
labels = (id_embeddings[node_id],pos_embeddings[node_id],lex_file_embeddings[node_id],lemmata_embeddings[node_id])
embedding = np.concatenate(labels, axis=0)
return embedding
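A sketch of what embed yields, assuming each lookup table stores a 1-D numpy array per node id: the four label embeddings are concatenated into a single vector whose length is the sum of the four embedding sizes.

# illustration only: with four 64-dimensional label embeddings,
# embed(node_id) returns an array of shape (256,)
vec = embed(42)
print(vec.shape)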
#def concatenate(l1, l2):
#    """
#    should: concatenate the embeddings of selected labels per node
#    --> is to become embed()
#    is: concatenates the embeddings of selected labels of all nodes
#    """
#    embeddings = [np.concatenate((l1[i], l2[i]), axis=0) for i in range(len(l1))]
#    return embeddings
def get_distance(node_combi):
"""
takes a list of node embedding lists
......