Skip to content
Snippets Groups Projects
Commit 4701ee43 authored by nwarslan's avatar nwarslan
Browse files

added wsd_method 1

parent 3dba9dfb
No related branches found
No related tags found
No related merge requests found
......@@ -145,7 +145,8 @@ def get_distance(node_combi, dist_dict):
def map_distances(sentence, embed_dict):
"""
this function computes every distance between a word and all senses of other words
returns a dictionary {(id1,id2):distance,...}
"""
dist_dict ={}
for i in range(len(sentence[:-1])):
......@@ -158,7 +159,7 @@ def map_distances(sentence, embed_dict):
def find_optimum(sentence, dist_dict):
"""
this function finds the optimal sense combination of words in a sentence
"""
optimum = [0,0]
for n_combi in node_combi(sentence):
......@@ -171,13 +172,11 @@ def find_optimum(sentence, dist_dict):
return optimum
def map_sense():
def node_combi(sentence):
    """
    Yield every possible sense combination of a given sentence.

    IN: sentence -- an iterable holding, for each word, an iterable of its
        candidate senses (the original note reads "IN:[int]"; presumably the
        candidates are integer node ids -- confirm against the caller)
    OUT: a generator of tuples, each containing exactly one candidate per
        word, in the order produced by itertools.product
    """
    # The cartesian product over the per-word candidate lists enumerates all
    # combinations lazily, so nothing is materialised up front.
    yield from itertools.product(*sentence)
......@@ -193,10 +192,11 @@ def write_answer_to_file(data, filename):
def iterate_over(senseval_data, label_embeddings, lemmata_mapping, id_mapping, sense_key_mapping):
"""
this function iterates over sentences in senseval data
computes the optimal combination of senses given a sentence
"""
ambig_sents = open_mapping(senseval_data)
# truncate every sentence of 6 or more words to its first 6 words
for i, sent in enumerate(ambig_sents):
if len(sent) >=6:
ambig_sents[i] = sent[:6]
......@@ -204,12 +204,16 @@ def iterate_over(senseval_data, label_embeddings, lemmata_mapping, id_mapping, s
sentences = [[[word['lemma'],word['pos'],word['key']] for word in sent] for sent in ambig_sents]
solutions = []
for sentence in sentences:
mapped_sent = map_words(sentence, lemmata_mapping)[0]
lemma_list = map_words(sentence, lemmata_mapping)[1]
# check whether compound words were split in two
split = 0
if len(sentence) < len(mapped_sent):
split = len(mapped_sent) - len(sentence)
# saves the index of an unknown word and removes it from mapped_sent
no_key = []
for word in mapped_sent:
if word[0]=='U':
......@@ -217,36 +221,33 @@ def iterate_over(senseval_data, label_embeddings, lemmata_mapping, id_mapping, s
mapped_sent = [word for word in mapped_sent if word[0]!='U']
# computes label embeddings for every node and saves them in a dict
embed_dict = embed(mapped_sent, label_embeddings)
# computes distances between nodes and saves them into a dict
dist_dict = map_distances(mapped_sent, embed_dict)
# computes the optimal sense combination
optimum = find_optimum(mapped_sent, dist_dict)
# maps node id's to wn 3 synset id's
optimum = [id_mapping[str(o)] for o in optimum[1]]
# map syn id to sense_key
# map synset id to sense_key
for i, o in enumerate(optimum):
key = tuple(o + [lemma_list[i]])
if key in sense_key_mapping.keys():
optimum[i] = sense_key_mapping[key]
else: optimum[i] = ''
# adds the removed unknown words
if len(no_key) != 0:
for el in no_key:
optimum.insert(el, '')
# re-joins the two entries of a previously split compound word into one
resplit = 0
for i, word in enumerate(sentence):
if split != 0:
if '-' in word[0]:
optimum[i] = optimum[i] + ' ' + optimum[i+1]
optimum.remove(optimum[i+1])
split -= 1
#print(sentence)
#print(mapped_sent)
#print(optimum)
solution_sent = [(word[2],optimum[k]) for k, word in enumerate(sentence)]
solutions += solution_sent
......@@ -262,7 +263,7 @@ if __name__ == '__main__':
# sense_key_mapping is looked up inside iterate_over to turn each chosen
# synset id (together with its lemma) into a sense key.
sense_key_mapping = open_sense_keys(SENSE_KEY_MAPPING)
# iterate_over expects five arguments, with id_mapping placed before
# sense_key_mapping; the arguments must be comma-separated.
solutions = iterate_over(
    SENSEVAL_2, label_embeddings, lemmata_mapping, id_mapping, sense_key_mapping
)
write_answer_to_file(solutions, OUTPUT2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment