Skip to content
Snippets Groups Projects
Commit 94c9807f authored by Victor Zimmermann's avatar Victor Zimmermann
Browse files

Checks whether a topic has already been processed, which allows several processes to run in parallel.

parent 2c81f0fe
No related branches found
No related tags found
No related merge requests found
......@@ -114,7 +114,7 @@ def frequencies(corpus_path, target):
i += 1
#update print
print('[a] 100%\tNodes: {}\tEdges: {}.'.format(len(node_freq), len(edge_freq)), target)
print('[a] 100%\tNodes: {}\tEdges: {}.'.format(len(node_freq), len(edge_freq))+'\t('+target+')')
return node_freq, edge_freq
......@@ -312,6 +312,10 @@ def WSI(topic_id, topic_name, results):
#strips surrounding whitespace, including the trailing newline
old_target = topic_name.strip() #original target
if old_target.strip() in [f.replace('.absinth', '') for f in os.listdir(config.output)]:
return None
out_buffer += ("[A] Word sense induction for '"+old_target+"':\n")
#in topics longer than two words, the leading 'the' can generally be removed without changing the sense
......@@ -330,11 +334,11 @@ def WSI(topic_id, topic_name, results):
#counts occurrences of single words as well as co-occurrences, and saves them in dictionaries
print('[a]', 'Counting nodes and edges.\t('+old_target+')')
node_freq, edge_freq = frequencies(corpus_path, target)
out_buffer += '[A] Nodes: {}\tEdges: {}\n'.format(str(len(node_freq)), str(len(edge_freq)))
#builds graph from these dictionaries, also applies multiple filters
print('[a]', 'Building graph.\t('+old_target+')')
G = build_graph(node_freq, edge_freq)
out_buffer += '[A] Nodes: {}\tEdges: {}\n'.format(str(len(G.nodes)), str(len(G.edges)))
#finds root hubs (senses) within the graph + more filters for these
print('[a]', 'Collecting root hubs.\t('+old_target+')')
......@@ -399,15 +403,13 @@ if __name__ == '__main__':
# topics.txt is a list of target words
topics = dict()
processed_topics = [f.replace('.absinth', '') for f in os.listdir(config.output)]
with open(data_path+'topics.txt', 'r') as topics_file:
for line in topics_file.readlines()[1:]:
l = line.split('\t')
if l[1].strip() not in processed_topics:
topics[l[0]] = l[1]
topics[l[0]] = l[1]
# multiprocessing
with Pool(4) as pool:
......@@ -415,4 +417,4 @@ if __name__ == '__main__':
pool.starmap(WSI, [(key, value, results) for key,value in topics.items()])
#for key, value in topics.items():
# WSI(key, value, results)
#WSI(key, value, results)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment