From 306095ca75bcb603fd32091c676e1f16d58800e9 Mon Sep 17 00:00:00 2001 From: Victor Zimmermann <zimmermann@cl.uni-heidelberg.de> Date: Thu, 8 Mar 2018 19:20:59 +0100 Subject: [PATCH] Formatting, cluster counting begins with 1. --- src/absinth.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/absinth.py b/src/absinth.py index c9cef83..75734d7 100644 --- a/src/absinth.py +++ b/src/absinth.py @@ -1,18 +1,14 @@ import sys - print('[A] Loading ' + sys.argv[0] + '.\n') - import os # for reading files import networkx as nx # for visualisation from copy import deepcopy from nltk.corpus import stopwords import numpy as np # for calculations - import config - import spacy # for nlp -nlp = spacy.load('en') # standard english nlp +nlp = spacy.load('en') # standard english nlp def frequencies(corpus_path, target): @@ -33,10 +29,13 @@ def frequencies(corpus_path, target): for f in files: if i % int(len(files)/23) == 0: + file_ratio = i/len(files[:]) max_node_ratio = len(node_freq)/max_nodes max_edge_ratio = len(edge_freq)/max_edges + ratios = [file_ratio, max_node_ratio, max_edge_ratio] + print(' ~{}%\tNodes: {}\tEdges: {}.'.format(int((max(ratios))*100), len(node_freq), len(edge_freq))) if len(node_freq) > max_nodes: @@ -218,7 +217,7 @@ def disambiguate(mst, hubs, contexts): #if no sense is found for a target word, we should assume that there only is one sense if len(H) == 0: - result.append((0, idx)) + result.append((1, idx)) else: @@ -254,7 +253,7 @@ def disambiguate(mst, hubs, contexts): else: - result.append((np.argmax(scores), idx)) + result.append((np.argmax(scores)+1, idx)) return result -- GitLab