diff --git a/src/absinth.py b/src/absinth.py index c9cef83300d3d10011b645351333baaecc539b42..75734d73c0d13d4e13f4be3a377ea94ecac7b281 100644 --- a/src/absinth.py +++ b/src/absinth.py @@ -1,18 +1,14 @@ import sys - print('[A] Loading ' + sys.argv[0] + '.\n') - import os # for reading files import networkx as nx # for visualisation from copy import deepcopy from nltk.corpus import stopwords import numpy as np # for calculations - import config - import spacy # for nlp -nlp = spacy.load('en') # standard english nlp +nlp = spacy.load('en') # standard english nlp def frequencies(corpus_path, target): @@ -33,10 +29,13 @@ def frequencies(corpus_path, target): for f in files: if i % int(len(files)/23) == 0: + file_ratio = i/len(files[:]) max_node_ratio = len(node_freq)/max_nodes max_edge_ratio = len(edge_freq)/max_edges + ratios = [file_ratio, max_node_ratio, max_edge_ratio] + print(' ~{}%\tNodes: {}\tEdges: {}.'.format(int((max(ratios))*100), len(node_freq), len(edge_freq))) if len(node_freq) > max_nodes: @@ -218,7 +217,7 @@ def disambiguate(mst, hubs, contexts): #if no sense is found for a target word, we should assume that there only is one sense if len(H) == 0: - result.append((0, idx)) + result.append((1, idx)) else: @@ -254,7 +253,7 @@ def disambiguate(mst, hubs, contexts): else: - result.append((np.argmax(scores), idx)) + result.append((np.argmax(scores)+1, idx)) return result