Skip to content
Snippets Groups Projects
Commit ad919229 authored by Victor Zimmermann
Browse files

All in one if no root hubs, singletons if no cluster matches.

parent 14a0691a
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,7 @@ import numpy as np # for calculations
nlp = spacy.load('en') # standard english nlp
def frequencies(corpus_path, target, stop_words=['utc', 'new', 'other'], allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP'], min_context_size = 4, max_nodes=10000, max_edges=1000000):
def frequencies(corpus_path, target, stop_words=['utc', 'new', 'other'], allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP'], min_context_size = 4, max_nodes=100000, max_edges=10000000):
node_freq = dict()
edge_freq = dict()
......@@ -201,14 +201,12 @@ def disambiguate(mst, hubs, contexts):
try:
if max(vector) == 0:
result.append((backup_cluster, idx))
backup_cluster += 1
pass
else:
cluster = np.argmax(vector)
result.append((cluster, idx))
except:
result.append((backup_cluster, idx))
backup_cluster += 1
result.append((0, idx))
return result
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment