Skip to content
Snippets Groups Projects
Commit 306095ca authored by Victor Zimmermann
Browse files

Formatting, cluster counting begins with 1.

parent a507c2dc
No related branches found
No related tags found
No related merge requests found
import sys
print('[A] Loading ' + sys.argv[0] + '.\n')
import os # for reading files
import networkx as nx # for visualisation
from copy import deepcopy
from nltk.corpus import stopwords
import numpy as np # for calculations
import config
import spacy # for nlp
nlp = spacy.load('en') # standard english nlp
nlp = spacy.load('en') # standard english nlp
def frequencies(corpus_path, target):
......@@ -33,10 +29,13 @@ def frequencies(corpus_path, target):
for f in files:
if i % int(len(files)/23) == 0:
file_ratio = i/len(files[:])
max_node_ratio = len(node_freq)/max_nodes
max_edge_ratio = len(edge_freq)/max_edges
ratios = [file_ratio, max_node_ratio, max_edge_ratio]
print(' ~{}%\tNodes: {}\tEdges: {}.'.format(int((max(ratios))*100), len(node_freq), len(edge_freq)))
if len(node_freq) > max_nodes:
......@@ -218,7 +217,7 @@ def disambiguate(mst, hubs, contexts):
#if no sense is found for a target word, we should assume that there only is one sense
if len(H) == 0:
result.append((0, idx))
result.append((1, idx))
else:
......@@ -254,7 +253,7 @@ def disambiguate(mst, hubs, contexts):
else:
result.append((np.argmax(scores), idx))
result.append((np.argmax(scores)+1, idx))
return result
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment