Skip to content
Snippets Groups Projects
Commit 306095ca authored by Victor Zimmermann
Browse files

Formatting, cluster counting begins with 1.

parent a507c2dc
No related branches found
No related tags found
No related merge requests found
import sys import sys
print('[A] Loading ' + sys.argv[0] + '.\n') print('[A] Loading ' + sys.argv[0] + '.\n')
import os # for reading files import os # for reading files
import networkx as nx # for visualisation import networkx as nx # for visualisation
from copy import deepcopy from copy import deepcopy
from nltk.corpus import stopwords from nltk.corpus import stopwords
import numpy as np # for calculations import numpy as np # for calculations
import config import config
import spacy # for nlp import spacy # for nlp
nlp = spacy.load('en') # standard english nlp
nlp = spacy.load('en') # standard english nlp
def frequencies(corpus_path, target): def frequencies(corpus_path, target):
...@@ -33,10 +29,13 @@ def frequencies(corpus_path, target): ...@@ -33,10 +29,13 @@ def frequencies(corpus_path, target):
for f in files: for f in files:
if i % int(len(files)/23) == 0: if i % int(len(files)/23) == 0:
file_ratio = i/len(files[:]) file_ratio = i/len(files[:])
max_node_ratio = len(node_freq)/max_nodes max_node_ratio = len(node_freq)/max_nodes
max_edge_ratio = len(edge_freq)/max_edges max_edge_ratio = len(edge_freq)/max_edges
ratios = [file_ratio, max_node_ratio, max_edge_ratio] ratios = [file_ratio, max_node_ratio, max_edge_ratio]
print(' ~{}%\tNodes: {}\tEdges: {}.'.format(int((max(ratios))*100), len(node_freq), len(edge_freq))) print(' ~{}%\tNodes: {}\tEdges: {}.'.format(int((max(ratios))*100), len(node_freq), len(edge_freq)))
if len(node_freq) > max_nodes: if len(node_freq) > max_nodes:
...@@ -218,7 +217,7 @@ def disambiguate(mst, hubs, contexts): ...@@ -218,7 +217,7 @@ def disambiguate(mst, hubs, contexts):
#if no sense is found for a target word, we should assume that there only is one sense #if no sense is found for a target word, we should assume that there only is one sense
if len(H) == 0: if len(H) == 0:
result.append((0, idx)) result.append((1, idx))
else: else:
...@@ -254,7 +253,7 @@ def disambiguate(mst, hubs, contexts): ...@@ -254,7 +253,7 @@ def disambiguate(mst, hubs, contexts):
else: else:
result.append((np.argmax(scores), idx)) result.append((np.argmax(scores)+1, idx))
return result return result
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment