Skip to content
Snippets Groups Projects
Commit 306e1df9 authored by Victor Zimmermann
Browse files

Add Kleinscheiß.

parent b8177d53
No related branches found
No related tags found
No related merge requests found
......@@ -112,7 +112,7 @@ def build_graph(node_freq, edge_freq, min_node_freq=10, min_edge_freq=5, max_wei
return G
def root_hubs(graph, edge_freq, min_neighbors=5, theshold=0.8):
def root_hubs(graph, edge_freq, min_neighbors=4, theshold=0.8):
G = deepcopy(graph)
V = sorted(G.nodes, key=lambda key: G.degree[key], reverse=True) # -1 to sort descending (...3 -> 2 -> 1...)
......@@ -170,6 +170,7 @@ def disambiguate(mst, hubs, contexts):
T = mst
H = hubs
C = [c.lower().strip() for c in contexts]
backup_cluster = len(H)
result = []
......@@ -196,13 +197,12 @@ def disambiguate(mst, hubs, contexts):
T.nodes[v]['s'] = np.array([s if s == max(scores) else 0 for s in scores])
for c in contexts:
for c in C:
c = c.lower()
toks = [t.text for t in nlp(c)]
vector = np.sum([T.nodes[t]['s'] if t in T.nodes else np.zeros(len(H)) for t in toks], axis=0)
idx = contexts.index(c) + 1
idx = C.index(c) + 1
if len(vector) == 0: #if no senses are found -> all in one
result.append((0, idx))
......@@ -225,7 +225,7 @@ if __name__ == '__main__':
data_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/WSI-Evaluator/datasets/MORESQUE/'
#corpus_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/test'
corpus_path = '/proj/absinth/wikipedia.txt.dump.20140615-en.SZTAKI/'
results_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/results/'
results_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/clustering/'
stop = set(stopwords.words('english') + ['utc', 'new', 'other'])
......@@ -260,7 +260,8 @@ if __name__ == '__main__':
target = value.strip()
print("[A] Processing '"+target+"'.\n")
if target[:4] == 'the_' and target.count('_') >= 2: #hard coded 'the'-protection
target = target[4:]
f = open(results_path+target+'.absinth', 'w')
f.write('subTopicID\tresultID\n')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment