Replace try-except blocks with if statements

7ff0e703 · Victor Zimmermann · f80f2d90 · 7ff0e703
Commit 7ff0e703 authored 7 years ago by Victor Zimmermann
--- a/code/absinth_nx.py
+++ b/code/absinth_nx.py
@@ -5,11 +5,12 @@ import spacy # for nlp
 import networkx as nx # for visualisation
 import matplotlib.pyplot as plt # for visualisation
 from copy import deepcopy
+from nltk.corpus import stopwords
 import numpy as np # for calculations
 nlp = spacy.load('en') # standard english nlp
-def frequencies(corpus_path, target, stop_words=['utc', 'new', 'other'], allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP'], min_context_size = 4, max_nodes=100000, max_edges=10000000):
+def frequencies(corpus_path, target, stop_words=[], allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP','VBZ','VBG'], min_context_size = 4, max_nodes=100000, max_edges=10000000):
    node_freq = dict()
    edge_freq = dict()
@@ -111,7 +112,7 @@ def build_graph(node_freq, edge_freq, min_node_freq=10, min_edge_freq=5, max_wei
    return G
-def root_hubs(graph, edge_freq, min_neighbors=4, theshold=0.8):
+def root_hubs(graph, edge_freq, min_neighbors=5, theshold=0.8):
    G = deepcopy(graph)
    V = sorted(G.nodes, key=lambda key: G.degree[key], reverse=True) # -1 to sort descending (...3 -> 2 -> 1...)
@@ -178,16 +179,19 @@ def disambiguate(mst, hubs, contexts):
        for h in H:
-            try:
+            if nx.has_path(T,v,h):
                path = nx.shortest_path(T,v,h,'weight')
                total_weight = 0
                for i in range(1, len(path)):
                    total_weight += T[path[i-1]][path[i]]['weight']
                scores.append(1/(1+total_weight))
-            except:
+            else:
                scores.append(0)
        T.nodes[v]['s'] = np.array([s if s == max(scores) else 0 for s in scores])
@@ -199,14 +203,12 @@ def disambiguate(mst, hubs, contexts):
        idx = contexts.index(c) + 1
-        try:
+        if len(vector) == 0: #if no senses are found -> all in one
-            if max(vector) == 0:
-                pass
-            else:    
-                cluster = np.argmax(vector)
-                result.append((cluster, idx))
-        except:
            result.append((0, idx))
+        elif max(vector) == 0: #if no sense matches -> singletons
+            pass
+        else: 
+            result.append((np.argmax(vector), idx))
    return result
@@ -224,6 +226,8 @@ if __name__ == '__main__':
    corpus_path = '/proj/absinth/wikipedia.txt.dump.20140615-en.SZTAKI/'
    results_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/results/'
+    stop = set(stopwords.words('english') + ['utc', 'new', 'other'])
    results = dict()
    with open(data_path+'results.txt', 'r') as results_file:
@@ -260,14 +264,16 @@ if __name__ == '__main__':
        f.write('subTopicID\tresultID\n')
        print('[A] Counting Tokens...')
-        node_freq, edge_freq = frequencies(corpus_path, target)
+        node_freq, edge_freq = frequencies(corpus_path, target, stop)
        print('\n[A] Building Graph.\n')
        G = build_graph(node_freq, edge_freq)
        print('[A] Collecting Root Hubs...')
        H = root_hubs(G, edge_freq)
-        print('Root Hubs:', H, '\n')
+        for h in H:
+            mfn = sorted(G.adj[h], key=lambda key: edge_freq[h,key] if h < key else edge_freq[key, h], reverse=True)[:6]
+            print('{}: {}\n'.format(h, mfn))
        print('[A] Building Minimum Spanning Tree.\n')
        T = components(G, H, target)