From 7ff0e703482f99e1ac40faf464377c5c33585bd1 Mon Sep 17 00:00:00 2001
From: Victor Zimmermann <zimmermann@cl.uni-heidelberg.de>
Date: Wed, 7 Mar 2018 14:34:03 +0100
Subject: [PATCH] Replace try-except blocks with if statements

---
 code/absinth_nx.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/code/absinth_nx.py b/code/absinth_nx.py
index 14726b1..ce072f4 100644
--- a/code/absinth_nx.py
+++ b/code/absinth_nx.py
@@ -5,11 +5,12 @@ import spacy # for nlp
 import networkx as nx # for visualisation
 import matplotlib.pyplot as plt # for visualisation
 from copy import deepcopy
+from nltk.corpus import stopwords
 import numpy as np # for calculations
 nlp = spacy.load('en') # standard english nlp
 
 
-def frequencies(corpus_path, target, stop_words=['utc', 'new', 'other'], allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP'], min_context_size = 4, max_nodes=100000, max_edges=10000000):
+def frequencies(corpus_path, target, stop_words=[], allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP','VBZ','VBG'], min_context_size = 4, max_nodes=100000, max_edges=10000000):
     
     node_freq = dict()
     edge_freq = dict()
@@ -111,7 +112,7 @@ def build_graph(node_freq, edge_freq, min_node_freq=10, min_edge_freq=5, max_wei
     return G
 
 
-def root_hubs(graph, edge_freq, min_neighbors=4, theshold=0.8):
+def root_hubs(graph, edge_freq, min_neighbors=5, theshold=0.8):
     
     G = deepcopy(graph)
     V = sorted(G.nodes, key=lambda key: G.degree[key], reverse=True) # -1 to sort descending (...3 -> 2 -> 1...)
@@ -178,16 +179,19 @@ def disambiguate(mst, hubs, contexts):
         
         for h in H:
             
-            try:
+            if nx.has_path(T,v,h):
+                
                 path = nx.shortest_path(T,v,h,'weight')
                 total_weight = 0
             
                 for i in range(1, len(path)):
+                    
                     total_weight += T[path[i-1]][path[i]]['weight']
             
                 scores.append(1/(1+total_weight))
                 
-            except:
+            else:
+                
                 scores.append(0)
         
         T.nodes[v]['s'] = np.array([s if s == max(scores) else 0 for s in scores])
@@ -199,14 +203,12 @@ def disambiguate(mst, hubs, contexts):
         
         idx = contexts.index(c) + 1
         
-        try:
-            if max(vector) == 0:
-                pass
-            else:    
-                cluster = np.argmax(vector)
-                result.append((cluster, idx))
-        except:
+        if len(vector) == 0: # no senses found -> assign every context to one cluster
             result.append((0, idx))
+        elif max(vector) == 0: # no sense matches this context -> leave it as a singleton
+            pass
+        else: 
+            result.append((np.argmax(vector), idx))
         
     return result
 
@@ -224,6 +226,8 @@ if __name__ == '__main__':
     corpus_path = '/proj/absinth/wikipedia.txt.dump.20140615-en.SZTAKI/'
     results_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/results/'
     
+    stop = set(stopwords.words('english') + ['utc', 'new', 'other'])
+    
     results = dict()
     
     with open(data_path+'results.txt', 'r') as results_file:
@@ -260,14 +264,16 @@ if __name__ == '__main__':
         f.write('subTopicID\tresultID\n')
         
         print('[A] Counting Tokens...')
-        node_freq, edge_freq = frequencies(corpus_path, target)
+        node_freq, edge_freq = frequencies(corpus_path, target, stop)
         
         print('\n[A] Building Graph.\n')
         G = build_graph(node_freq, edge_freq)
         
         print('[A] Collecting Root Hubs...')
         H = root_hubs(G, edge_freq)
-        print('Root Hubs:', H, '\n')
+        for h in H:
+            mfn = sorted(G.adj[h], key=lambda key: edge_freq[h,key] if h < key else edge_freq[key, h], reverse=True)[:6]
+            print('{}: {}\n'.format(h, mfn))
         
         print('[A] Building Minimum Spanning Tree.\n')
         T = components(G, H, target)
-- 
GitLab