From 306095ca75bcb603fd32091c676e1f16d58800e9 Mon Sep 17 00:00:00 2001
From: Victor Zimmermann <zimmermann@cl.uni-heidelberg.de>
Date: Thu, 8 Mar 2018 19:20:59 +0100
Subject: [PATCH] Formatting, cluster counting begins with 1.

---
 src/absinth.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/absinth.py b/src/absinth.py
index c9cef83..75734d7 100644
--- a/src/absinth.py
+++ b/src/absinth.py
@@ -1,18 +1,14 @@
 import sys
-
 print('[A] Loading ' + sys.argv[0] + '.\n')
-
 import os # for reading files
 import networkx as nx # for visualisation
 from copy import deepcopy
 from nltk.corpus import stopwords
 import numpy as np # for calculations
-
 import config
-
 import spacy # for nlp
-nlp = spacy.load('en') # standard english nlp
 
+nlp = spacy.load('en') # standard english nlp
 
 
 def frequencies(corpus_path, target):
@@ -33,10 +29,13 @@ def frequencies(corpus_path, target):
     for f in files:
         
         if i % int(len(files)/23) == 0:
+            
             file_ratio = i/len(files[:])
             max_node_ratio = len(node_freq)/max_nodes
             max_edge_ratio = len(edge_freq)/max_edges
+            
             ratios = [file_ratio, max_node_ratio, max_edge_ratio]
+            
             print(' ~{}%\tNodes: {}\tEdges: {}.'.format(int((max(ratios))*100), len(node_freq), len(edge_freq)))
         
         if len(node_freq) > max_nodes:
@@ -218,7 +217,7 @@ def disambiguate(mst, hubs, contexts):
         #if no sense is found for a target word, we should assume that there only is one sense
         if len(H) == 0:
             
-            result.append((0, idx))
+            result.append((1, idx))
         
         else:
             
@@ -254,7 +253,7 @@ def disambiguate(mst, hubs, contexts):
             
             else:
                 
-                result.append((np.argmax(scores), idx))
+                result.append((np.argmax(scores)+1, idx))
 
     return result
 
-- 
GitLab