diff --git a/src/absinth.py b/src/absinth.py
index 12484a8fd3a7aa7fdbaeed69887ca9543c00e72e..144cb203c83fe9312b104c16cf6a57c867cf7b1c 100644
--- a/src/absinth.py
+++ b/src/absinth.py
@@ -12,6 +12,7 @@ from multiprocessing import Pool
 nlp = spacy.load('en') # standard english nlp
 
 
+#counts occurrences of (potential) nodes and their co-occurrences (edges)
 def frequencies(corpus_path, target):
     
     stop_words = set(stopwords.words('english') + config.stop_words)
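+    #NLTK's English stop words extended with project-specific ones from config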
@@ -20,16 +21,16 @@ def frequencies(corpus_path, target):
     max_nodes = config.max_nodes
     max_edges = config.max_edges
     
-    node_freq = dict()
-    edge_freq = dict()
+    node_freq = dict() #counts (potential) nodes
+    edge_freq = dict() #counts (potential) edges
     
-    files = [corpus_path + f for f in os.listdir(corpus_path)]
+    files = [corpus_path + f for f in os.listdir(corpus_path)] #file names of corpus files
     s_target = target.replace('_', ' ') #target word with spaces
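+    #e.g. 'new_york' -> 'new york'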
     
-    i = 0
+    i = 0 #for update print statements
     for f in files:
         
-        if i % int(len(files)/10) == 0:
+        if i % max(int(len(files)/10), 1) == 0: #progress update after every tenth of the corpus; max() avoids modulo by zero for small corpora
             
             file_ratio = i/len(files[:])
             max_node_ratio = len(node_freq)/max_nodes
@@ -37,48 +38,56 @@ def frequencies(corpus_path, target):
             
             ratios = [file_ratio, max_node_ratio, max_edge_ratio]
             
+            #reports the ratio closest to 100%, since parsing stops when any of the limits is reached
             percentage = int((max(ratios))*100)
             
             print('[a] ~{:02d}%\tNodes: {}\tEdges: {}.'.format(percentage, len(node_freq), len(edge_freq)), target)
         
+        #stops early once the maximum number of nodes is exceeded
         if len(node_freq) > max_nodes:
             return node_freq, edge_freq
         
+        #stops early once the maximum number of edges is exceeded
         if len(edge_freq) > max_edges:
             return node_freq, edge_freq
         
-        with open(f, 'r') as lines:
+        with open(f, 'r') as lines: #parses single file
             
             try:
                 
-                for line in lines:
+                for line in lines: #parses single paragraph
                     
                     line = line.lower()
                     
-                    if s_target in line:
+                    if s_target in line: #greedy substring pre-selection, not perfect
                         
-                        tokens = set()
-                        doc = nlp(line.replace(s_target, target))
+                        tokens = set() #set of node candidates
+                        doc = nlp(line.replace(s_target, target)) #nlp processing
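+                        #replacing spaces with underscores lets spaCy treat the multi-word target as one token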
                         
-                        if target in [t.text for t in doc]:
+                        if target in [t.text for t in doc]: #stricter selection on the tokenised text
                             
                             for tok in doc:
                                 
-                                text = tok.text
-                                tag = tok.tag_
+                                text = tok.text #string value
+                                tag = tok.tag_ #pos tag
                                 
+                                #doesn't add target word to nodes
                                 if text == target:
                                     pass
                                 
+                                #doesn't add stop words to nodes
                                 elif text in stop_words:
                                     pass
                                 
+                                #only adds tokens with allowed tags to nodes
                                 elif tag in allowed_tags:
                                     tokens.add(tok.text)
                                     
+                            #if there are enough (good) tokens in the paragraph
                             if len(tokens) >= min_context_size:
                                 for token in tokens:
                                     
+                                    #updates counts for nodes
                                     if token in node_freq:
                                         node_freq[token] += 1
                                     else:
@@ -86,11 +95,13 @@ def frequencies(corpus_path, target):
                                 
                                 for edge in {(x,y) for x in tokens for y in tokens if x < y}:
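+                                    #x < y keeps each unordered token pair exactly once and excludes self-loops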
                                     
+                                    #updates counts for edges
                                     if edge in edge_freq:
                                         edge_freq[edge] += 1
                                     else:
                                         edge_freq[edge] = 1
             
+            #if a file is corrupted (can't always be caught with if-else checks)
             except UnicodeDecodeError:
                 
                 pass
@@ -98,10 +109,13 @@ def frequencies(corpus_path, target):
         
         i += 1
     
+    #final progress update
     print('[a] 100%\tNodes: {}\tEdges: {}.'.format(len(node_freq), len(edge_freq)), target)
+
     return node_freq, edge_freq
 
 
+#builds the co-occurrence graph from the frequency dictionaries
 def build_graph(node_freq, edge_freq):
     
     min_node_freq = config.min_node_freq
@@ -110,11 +124,13 @@ def build_graph(node_freq, edge_freq):
     
     G = nx.Graph()
     
+    #adds nodes that meet the minimum frequency
     for key, value in node_freq.items():
         
         if value >= min_node_freq:
             G.add_node(key)
             
+    #adds edges that meet the minimum frequency
     for key, value in edge_freq.items():
         
         if value < min_edge_freq:
@@ -130,33 +146,37 @@ def build_graph(node_freq, edge_freq):
     return G
 
 
+#Identifies senses by choosing nodes with high degrees
 def root_hubs(graph, edge_freq, min_neighbors=4, theshold=0.8):
     
     min_neighbors = config.min_neighbors
     threshold = config.threshold
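+    #note: the config values override the keyword defaults above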
     
     G = deepcopy(graph)
-    V = sorted(G.nodes, key=lambda key: G.degree[key], reverse=True) # -1 to sort descending (...3 -> 2 -> 1...)
-    H = list()
+    V = sorted(G.nodes, key=lambda key: G.degree[key], reverse=True) # sorts nodes by degree in descending order
+    H = list() #output list
     
     while V:
         
-        v = V[0]
+        v = V[0] #best hub candidate
         
         if G.degree[v] >= min_neighbors:
         
-            mfn = sorted(G.adj[v], key=lambda key: edge_freq[v,key] if v < key else edge_freq[key, v], reverse=True)[:min_neighbors] #mfn: most frequent neighbors
+            mfn = sorted(G.adj[v], key=lambda key: edge_freq[v,key] if v < key else edge_freq[key, v], reverse=True)[:min_neighbors] #most frequent neighbors
             
-            if np.mean([G.edges[v,n]['weight'] for n in mfn]) < theshold:
+            if np.mean([G.edges[v,n]['weight'] for n in mfn]) < threshold: #if the mean weight of the most frequent neighbors is below the threshold
                 
                 H.append(v)
             
+                #removes neighbors of new hub as hub candidates
                 for nbr in deepcopy(G).adj[v]:
                 
                     G.remove_node(nbr)
                 
+            #removes hub candidate
             G.remove_node(v)
             
+            #re-sorts the remaining hub candidates after the deletions
             V = sorted(G.nodes, key=lambda key: G.degree[key], reverse=True)
         
         else:
@@ -170,7 +190,7 @@ def root_hubs(graph, edge_freq, min_neighbors=4, theshold=0.8):
 def components(graph, hubs, target):
     
     G = deepcopy(graph)
-    H = hubs
+    H = hubs #root hubs
     t = target
     
     #G.add_node(t)
@@ -179,6 +199,7 @@ def components(graph, hubs, target):
         
     T = nx.minimum_spanning_tree(G)
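+    #on a disconnected graph networkx builds a minimum spanning forest, hence the has_path check in score()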
     
+    #removes singletons
     for node in deepcopy(T).nodes:
         if len(T.adj[node]) == 0:
             T.remove_node(node)
@@ -186,17 +207,22 @@ def components(graph, hubs, target):
     return T
 
 
+#Calculates score for a given path in a minimum spanning tree
 def score(graph, from_node, to_node):
     
+    #only if both nodes are connected in the tree
     if nx.has_path(graph, from_node, to_node):
                 
+        # calculates the weighted shortest path (in a tree this is the unique path between the two nodes)
         path = nx.shortest_path(graph, from_node, to_node, 'weight')
         total_weight = 0
     
+        #sums the weights of all edges along the path
         for i in range(1, len(path)):
             sub_from, sub_to = path[i-1], path[i]
             total_weight += graph[sub_from][sub_to]['weight']
     
+        #the longer (heavier) the path, the lower the score
         return 1/(1+total_weight)
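+        #(e.g. total weight 0 -> score 1.0; total weight 1 -> 0.5)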
         
     else:
@@ -204,47 +230,52 @@ def score(graph, from_node, to_node):
         return 0
 
 
+# word sense disambiguation: maps each context to one of the induced senses
 def disambiguate(mst, hubs, contexts, target=""):
     
     target = target.replace('_', ' ')
-    T = mst
-    H = hubs
-    C = [c.lower().strip().replace(target, '') for c in contexts]
+    T = mst #minimum spanning tree
+    H = hubs #root hubs
+    C = [c.lower().strip().replace(target, '') for c in contexts] #cleaned up contexts
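+    #the target word itself is removed so it cannot contribute to the scores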
     
-    score_dict = dict()
-    result = list()
+    score_dict = dict() #memoisation for scores
+    result = list() #output of function
 
     for c in C:
         
-        idx = C.index(c) + 1
+        idx = C.index(c) + 1 #index of the first occurrence in the list; duplicate contexts share an index
         
         #if no sense is found for a target word, we should assume that there only is one sense
         if len(H) == 0:
             
-            result.append((1, idx))
+            result.append((1, idx, 0)) #default to sense 1 with score 0
         
         else:
             
-            doc = nlp(c)
-            texts = [tok.text for tok in doc]
+            doc = nlp(c) #parsed context
+            texts = [tok.text for tok in doc] #tokens
             
             scores = np.zeros(len(H)) #initialise with zeros for every sense
             
             for text in texts:
                 
-                if text in T.nodes:
+                if text in T.nodes: #if word wasn't filtered out
                     
-                    new_scores = list()
+                    new_scores = list() #scores to be added to total scores
                     
-                    for h in H:
-                        if (text, h) in score_dict:
+                    for h in H: #for each hub
+                        
+                        if (text, h) in score_dict: #memoisation
+                            
                             new_scores.append(score_dict[(text,h)])
+                        
                         else:
+                            
                             new_score = score(T, text, h)
                             new_scores.append(new_score)
-                            score_dict[(text,h)] = new_scores
+                            score_dict[(text,h)] = new_score #memoisation
                         
-                    scores = np.add(scores, new_scores)
+                    scores = scores + np.array(new_scores)
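+                    #elementwise addition accumulates each hub's score over all context words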
                 
                 else:
                 
@@ -257,22 +288,32 @@ def disambiguate(mst, hubs, contexts, target=""):
             
             else:
                 
-                result.append((np.argmax(scores)+1, idx))
+                #assigns the sense with the highest score to the context
+                max_score = np.max(scores)
+                argmax_score = np.argmax(scores)
+
+                #clusters begin at 1; max_score is kept as a confidence value
+                result.append((argmax_score + 1, idx, max_score))
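+                #each appended tuple is (sense id, context id, score)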
 
     return result
 
 
+# main function: calls the steps of the word sense induction pipeline
 def WSI(topic_id, topic_name, results):
     
+    #buffer for useful information
     out_buffer = '\n'
     
+    #paths for input (corpus) and output (directory)
     corpus_path = config.corpus
     output_path = config.output
             
+    #removes trailing newlines
     old_target = topic_name.strip() #original target
     out_buffer += ("[A] Word sense induction for '"+old_target+"':\n")
     
-    if old_target[:4] == 'the_' and old_target.count('_') >= 2: #hard coded 'the'-protection
+    #in topics longer than two words, the leading 'the' can generally be removed without changing the sense
+    if old_target[:4] == 'the_' and old_target.count('_') >= 2:
         
         target = old_target[4:]
         
@@ -280,37 +321,46 @@ def WSI(topic_id, topic_name, results):
         
         target = old_target
     
+    #writes the header line of the output file
     f = open(output_path+target+'.absinth', 'w')
     f.write('subTopicID\tresultID\n')
     
+    #counts occurrences of single words as well as co-occurrences and saves them in dictionaries
     print('[a]', 'Counting nodes and edges.', old_target)
     node_freq, edge_freq = frequencies(corpus_path, target)
-    out_buffer += '[A] Nodes: {}\tEdges:{}\n'.format(str(len(node_freq)), str(len(edge_freq)))
+    out_buffer += '[A] Nodes: {}\tEdges: {}\n'.format(str(len(node_freq)), str(len(edge_freq)))
     
+    #builds graph from these dictionaries, also applies multiple filters
     print('[a]', 'Building graph.', old_target)
     G = build_graph(node_freq, edge_freq)
     
+    #finds root hubs (senses) within the graph and applies further filters to them
     print('[a]', 'Collecting root hubs.', old_target)
     H = root_hubs(G, edge_freq)
     out_buffer += '[A] Root hubs:\n'
     
-    i = 1
+    #adds sense inventory to buffer with some common neighbors for context
+    i = 1 #sense index
     for h in H:
         
         mfn = sorted(G.adj[h], key=lambda x: edge_freq[h,x] if h < x else edge_freq[x, h], reverse=True)[:6]
         out_buffer += (' {}. {}: {}\n'.format(i, h, mfn))
         i += 1
     
+    #computes a minimum spanning tree over the graph
     print('[a]', 'Building minimum spanning tree.', old_target)
     T = components(G, H, target)
 
+    #matches each context to a sense
     print('[a]', 'Disambiguating results.', old_target)
     D = disambiguate(T, H, results[topic_id], target)
     out_buffer += ('[A] Mapping: '+ str(D) + '\n')
     
+    #prints buffer
     print('[a]', 'Writing to file.', old_target)
     print(out_buffer)
     
+    #writes clustering to file
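+    #each line has the format '<topicID>.<clusterID>\t<topicID>.<resultID>'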
     for d in D:
         
         f.write(topic_id+'.'+str(d[0])+'\t'+topic_id+'.'+str(d[1])+'\n')
@@ -320,8 +370,13 @@ def WSI(topic_id, topic_name, results):
 
 if __name__ == '__main__':
     
-    data_path = config.dataset
+    # if absinth.py is run in a test environment
+    if '-t' in sys.argv:
+        data_path = config.test
+    else:
+        data_path = config.dataset
     
+    # results.txt contains the search results for each target word
     results = dict()
     
     with open(data_path+'results.txt', 'r') as results_file:
@@ -329,14 +384,15 @@ if __name__ == '__main__':
         for line in results_file.readlines()[1:]:
             
             l = line.split('\t')
-            id1, _ = l[0].split('.')
+            id1, _ = l[0].split('.') #the second part of the id is ignored, as it is identical to the list index
             
             if id1 not in results:
                 results[id1]=list()
                 
-            results[id1].append(" ".join(l[2:]))
-            
+            results[id1].append(" ".join(l[2:])) # joins title and snippet; the URL is ignored
             
+
+    # topics.txt maps topic ids to target words
     topics = dict()
     
     with open(data_path+'topics.txt', 'r') as topics_file:
@@ -346,7 +402,10 @@ if __name__ == '__main__':
             l = line.split('\t')
             topics[l[0]] = l[1]
     
+    # multiprocessing
     with Pool(4) as pool:
+        # calls WSI() for four topics at a time
         pool.starmap(WSI, [(key, value, results) for key,value in topics.items()])
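+        #starmap unpacks each (topic_id, topic_name, results) tuple into WSI's arguments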
+
     #for key, value in topics.items():
     #    WSI(key, value, results)