diff --git a/src/abstinent.py b/src/abstinent.py
new file mode 100644
index 0000000000000000000000000000000000000000..724eeec19e08e377650c61307f46a7ebb8f417ff
--- /dev/null
+++ b/src/abstinent.py
@@ -0,0 +1,679 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Artificially Basic System Trying to Induce Numerous ENTities
+
+This module performs word sense induction for a given word on a corpus and
+matches a list of contexts to each induced sense. The method is a simple
+baseline: root hubs (senses) are the most frequent co-occurring words, and
+contexts are assigned to them with a simple Lesk implementation.
+
+Example:
+    The script can be run with the following command:
+    
+        $ python3 abstinent.py
+        
+    It also accepts a list of modifiers.
+    
+Modifiers:
+    '-t': Runs abstinent.py on the trial path given in config.py instead of
+        the data path.
+    '-p n': Runs abstinent.py with n concurrent processes (default: 1). Both
+        modifiers can be combined, as in the example below.
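+
+    For example (hypothetical invocation):
+    
+        $ python3 abstinent.py -t -p 4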
+
+.. _Association Based Semantic Induction Tools from Heidelberg:
+    https://gitlab.cl.uni-heidelberg.de/zimmermann/absinth
+
+"""
+
+import sys
+print('[a] Loading ' + sys.argv[0] + '.\n')
+import config
+import networkx as nx # for the cooccurrence graph
+import numpy as np
+import os # for reading files
+import pprint
+import random
+import re
+import scipy.special
+import spacy # for nlp
+import time
+
+from copy import deepcopy
+from multiprocessing import Pool
+from scipy import stats 
+
+random.seed(325)
+nlp = spacy.load('en') # standard English pipeline
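+# Note: 'en' is the spaCy 2.x shortcut link for the English model; with
+# spaCy 3+ this would be spacy.load('en_core_web_sm') instead.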
+
+
+def read_dataset(data_path: str) -> (dict, dict):
+    """Collects topics.txt and results.txt.
+    
+    Iterates over topics.txt and results.txt in the data path and converts them
+    to dictionaries with the ID as key and the target word / title + snippet as
+    values.
+    
+    Args:
+        data_path: File path to directory containing topics.txt and results.txt.
+        
+    Returns:
+        The results dictionary (ID -> list of 'title snippet' strings) and
+            the topics dictionary (ID -> target word).
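+    
+    Example:
+        Hypothetical shapes of the returned dictionaries:
+        
+            results == {'1': ['Some title Some snippet', ...], ...}
+            topics == {'1': 'target_word', ...}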
+    """
+    
+    results = dict()
+    
+    with open(data_path+'results.txt', 'r') as results_file:
+        
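+        # Tab-separated columns are assumed to be: ID, URL, title, snippet;
+        # the first line is skipped as a header.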
+        for line in results_file.readlines()[1:]:
+            
+            l = line.split('\t')
+            id1, _ = l[0].split('.') #the second part of the id is ignored, as it is identical to the list index
+            
+            if id1 not in results:
+                results[id1]=list()
+                
+            results[id1].append(" ".join(l[2:]).strip()) # here I join title and snippet, the URL is ignored
+            
+    
+    # topics.txt is a list of target words
+    topics = dict()
+    
+    with open(data_path+'topics.txt', 'r') as topics_file:
+        
+        for line in topics_file.readlines()[1:]:
+            
+            l = line.split('\t')
+            topics[l[0]] = l[1].strip()
+    
+    return results, topics
+
+
+def frequencies(target_string: str, search_result_list: list) -> (dict, dict):
+    """Counts occurrences of nodes and cooccurrences.
+    
+    Iterates over the corpus (and the snippets provided with the task) line by
+    line and counts every token and pair of tokens within a line (context).
+    These tokens are filtered by stop words, POS tags and context length.
+    
+    Args:
+        target_string: contexts are selected if they contain this string. For
+            further processing this string is removed from the contexts.
+        search_result_list: List of titles and snippets provided with the task.
+        
+    Returns:
+        Dictionary of occurrences of every eligible token within every context
+            the target occurs in, dictionary of occurrences of every eligible
+            tuple of tokens within every context the target occurs in.
+    
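+    Example:
+        Hypothetical counts; edge keys are lexicographically ordered token
+        pairs (x < y):
+        
+            node_freq_dict == {'piano': 12, 'score': 7}
+            edge_freq_dict == {('piano', 'score'): 3}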
+    """
+
+    corpus_path = config.corpus
+    max_node_count = config.max_nodes
+    max_edge_count = config.max_edges
+    
+    bracketed_target_string = '('+target_string+')'
+    
+    # Remove unnecessary tokens from snippets.
+    _search_result_list = list()
+    for r in search_result_list:
+        r = r.replace('<b>', '')
+        r = r.replace('</b>', '')
+        r = r.replace(r'\\', '')
+        r = r.strip()
+        _search_result_list.append(r)
+    
+    # Initialise frequencies with counts from results.
+    node_freq_dict, edge_freq_dict = process_file(_search_result_list,
+                                                  target_string,
+                                                  dict(),
+                                                  dict()) 
+    
+    corpus_file_path_list = [corpus_path + f for f in os.listdir(corpus_path)]
+    corpus_size = len(corpus_file_path_list)
+    
+    processed_file_count = 0
+    for corpus_file_path in corpus_file_path_list:
+        
+        node_count = len(node_freq_dict)
+        edge_count = len(edge_freq_dict)
+        
+        # Print an update after roughly every 11th of the corpus is parsed.
+        if processed_file_count % max(1, corpus_size // 11) == 0:
+            
+            file_ratio = processed_file_count / corpus_size
+            max_node_ratio = node_count / max_node_count
+            max_edge_ratio = edge_count / max_edge_count
+            
+            ratios = [file_ratio, max_node_ratio, max_edge_ratio]
+            
+            # Use ratio closest to 100%.
+            highest_ratio = int((max(ratios))*100)
+            
+            print('[a] ~{:02d}%\tNodes: {}\tEdges: {}\t{}.'.format(highest_ratio,
+                                                                   node_count,
+                                                                   edge_count,
+                                                                   bracketed_target_string))
+        
+        if node_count > max_node_count:
+            print('[a] 100%\tNodes: {}\tEdges: {}\t{}.'.format(node_count,
+                                                               edge_count,
+                                                               bracketed_target_string))
+            return node_freq_dict, edge_freq_dict
+        
+        if edge_count > max_edge_count:
+            print('[a] 100%\tNodes: {}\tEdges: {}\t{}.'.format(node_count,
+                                                               edge_count,
+                                                               bracketed_target_string))
+            return node_freq_dict, edge_freq_dict
+        
+        with open(corpus_file_path, 'r') as corpus_file:
+            
+            node_freq_dict, edge_freq_dict = process_file(corpus_file,
+                                                          target_string,
+                                                          node_freq_dict,
+                                                          edge_freq_dict)
+        
+        processed_file_count += 1
+    
+    # Recompute final counts (the loop-local counts lag one file behind).
+    node_count = len(node_freq_dict)
+    edge_count = len(edge_freq_dict)
+    print('[a] 100%\tNodes: {}\tEdges: {}\t{}.'.format(node_count,
+                                                       edge_count,
+                                                       bracketed_target_string))
+    
+    return node_freq_dict, edge_freq_dict
+
+
+def process_file(context_list: list, target_string: str,
+                 node_freq_dict: dict, edge_freq_dict: dict) -> (dict, dict): 
+    """Updates the counts of nodes and edges for a given document and target.
+    
+    Amends the input dictionaries with counts from each context within the
+    list of contexts. It also filters out contexts that are too small or too
+    large as well as tokens that are stop words or carry disallowed POS tags.
+    
+    Args:
+        context_list: List of contexts (lines, paragraphs) that are to be
+            considered for updating the counting dictionaries.
+        target_string: Target string for filtering out every context that does 
+            not contain it.
+        node_freq_dict: Dictionary of occurrences of every eligible token
+            within every context the target occurs in.
+        edge_freq_dict: Dictionary of occurrences of every eligible tuple of
+            tokens within every context the target occurs in.
+    
+    Returns:
+        Updated versions of the input node dict and input edge dict.
+    """
+    
+    spaced_target_string = target_string.replace('_', ' ')
+    
+    stopword_list = config.stop_words
+    allowed_tag_list = config.allowed_tags
+    min_context_size = config.min_context_size
+    max_context_size = config.max_context_size
+        
+    try:
+        
+        for context in context_list:
+            
+            context = context.lower()
+            if spaced_target_string in context: # Greedily pre-select lines.
+                
+                token_set = set()
+                
+                # Allow target to be treated as single entity.
+                context = context.replace(spaced_target_string, target_string)
+                processed_context = nlp(context)
+                
+                if target_string in [token.text for token in processed_context]:
+                    
+                    for token in processed_context:
+                        
+                        # Do not add target word to nodes.
+                        if token.text == target_string:
+                            pass
+                        
+                        # Do not add stop words to nodes.
+                        elif token.is_stop or token.text in stopword_list:
+                            pass
+                        
+                        # Add only tokens with allowed tags to nodes.
+                        elif token.tag_ in allowed_tag_list:
+                            if config.lemma:
+                                token_set.add(token.lemma_)
+                            else:
+                                token_set.add(token.text)
+                            
+                    context_size = len(token_set)
+                    
+                    if context_size >= min_context_size and context_size <= max_context_size:
+                        for token in token_set:
+                            
+                            if token in node_freq_dict:
+                                node_freq_dict[token] += 1
+                            else:
+                                node_freq_dict[token] = 1
+                        
+                        # All unordered pairs of distinct tokens in the context;
+                        # pairs are stored lexicographically (x < y) so each
+                        # edge is only counted once.
+                        for edge in {(x,y) for x in token_set for y in token_set if x < y}:
+                            
+                            if edge in edge_freq_dict:
+                                edge_freq_dict[edge] += 1
+                            else:
+                                edge_freq_dict[edge] = 1
+    
+    # If the file is corrupted (this can't always be caught beforehand), skip it.
+    except UnicodeDecodeError:
+        
+        pass            
+    
+    return node_freq_dict, edge_freq_dict
+
+
+def build_graph(node_freq_dict: dict, edge_freq_dict: dict) -> nx.Graph:
+    """Builds undirected weighted graph from dictionaries.
+    
+    Creates graph and appends every edge and node in the parameter dictionaries,
+    given they occur frequently enough. For every edge a weight is calculated.
+    
+    Args:
+        node_freq_dict: Dictionary of occurrences of every eligible token
+            within every context the target occurs in.
+        edge_freq_dict: Dictionary of occurrences of every eligible tuple of
+            tokens within every context the target occurs in.
+    
+    Returns:
+        Filtered, undirected, weighted small-world co-occurrence graph for a
+            given target entity.
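+    
+    Example:
+        For an edge (u, v) the weight is computed as (illustrative numbers):
+        
+            weight = 1 - max(freq(u,v)/freq(u), freq(u,v)/freq(v))
+            # freq(u)=10, freq(v)=4, freq(u,v)=2  ->  1 - max(0.2, 0.5) = 0.5
+        
+        Lower weights mark stronger associations; edges whose weight exceeds
+        max_weight are dropped.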
+    """
+    
+    min_node_freq = config.min_node_freq
+    min_edge_freq = config.min_edge_freq
+    max_weight = config.max_weight
+    
+    cooccurrence_graph = nx.Graph()
+    
+    for node, frequency in node_freq_dict.items():
+        
+        if frequency >= min_node_freq:
+            cooccurrence_graph.add_node(node)
+            
+    for node_tuple, frequency in edge_freq_dict.items():
+        
+        if frequency < min_edge_freq:
+            
+            continue
+        
+        elif node_tuple[0] not in cooccurrence_graph.nodes:
+            
+            continue
+        
+        elif node_tuple[1] not in cooccurrence_graph.nodes:
+            
+            continue
+        
+        else:
+            
+            cooccurrence_frequency = edge_freq_dict[node_tuple]
+            node0_frequency = node_freq_dict[node_tuple[0]]
+            node1_frequency = node_freq_dict[node_tuple[1]]
+            
+            prob_0 = cooccurrence_frequency / node0_frequency
+            prob_1 = cooccurrence_frequency / node1_frequency
+            
+            best_weight = 1 - max(prob_0, prob_1)
+            #dice_weight = 1 - ((prob_0 + prob_1) / 2)
+            
+            if best_weight <= max_weight:
+                
+                cooccurrence_graph.add_edge(*node_tuple, weight=best_weight)
+            
+            else:
+                
+                pass
+    
+    # Remove singletons, deepcopy for iteration while being altered.
+    for node in deepcopy(cooccurrence_graph).nodes:
+        if len(cooccurrence_graph.adj[node]) == 0:
+            cooccurrence_graph.remove_node(node)
+    
+    return cooccurrence_graph
+
+
+def induce(topic_name: str, result_list: list) -> (nx.Graph, list, dict):
+    """
+    Use n random nodes as root hubs.
+    """
+    
+    stat_dict = dict()
+    
+    stat_dict['target'] = topic_name
+    
+    print('[a]', 'Counting nodes and edges.\t('+topic_name+')')
+    node_freq_dict, edge_freq_dict = frequencies(topic_name, result_list)
+    
+    #builds graph from these dictionaries, also applies multiple filters
+    print('[a]', 'Building graph.\t('+topic_name+')')
+    graph = build_graph(node_freq_dict, edge_freq_dict)
+    
+    for string in topic_name.split('_'):
+        if string in graph.nodes:
+            graph.remove_node(string)
+    
+    stat_dict['nodes'] = len(graph.nodes)
+    stat_dict['edges'] = len(graph.edges)
+
+    #finds root hubs (senses) within the graph + more filters for these
+    print('[a]', 'Collecting root hubs.\t('+topic_name+')')
+    
+    sense_count = min(config.sense_count, len(graph.nodes))
+    
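+    # Baseline hub selection: take the sense_count most frequent tokens that
+    # survived the graph filters; each root hub stands for one induced sense.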
+    root_hub_list = sorted([(value,key) for key,value in node_freq_dict.items() if key in graph.nodes],
+                           reverse=True)[:sense_count]
+    root_hub_list = [hub[1] for hub in root_hub_list]
+    
+    # Add the sense inventory (root hubs with frequent neighbors) to the statistics.
+    stat_dict['hubs'] = dict()
+    
+    for root_hub in root_hub_list:
+    
+        by_frequency = lambda node: edge_freq_dict[root_hub,node] \
+                                        if root_hub < node \
+                                        else edge_freq_dict[node, root_hub]
+                                    
+        most_frequent_neighbor_list = sorted(graph.adj[root_hub],
+                                                key=by_frequency, reverse=True) 
+        
+        stat_dict['hubs'][root_hub] = most_frequent_neighbor_list[:6]
+
+    return graph, root_hub_list, stat_dict
+
+
+def bag_of_senses(graph: nx.Graph, root_hub_list:list) -> dict:
+    """
+    Matches each node to the root hub it is closest to.
+    """
+    
+    root_hub_count = len(root_hub_list)
+    
+    bag = {i:[] for i in range(root_hub_count)}
+    
+    for node in graph.nodes:
+        
+        score = [0] * root_hub_count
+        
+        for i in range(root_hub_count):
+            
+            root = root_hub_list[i]
+            
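+            # Score each hub as 1/(1 + len(path)), where path is the node list
+            # of the weighted shortest path; unreachable hubs keep a score of 0.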
+            if nx.has_path(graph, node, root):
+                path = nx.shortest_path(graph, node, root, 'weight')
+                score[i] = 1/(1+len(path))
+            
+        bag[np.argmax(score)].append(node)
+        
+    return bag
+    
+
+def disambiguate(bag_of_senses: dict, context_list: list) -> dict:
+    """
+    Lesk.
+    """
+    
+    context_idx = 0
+    
+    mapping_dict = dict()
+    
+    for context in context_list:
+        
+        context_idx += 1
+        score = [0] * len(bag_of_senses)
+        
+        processed_context = nlp(context)
+        
+        text_list = [token.text for token in processed_context]
+        
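+        # Simple Lesk: a context scores one point for each occurrence of a
+        # token it shares with a sense's bag of words; ties go to the lowest
+        # sense index via argmax.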
+        for text in text_list:
+            
+            for sense, words in bag_of_senses.items():
+                    
+                if text in words:
+                        
+                    score[sense] += 1
+        
+        sense = np.argmax(score)
+        
+        if sense in mapping_dict:
+            mapping_dict[sense].append(context_idx)
+        else:
+            mapping_dict[sense] = [context_idx]
+    
+    return mapping_dict
+    
+
+def print_stats(stat_dict: dict) -> None:
+    """Prints various statistics and logs them to file.
+    
+    Args:
+        stat_dict: Dictionary with various statistics.
+    
+    """
+    
+    stat_string = []
+    
+    ts = time.gmtime()
+    
+    key_list = ['target', 'nodes', 'edges', 'L', 'C', 'L_rand', 'C_rand',
+                'clusters', 'a_mean_size', 'h_mean_size', 'pipe_gain']
+    
+    stat_string.append('Topic: {}.'.format(stat_dict['target']))
+    stat_string.append('Processed {} at {}.'.format(time.strftime("%Y-%m-%d", ts),time.strftime("%H:%M:%S", ts)))
+    stat_string.append('Nodes: {}\tEdges: {}.'.format(stat_dict['nodes'],stat_dict['edges']))
+    stat_string.append('Characteristic path length: {}.'.format(stat_dict['L']))
+    stat_string.append('Global clustering coefficient: {}.'.format(stat_dict['C']))
+    stat_string.append('Mean cluster length (arithmetic): {}.'.format(stat_dict['a_mean_size']))
+    stat_string.append('Mean cluster length (harmonic): {}.'.format(stat_dict['h_mean_size']))
+    stat_string.append('Number of clusters: {}.'.format(stat_dict['clusters']))
+    stat_string.append('Tuples gained through merging: {}.'.format(stat_dict['pipe_gain']))
+    stat_string.append('Sense inventory:')
+    for hub in stat_dict['hubs'].keys():
+        stat_string.append(' -> {}: {}.'.format(hub, ", ".join(stat_dict['hubs'][hub])))
+    
+    print('\n[A] '+'\n[A] '.join(stat_string)+'\n')
+    
+    with open('../baseline/statistics.txt', 'a') as stat_file:
+        
+        stat_file.write('\n '.join(stat_string)+'\n\n')
+    
+    write_header = not os.path.exists('../baseline/.statistics.tsv')
+    
+    with open('../baseline/.statistics.tsv', 'a') as stat_file:
+        
+        if write_header:
+            
+            stat_file.write('\t'.join(key_list)+'\n')
+            
+        stat_file.write('\t'.join([str(stat_dict[key]) for key in key_list])+'\n')
+
+
+def global_clustering_coefficient(graph: nx.Graph) -> float:
+    """Calculates global clustering coefficient from graph.
+    
+    Approximates the global coefficient as the arithmetic mean of the local
+    clustering coefficient of every node (the average clustering coefficient).
+    
+    Args:
+        graph: Undirected graph.
+        
+    Returns:
+        Mean local clustering coefficient.
+    """
+    
+    local_coefficient_list = list()
+    
+    for node in graph.nodes:
+        
+        neighbor_list = graph.adj[node]
+        
+        neighbor_edge_list = [(x,y) for x in neighbor_list 
+                              for y in neighbor_list if x<y]
+        
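+        # Local clustering coefficient: edges that actually exist between the
+        # node's neighbours divided by the C(k, 2) possible ones; nodes with
+        # fewer than two neighbours contribute 0.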
+        if len(neighbor_edge_list) == 0:
+            
+            local_coefficient_list.append(0)
+        
+        else:
+            
+            edge_count = 0
+            for x,y in neighbor_edge_list:
+                if graph.has_edge(x,y):
+                    edge_count += 1
+            
+            local_coefficient_list.append(edge_count/len(neighbor_edge_list))
+        
+    return np.mean(local_coefficient_list)
+
+
+def characteristic_path_length(graph: nx.Graph) -> float:
+    """Calculates characteristic path length from graph.
+    
+    Iterates over every pair of nodes and calculates the shortest path between
+    them. The average path length (in edges) is returned; unconnected pairs
+    are ignored.
+    
+    Args:
+        graph: Undirected graph.
+        
+    Returns:
+        Characteristic (average shortest) path length.
+    """
+    
+    path_length_list = list()
+    
+    node_pair_list = [(x,y) for x in graph.nodes for y in graph.nodes if x<y]
+    
+    for node_pair in node_pair_list:
+        
+        if nx.has_path(graph,*node_pair):
+            
+            shortest_path = nx.shortest_path(graph,*node_pair)
+            
+            # Path length in edges, i.e. one less than the number of nodes.
+            path_length_list.append(len(shortest_path) - 1)
+        
+    return np.mean(path_length_list)
+
+
+def main(topic_id: int, topic_name: str, result_dict: dict) -> None:
+    """Calls induction and disambiguation functions, performs main task.
+
+    The task is to both induce senses and match search results to them. This
+    function calls induce() and disambiguate() to perform these sub-tasks. The
+    result is then written to the output directory specified in config.py.
+    
+    Args:
+        topic_id: Index of topic in topics.txt.
+        topic_name: Target string.
+        result_dict: Dictionary with topic_id as key and a list of search
+            results (from results.txt) as value.
+            
+    """
+    
+    if topic_name in [output_file_name.replace('.absinth', '') 
+                      for output_file_name in os.listdir(config.base_out)]:
+        return None
+    
+    else:
+        
+        print('[a]', 'Inducing word senses for {}.'.format(topic_name))
+        
+        graph, root_hub_list, stat_dict = induce(topic_name, result_dict[topic_id])
+        
+        stat_dict['L'] = characteristic_path_length(graph)
+        stat_dict['C'] = global_clustering_coefficient(graph)
+        
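+        # Random-graph reference values for the small-world comparison, using
+        # the usual approximations L_rand ~ ln(N)/ln(<k>) and C_rand ~ <k>/N
+        # with mean degree <k> = 2 * edges / nodes.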
+        edge_count = len(graph.edges)
+        node_count = len(graph.nodes)
+        mean_degree = 2 * edge_count / node_count
+        
+        stat_dict['L_rand'] = np.log(node_count) / np.log(mean_degree)
+        stat_dict['C_rand'] = mean_degree / node_count
+        
+        print('[a]', 'Disambiguating results.\t('+topic_name+')')
+        
+        bag = bag_of_senses(graph, root_hub_list)
+        mapping_dict = disambiguate(bag, result_dict[topic_id])
+        
+        mapping_list = [item[1] for item in sorted(mapping_dict.items())]
+        mapping_count = len(mapping_list)
+                        
+        stat_dict['pipe_gain'] = None
+        
+        #collect statistics from result.
+        cluster_count = 0
+        cluster_length_list = list()
+        
+        for cluster,result_list in mapping_dict.items():
+            
+            cluster_length = len(result_list)
+            
+            if cluster_length != 0:
+                
+                cluster_count += 1
+                cluster_length_list.append(cluster_length)
+                
+        stat_dict['h_mean_size'] = stats.hmean(cluster_length_list)
+        stat_dict['a_mean_size'] = np.mean(cluster_length_list)
+        stat_dict['clusters'] = cluster_count
+
+        print('[a]', 'Writing to file.\t('+topic_name+')')
+        
+        output_path = config.base_out
+        output_file_name = output_path+topic_name+'.absinth'
+        
+        with open(output_file_name, 'w') as output_file:
+
+            output_file.write('subTopicID\tresultID\n')
+
+            for cluster_id,result_list in mapping_dict.items():
+                for result_id in result_list:
+                    output_line = '{}.{}\t{}.{}\n'.format(topic_id, cluster_id,
+                                                        topic_id, result_id)
+                    output_file.write(output_line)
+                    
+        print_stats(stat_dict)
+        
+        
+
+
+if __name__ == '__main__':
+    """Check for modifiers and call main().
+    
+    Only run when abstinent.py is started manually. Checks for various
+    modifiers, i.e. the test environment flag and the number of processes to
+    run simultaneously.
+    """
+    
+    # If abstinent.py is run in the test environment.
+    if '-t' in sys.argv:
+        data_path = config.test
+    else:
+        data_path = config.dataset
+        
+    result_dict, topic_dict = read_dataset(data_path)
+    
+    # Enables manual setting of process count.
+    if '-p' in sys.argv:
+        
+        process_count = int(sys.argv[sys.argv.index('-p') + 1])
+        
+        with Pool(process_count) as pool:
+            
+            parameter_list = [(topic_id, topic_name, result_dict)
+                              for topic_id,topic_name in topic_dict.items()]
+            pool.starmap(main, sorted(parameter_list)) # deterministic order
+
+    else:
+        
+        for topic_id, topic_name in sorted(topic_dict.items()):
+            main(topic_id, topic_name, result_dict)