diff --git a/code/absinth.py b/code/absinth.py index e1f9f210f92cbc671e4256d3bb7e7d76762ce03b..1938a3808c9b8e9ecb5f141b4c625b0a7141738a 100644 --- a/code/absinth.py +++ b/code/absinth.py @@ -180,7 +180,7 @@ def disambiguation(mst, context): if __name__ == '__main__': - filters = {'min_occurrences' : 10, 'min_cooccurrence' : 5, 'stop_words' : [], 'allowed_tags' : ['NN', 'NNS', 'JJ', 'JJS', 'JJR', 'NNP'], 'context_size' : 4, 'max_distance' : 0.9} + filters = {'min_occurrences' : 10, 'min_cooccurrence' : 5, 'stop_words' : ['utc'], 'allowed_tags' : ['NN', 'NNS', 'JJ', 'JJS', 'JJR', 'NNP'], 'context_size' : 4, 'max_distance' : 0.9} data_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/WSI-Evaluator/datasets/MORESQUE' #corpus_path = '/home/students/zimmermann/Courses/ws17/fsem/absinth/test' corpus_path = '/proj/absinth/wikipedia.txt.dump.20140615-en.SZTAKI'