From 414db2ce7b6a395ba5c41db861c067f6f5f47796 Mon Sep 17 00:00:00 2001
From: Victor Zimmermann <zimmermann@cl.uni-heidelberg.de>
Date: Fri, 9 Mar 2018 20:51:46 +0100
Subject: [PATCH] =?UTF-8?q?V=C3=A9ronis=20Pre-Sets,=20seem=20to=20work=20b?=
 =?UTF-8?q?est=20(smaller=20corpus=20also=20had=20better=20results)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/config.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/config.py b/src/config.py
index ecda988..cf97fed 100644
--- a/src/config.py
+++ b/src/config.py
@@ -8,6 +8,7 @@ Choose paths for corpus, dataset and output.
 '''
 corpus = "/proj/absinth/wikipedia_reduced/"
 dataset = "../WSI-Evaluator/datasets/MORESQUE/"
+test = "../WSI-Evaluator/datasets/trial/"
 output = "../output/"
 
 '''
@@ -15,14 +16,14 @@ Choose stop words and allowed pos-tags.
 - Stop words will not be considered for nodes.
 - Only tokens with allowed pos-tags will be considered.
 '''
-stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free']
-allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP','VBZ','VBG']
+stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free', 'pp']
+allowed_tags = ['NN','NNS','JJ','JJS','JJR','NNP']
 
 '''
 Choose the maximum number of nodes and edges that should be considered before building the graph.
 '''
-max_nodes = 100000
-max_edges = 10000000
+max_nodes = 20000
+max_edges = 2000000
 
 '''
 Choose the minimum context size.
@@ -43,4 +44,4 @@ Choose minimum number of neighbors and maximum median weight of the most frequen
 - the threshold is calculated using the media of the same number of neighbors declared in min_neighbors.
 '''
 min_neighbors = 6
-theshold = 0.8
+threshold = 0.8
-- 
GitLab