diff --git a/src/config.py b/src/config.py
index ecda9887ce494107b830108ad75695cc7c15c889..cf97fedea72f3d4d53f9c6e1deb6f45f86a8098e 100644
--- a/src/config.py
+++ b/src/config.py
@@ -8,6 +8,7 @@ Choose paths for corpus, dataset and output.
 '''
 corpus = "/proj/absinth/wikipedia_reduced/"
 dataset = "../WSI-Evaluator/datasets/MORESQUE/"
+test = "../WSI-Evaluator/datasets/trial/"
 output = "../output/"
 
 '''
@@ -15,14 +16,14 @@ Choose stop words and allowed pos-tags.
 - Stop words will not be considered for nodes.
 - Only tokens with allowed pos-tags will be considered.
 '''
-stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free']
-allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP','VBZ','VBG']
+stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free', 'pp']
+allowed_tags = ['NN','NNS','JJ','JJS','JJR','NNP']
 
 '''
 Choose the maximum number of nodes and edges that should be considered before building the graph.
 '''
-max_nodes = 100000
-max_edges = 10000000
+max_nodes = 20000
+max_edges = 2000000
 
 '''
 Choose the minimum context size.
@@ -43,4 +44,4 @@ Choose minimum number of neighbors and maximum median weight of the most frequen
 - the threshold is calculated using the media of the same number of neighbors declared in min_neighbors.
 '''
 min_neighbors = 6
-theshold = 0.8
+threshold = 0.8