Skip to content
Snippets Groups Projects
Commit 414db2ce authored by Victor Zimmermann
Browse files

Véronis Pre-Sets, seem to work best (smaller corpus also had better results)

parent d4b72c96
No related branches found
No related tags found
No related merge requests found
......@@ -8,6 +8,7 @@ Choose paths for corpus, dataset and output.
'''
corpus = "/proj/absinth/wikipedia_reduced/"
dataset = "../WSI-Evaluator/datasets/MORESQUE/"
test = "../WSI-Evaluator/datasets/trial/"
output = "../output/"
'''
......@@ -15,14 +16,14 @@ Choose stop words and allowed pos-tags.
- Stop words will not be considered for nodes.
- Only tokens with allowed pos-tags will be considered.
'''
stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free']
allowed_tags=['NN','NNS','JJ','JJS','JJR','NNP','VBZ','VBG']
stop_words = ['utc', "'s", 'new', 'other', 'talk', 'wikipedia', 'article', 'topic', 'page', 'editors', 'encyclopedia', 'free', 'pp']
allowed_tags = ['NN','NNS','JJ','JJS','JJR','NNP']
'''
Choose the maximum number of nodes and edges that should be considered before building the graph.
'''
max_nodes = 100000
max_edges = 10000000
max_nodes = 20000
max_edges = 2000000
'''
Choose the minimum context size.
......@@ -43,4 +44,4 @@ Choose minimum number of neighbors and maximum median weight of the most frequen
- the threshold is calculated using the median of the same number of neighbors declared in min_neighbors.
'''
min_neighbors = 6
theshold = 0.8
threshold = 0.8
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment