Skip to content
Snippets Groups Projects
Commit 6ba4aad4 authored by Victor Zimmermann
Browse files

Spelling and lemmatisation.

parent 87812127
No related branches found
No related tags found
No related merge requests found
......@@ -168,7 +168,7 @@ def process_file(context_list, target_string, node_freq_dict, edge_freq_dict):
# Add only tokens with allowed tags to nodes.
elif token.tag_ in allowed_tag_list:
token_set.add(token.text)
token_set.add(token.lemma_)
context_size = len(token_set)
......@@ -209,20 +209,20 @@ def build_graph(node_freq_dict, edge_freq_dict):
tokens within every context the target occurs in.
Returns:
cooccurence_graph: Filtered undirected dice weighted small word
cooccurence graph for a given target entity.
cooccurrence_graph: Filtered undirected dice weighted small word
cooccurrence graph for a given target entity.
"""
min_node_freq = config.min_node_freq
min_edge_freq = config.min_edge_freq
max_weight = config.max_weight
cooccurence_graph = nx.Graph()
cooccurrence_graph = nx.Graph()
for node, frequency in node_freq_dict.items():
if frequency >= min_node_freq:
cooccurence_graph.add_node(node)
cooccurrence_graph.add_node(node)
for node_tuple, frequency in edge_freq_dict.items():
......@@ -230,11 +230,11 @@ def build_graph(node_freq_dict, edge_freq_dict):
continue
elif node_tuple[0] not in cooccurence_graph.nodes:
elif node_tuple[0] not in cooccurrence_graph.nodes:
continue
elif node_tuple[1] not in cooccurence_graph.nodes:
elif node_tuple[1] not in cooccurrence_graph.nodes:
continue
......@@ -247,25 +247,25 @@ def build_graph(node_freq_dict, edge_freq_dict):
prob_0 = cooccurrence_frequency / node0_frequency
prob_1 = cooccurrence_frequency / node1_frequency
#best_weight = 1 - max(prob_0, prob_1)
dice_weight = 1 - ((prob_0 + prob_1) / 2)
best_weight = 1 - max(prob_0, prob_1)
#dice_weight = 1 - ((prob_0 + prob_1) / 2)
if dice_weight <= max_weight:
if best_weight <= max_weight:
cooccurence_graph.add_edge(*node_tuple, weight=dice_weight)
cooccurrence_graph.add_edge(*node_tuple, weight=best_weight)
else:
pass
return cooccurence_graph
return cooccurrence_graph
def root_hubs(graph, edge_freq_dict):
"""Identifies senses (root hubs) by choosing nodes with high degrees
Selects root hubs according to the algorithm in Véronis (2004). Nodes with
high degree and neighbors with low weights (high cooccurence) are chosen
high degree and neighbors with low weights (high cooccurrence) are chosen
until there are no more viable candidates. A root hub candidate is every
node that is not already a hub and is not a neighbor of one.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment