Absinth - A Small World of Semantic Similarity
Victor Zimmermann

Commit 121f9d68, authored 7 years ago by Victor Zimmermann
Parent: ffce1462

    Saves graphs now.
Changes: 1 changed file, src/absinth.py (+272 additions, -212 deletions)
@@ -7,7 +7,7 @@ matches a list of contexts to each. The method to achieve this is a modified
 reimplementation of Véronis' Hyperlex (2004).

 Example:
-    The function can be called with the following command.
+    The function can be called with the following command:

     $ python3 absinth.py
@@ -23,9 +23,15 @@ Modifiers:
"""

##########################
#      Dependencies      #
##########################

import sys

print('[a] Loading ' + sys.argv[0] + '.\n')

import config
import json
import networkx as nx  # for visualisation
import numpy as np
import os  # for reading files
@@ -42,6 +48,9 @@ from scipy import stats

nlp = spacy.load('en')  # standard english nlp

##########################
#     Preprocessing      #
##########################

def read_dataset(data_path: str) -> (dict, dict):
    """Collects topics.txt and results.txt.
@@ -85,6 +94,102 @@ def read_dataset(data_path: str) -> (dict, dict):

    return results, topics


##########################
#       Induction        #
##########################

def induce(topic_name: str, result_list: list) -> (nx.Graph, list, dict):
    """Induces word senses for a given topic from corpus.

    Counts frequencies from corpus and search result list, builds graph from
    these counts (with some filters). Root hubs (senses) are collected from
    this graph.

    Args:
        topic_name: Target string.
        result_list: List of search result (context) strings.

    Returns:
        A cooccurrence graph,
        a list of root hub strings (senses)
        and a dictionary of various statistics.
    """

    stat_dict = dict()
    stat_dict['target'] = topic_name

    # In topics longer than two words, the leading 'the' can generally be
    # removed without changing the sense.
    if topic_name[:4] == 'the_' and topic_name.count('_') > 1:
        target_string = topic_name[4:]
    else:
        target_string = topic_name

    print('[a]', 'Counting nodes and edges.\t(' + topic_name + ')')

    # Check if frequencies were already counted before.
    node_dict_name = topic_name + '_node.json'
    edge_dict_name = topic_name + '_edge.json'

    graph_in_existence = False
    for graph_name in os.listdir(config.graph):
        if topic_name in graph_name:
            graph_in_existence = True
            with open(node_dict_name, 'r') as node_file, \
                 open(edge_dict_name, 'r') as edge_file:
                node_freq_dict = json.load(node_file)
                edge_freq_dict = json.load(edge_file)
            continue

    if graph_in_existence == False:
        node_freq_dict, edge_freq_dict = frequencies(target_string,
                                                     result_list)
        with open(node_dict_name, 'w') as node_file, \
             open(edge_dict_name, 'w') as edge_file:
            node_file.write(json.dumps(node_freq_dict))
            edge_file.write(json.dumps(edge_freq_dict))

    # Builds graph from these dictionaries, also applies multiple filters.
    print('[a]', 'Building graph.\t(' + topic_name + ')')
    graph = build_graph(node_freq_dict, edge_freq_dict)

    for string in topic_name.split('_'):
        if string in graph.nodes:
            graph.remove_node(string)

    stat_dict['nodes'] = len(graph.nodes)
    stat_dict['edges'] = len(graph.edges)

    # Finds root hubs (senses) within the graph + more filters for these.
    print('[a]', 'Collecting root hubs.\t(' + topic_name + ')')
    root_hub_list = root_hubs(graph, edge_freq_dict)

    # Adds sense inventory to buffer with some common neighbors for context.
    stat_dict['hubs'] = dict()
    for root_hub in root_hub_list:
        by_frequency = lambda node: edge_freq_dict[root_hub, node] \
                                    if root_hub < node \
                                    else edge_freq_dict[node, root_hub]
        most_frequent_neighbor_list = sorted(graph.adj[root_hub],
                                             key=by_frequency, reverse=True)
        stat_dict['hubs'][root_hub] = most_frequent_neighbor_list[:6]

    return graph, root_hub_list, stat_dict


def frequencies(target_string: str, search_result_list: list) -> (dict, dict):
    """Counts occurrences of nodes and cooccurrences.
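This hunk is the change the commit message refers to: induce() now persists the node and edge frequency counts as <topic>_node.json / <topic>_edge.json and reloads them on later runs instead of recounting. Below is a minimal sketch of the same cache-or-compute pattern; count_frequencies and the toy inputs are hypothetical stand-ins for the file's frequencies(). One caveat worth noting: json.dumps() rejects tuple dictionary keys, and the edge_freq_dict[root_hub, node] lookups above suggest the edge counts are keyed by tuples, so they may need flattening to strings before serialisation.

import json
import os

def count_frequencies(result_list: list) -> dict:
    # Hypothetical stand-in for frequencies(): whitespace-token counts.
    freq_dict = {}
    for result in result_list:
        for token in result.split():
            freq_dict[token] = freq_dict.get(token, 0) + 1
    return freq_dict

def cached_frequencies(topic_name: str, result_list: list) -> dict:
    # Reuse counts from an earlier run if the cache file already exists.
    cache_name = topic_name + '_node.json'
    if os.path.exists(cache_name):
        with open(cache_name, 'r') as cache_file:
            return json.load(cache_file)
    freq_dict = count_frequencies(result_list)
    with open(cache_name, 'w') as cache_file:
        cache_file.write(json.dumps(freq_dict))
    return freq_dict

print(cached_frequencies('java', ['java coffee bean', 'java island coffee']))
# First run counts and writes java_node.json; later runs just reload it:
# {'java': 2, 'coffee': 2, 'bean': 1, 'island': 1}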
@@ -408,151 +513,16 @@ def root_hubs(graph: nx.Graph, edge_freq_dict: dict) -> list:

    return hub_list


def components(graph: nx.Graph, root_hub_list: list,
               target_string: str) -> nx.Graph:
    """Builds minimum spanning tree from graph and removes singletons.

    Applies components algorithm from Véronis (2004) and removes singletons.

    Args:
        graph: Undirected weighted graph.
        root_hub_list: List of strings of root hubs of graph.
        target_string: Root of minimum spanning tree.

    Returns:
        Minimum spanning tree with target as root and root hubs as direct
        children. Singletons removed.
    """

    graph_copy = deepcopy(graph)
    graph_copy.add_node(target_string)
    for root_hub in root_hub_list:
        graph_copy.add_edge(target_string, root_hub, weight=0)

    minimum_spanning_tree = nx.minimum_spanning_tree(graph_copy)

    return minimum_spanning_tree


def score(graph: nx.Graph, component: str, root_hub_list: list) -> np.array:
    """Calculate score for a given component in a minimum spanning tree.

    First the correct root for the component is chosen. If no root hub is
    suitable, an empty array is returned. A score is calculated for the
    distance of the component and its root and returned as part of an array
    filled with zeroes.

    Args:
        graph: Minimum spanning tree.
        component: Node (string) from which the distances are to be calculated.
        root_hub_list: List of strings of root hubs (senses) of original graph.

    Returns:
        Array with one score for the correct root hub and filled with zeroes.
    """

    root_hub_count = len(root_hub_list)

    # Initialise score array.
    score_array = np.zeros(root_hub_count)

    # Find root of component.
    distance_list = list()
    for root_hub in root_hub_list:
        if nx.has_path(graph, component, root_hub):
            distance_list.append(
                1 / (1 + len(nx.shortest_path(graph, component, root_hub))))
        else:
            distance_list.append(0)

    if sum(distance_list) == 0:
        return score_array

    root_idx = np.argmax(distance_list)
    root = root_hub_list[root_idx]

    shortest_path = nx.shortest_path(graph, component, root, 'weight')

    total_weight = 0
    # Add weights of every sub-path.
    for i in range(1, len(shortest_path)):
        sub_from, sub_to = shortest_path[i - 1], shortest_path[i]
        total_weight += graph[sub_from][sub_to]['weight']

    score_array = np.zeros(root_hub_count)
    score_array[root_idx] = 1 / (1 + total_weight)

    return score_array
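To make score()'s two-stage arithmetic concrete, here is a toy run of the same computation on a three-node tree (node names hypothetical): the nearest hub is first picked by inverse unweighted path length, then scored by inverse accumulated edge weight along the weighted shortest path.

import networkx as nx
import numpy as np

# Toy tree: component -- hub_a -- hub_b, with hub_a the closer root hub.
tree = nx.Graph()
tree.add_edge('component', 'hub_a', weight=0.5)
tree.add_edge('hub_a', 'hub_b', weight=0.25)
root_hub_list = ['hub_a', 'hub_b']

# Nearest hub by inverse (unweighted) path length: hub_a.
distance_list = [1 / (1 + len(nx.shortest_path(tree, 'component', hub)))
                 for hub in root_hub_list]
root_idx = int(np.argmax(distance_list))  # 0, i.e. hub_a

# Score: inverse total edge weight along the weighted shortest path.
path = nx.shortest_path(tree, 'component', root_hub_list[root_idx], 'weight')
total_weight = sum(tree[u][v]['weight'] for u, v in zip(path, path[1:]))

score_array = np.zeros(len(root_hub_list))
score_array[root_idx] = 1 / (1 + total_weight)
print(score_array)  # [0.66666667 0.        ]  (1 / (1 + 0.5))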
def induce(topic_name: str, result_list: list) -> (nx.Graph, list, dict):
    """Induces word senses for a given topic from corpus.

    Counts frequencies from corpus and search result list, builds graph from
    these counts (with some filters). Root hubs (senses) are collected from
    this graph.

    Args:
        topic_name: Target string.
        result_list: List of search result (context) strings.

    Returns:
        A cooccurrence graph,
        a list of root hub strings (senses)
        and a dictionary of various statistics.
    """

    stat_dict = dict()
    stat_dict['target'] = topic_name

    # In topics longer than two words, the leading 'the' can generally be
    # removed without changing the sense.
    if topic_name[:4] == 'the_' and topic_name.count('_') > 1:
        target_string = topic_name[4:]
    else:
        target_string = topic_name

    print('[a]', 'Counting nodes and edges.\t(' + topic_name + ')')

    node_freq_dict, edge_freq_dict = frequencies(target_string, result_list)

    # Builds graph from these dictionaries, also applies multiple filters.
    print('[a]', 'Building graph.\t(' + topic_name + ')')
    graph = build_graph(node_freq_dict, edge_freq_dict)

    for string in topic_name.split('_'):
        if string in graph.nodes:
            graph.remove_node(string)

    stat_dict['nodes'] = len(graph.nodes)
    stat_dict['edges'] = len(graph.edges)

    # Finds root hubs (senses) within the graph + more filters for these.
    print('[a]', 'Collecting root hubs.\t(' + topic_name + ')')
    root_hub_list = root_hubs(graph, edge_freq_dict)

    # Adds sense inventory to buffer with some common neighbors for context.
    stat_dict['hubs'] = dict()
    for root_hub in root_hub_list:
        by_frequency = lambda node: edge_freq_dict[root_hub, node] \
                                    if root_hub < node \
                                    else edge_freq_dict[node, root_hub]
        most_frequent_neighbor_list = sorted(graph.adj[root_hub],
                                             key=by_frequency, reverse=True)
        stat_dict['hubs'][root_hub] = most_frequent_neighbor_list[:6]

    return graph, root_hub_list, stat_dict
##############################
# Propagation Disambiguation #
##############################

-def colour_graph(graph: nx.Graph, root_hub_list: list) -> nx.Graph:
-    """Colours graph according to root hubs.
+def label_graph(graph: nx.Graph, root_hub_list: list) -> nx.Graph:
+    """Labels graph according to root hubs.

-    Evolving network that colours neighboring nodes iteratively. See sentiment
+    Evolving network that labels neighboring nodes iteratively. See sentiment
     propagation.

     Args:
@@ -560,7 +530,7 @@

         root_hub_list: List of senses.

     Returns:
-        Coloured graph.
+        Labelled graph.
     """
@@ -579,7 +549,7 @@

         graph.node[node]['sense'] = None

-    max_iteration_count = config.max_colour_iteration_count
+    max_iteration_count = config.max_propagation_iteration_count

    iteration_count = 0
    stable = False
@@ -607,12 +577,12 @@

                 graph.node[node]['dist'].append(neighbor_weight_list)

-            old_colour = graph_copy.node[node]['sense']
-            new_colour = np.argmax(np.mean(graph.node[node]['dist'], axis=0))
+            old_propagation = graph_copy.node[node]['sense']
+            new_propagation = np.argmax(np.mean(graph.node[node]['dist'],
+                                                axis=0))

-            if old_colour != new_colour:
+            if old_propagation != new_propagation:
                 stable = False
-                graph.node[node]['sense'] = new_colour
+                graph.node[node]['sense'] = new_propagation
             else:
                 pass
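This hunk shows the node update at the heart of label_graph: a node's new sense is the argmax of the mean of its neighbours' sense distributions, and the surrounding loop (elided here) repeats until no node changes or config.max_propagation_iteration_count is reached. A self-contained sketch of that scheme, assuming one-hot seeds at the root hubs; it uses the current networkx attribute API (graph.nodes[...]) where the file itself targets the older graph.node[...]:

import networkx as nx
import numpy as np

def propagate_labels(graph: nx.Graph, root_hub_list: list,
                     max_iteration_count: int = 50) -> nx.Graph:
    # Seed: root hubs get a one-hot sense distribution, others start at zero.
    sense_count = len(root_hub_list)
    for node in graph.nodes:
        dist = np.zeros(sense_count)
        if node in root_hub_list:
            dist[root_hub_list.index(node)] = 1.0
        graph.nodes[node]['dist'] = dist

    for _ in range(max_iteration_count):
        stable = True
        # Update from a snapshot so all nodes see the same previous state.
        snapshot = {node: graph.nodes[node]['dist'] for node in graph.nodes}
        for node in graph.nodes:
            if node in root_hub_list:
                continue  # root hubs keep their fixed sense
            neighbor_dists = [snapshot[n] for n in graph.adj[node]]
            if not neighbor_dists:
                continue  # isolated node, nothing to propagate from
            new_dist = np.mean(neighbor_dists, axis=0)
            # Same stability test as above: has the argmax sense changed?
            if np.argmax(new_dist) != np.argmax(graph.nodes[node]['dist']):
                stable = False
            graph.nodes[node]['dist'] = new_dist
        if stable:
            break
    return graph

g = propagate_labels(nx.path_graph(['money', 'bank', 'river']),
                     ['money', 'river'])
print(np.argmax(g.nodes['bank']['dist']))  # 0: tie, argmax picks first hub

After propagation, the argmax over each node's 'dist' plays the role of the 'sense' attribute in the file.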
@@ -626,12 +596,12 @@

         graph.node[node]['dist'] = np.mean(graph.node[node]['dist'], axis=0)

     return graph


-def disambiguate_colour(graph: nx.Graph, root_hub_list: list,
-                        context_list: list) -> dict:
-    """Clusters senses to root hubs using a coloured graph.
+def disambiguate_propagation(graph: nx.Graph, root_hub_list: list,
+                             context_list: list) -> dict:
+    """Clusters senses to root hubs using a labelled graph.

-    This algorithm colours the graph using evolutionary graph theory
+    This algorithm labels the graph using evolutionary graph theory
     and calculates scores for each root hub given a context based on this
     graph.

     Args:
@@ -643,7 +613,7 @@

         A dictionary with root hub IDs as keys and context indices as values.
     """

-    coloured_graph = colour_graph(graph, root_hub_list)
+    labelled_graph = label_graph(graph, root_hub_list)

     mapping_dict = {i: list() for i in range(1, len(root_hub_list) + 1)}
@@ -667,11 +637,11 @@

             else:
                 text = token.text

-            if text in coloured_graph.nodes:
+            if text in labelled_graph.nodes:

-                text_colour_dist = coloured_graph.node[text]['dist']
+                text_propagation_dist = labelled_graph.node[text]['dist']

-                if not any(text_colour_dist):
+                if not any(text_propagation_dist):
                     pass
@@ -681,9 +651,9 @@

                 root_hub_idx = root_hub_list.index(root_hub)

-                if nx.has_path(coloured_graph, text, root_hub):
+                if nx.has_path(labelled_graph, text, root_hub):

-                    shortest_path = nx.shortest_path(coloured_graph,
+                    shortest_path = nx.shortest_path(labelled_graph,
                                                      text, root_hub, 'weight')
@@ -693,10 +663,10 @@

                     for i in range(1, len(shortest_path)):
                         sub_from, sub_to = shortest_path[i - 1], \
                                            shortest_path[i]
-                        total_weight += \
-                            coloured_graph[sub_from][sub_to]['weight']
+                        total_weight += \
+                            labelled_graph[sub_from][sub_to]['weight']

                     score[root_hub_idx] += (1 / (1 + total_weight)) \
-                        * coloured_graph.node[text]['dist'][root_hub_idx]
+                        * labelled_graph.node[text]['dist'][root_hub_idx]

                 else:
@@ -717,6 +687,88 @@ def disambiguate_colour(graph: nx.Graph, root_hub_list: list, context_list: list

    return mapping_dict


##############################
#     MST Disambiguation     #
##############################

def components(graph: nx.Graph, root_hub_list: list,
               target_string: str) -> nx.Graph:
    """Builds minimum spanning tree from graph and removes singletons.

    Applies components algorithm from Véronis (2004) and removes singletons.

    Args:
        graph: Undirected weighted graph.
        root_hub_list: List of strings of root hubs of graph.
        target_string: Root of minimum spanning tree.

    Returns:
        Minimum spanning tree with target as root and root hubs as direct
        children. Singletons removed.
    """

    graph_copy = deepcopy(graph)
    graph_copy.add_node(target_string)
    for root_hub in root_hub_list:
        graph_copy.add_edge(target_string, root_hub, weight=0)

    minimum_spanning_tree = nx.minimum_spanning_tree(graph_copy)

    return minimum_spanning_tree
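The zero-weight trick in components() is worth seeing end to end: attaching the target to every root hub with weight-0 edges makes those edges the cheapest way to keep the graph connected, so the minimum spanning tree roots each sense region under its hub. A small demonstration with hypothetical nodes:

import networkx as nx

# Hypothetical cooccurrence graph with two sense regions around the hubs.
graph = nx.Graph()
graph.add_edge('bank', 'money', weight=0.2)
graph.add_edge('bank', 'river', weight=0.3)
graph.add_edge('money', 'river', weight=0.9)

root_hub_list = ['money', 'river']
graph.add_node('TARGET')
for root_hub in root_hub_list:
    # Zero-weight edges are the cheapest option, so the MST keeps TARGET
    # attached to each hub directly, as components() intends.
    graph.add_edge('TARGET', root_hub, weight=0)

mst = nx.minimum_spanning_tree(graph)
print(sorted(mst.edges(data='weight')))
# Three edges survive: TARGET-money (0), TARGET-river (0), bank-money (0.2);
# 'bank' ends up in the subtree of its cheapest hub.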
def score(graph: nx.Graph, component: str, root_hub_list: list) -> np.array:
    """Calculate score for a given component in a minimum spanning tree.

    First the correct root for the component is chosen. If no root hub is
    suitable, an empty array is returned. A score is calculated for the
    distance of the component and its root and returned as part of an array
    filled with zeroes.

    Args:
        graph: Minimum spanning tree.
        component: Node (string) from which the distances are to be calculated.
        root_hub_list: List of strings of root hubs (senses) of original graph.

    Returns:
        Array with one score for the correct root hub and filled with zeroes.
    """

    root_hub_count = len(root_hub_list)

    # Initialise score array.
    score_array = np.zeros(root_hub_count)

    # Find root of component.
    distance_list = list()
    for root_hub in root_hub_list:
        if nx.has_path(graph, component, root_hub):
            distance_list.append(
                1 / (1 + len(nx.shortest_path(graph, component, root_hub))))
        else:
            distance_list.append(0)

    if sum(distance_list) == 0:
        return score_array

    root_idx = np.argmax(distance_list)
    root = root_hub_list[root_idx]

    shortest_path = nx.shortest_path(graph, component, root, 'weight')

    total_weight = 0
    # Add weights of every sub-path.
    for i in range(1, len(shortest_path)):
        sub_from, sub_to = shortest_path[i - 1], shortest_path[i]
        total_weight += graph[sub_from][sub_to]['weight']

    score_array = np.zeros(root_hub_count)
    score_array[root_idx] = 1 / (1 + total_weight)

    return score_array


def disambiguate_mst(graph: nx.Graph, root_hub_list: list,
                     context_list: list, topic_name: str) -> dict:
    """Matches contexts to senses.
@@ -804,53 +856,11 @@ def disambiguate_mst(graph: nx.Graph, root_hub_list: list,

    return mapping_dict


def print_stats(stat_dict: dict) -> None:
    """Prints various statistics and logs them to file.

    Args:
        stat_dict: Dictionary with various statistics.
    """

    stat_string = []

    ts = time.gmtime()

    key_list = ['target', 'nodes', 'edges', 'L', 'C', 'L_rand', 'C_rand',
                'clusters', 'a_mean_size', 'h_mean_size', 'pipe_gain']

    stat_string.append('Topic: {}.'.format(stat_dict['target']))
    stat_string.append('Processed {} at {}.'.format(
        time.strftime("%Y-%m-%d", ts), time.strftime("%H:%M:%S", ts)))
    stat_string.append('Nodes: {}\tEdges: {}.'.format(stat_dict['nodes'],
                                                      stat_dict['edges']))
    stat_string.append('Characteristic path length: {}.'
                       .format(stat_dict['L']))
    stat_string.append('Global clustering coefficient: {}.'
                       .format(stat_dict['C']))
    stat_string.append('Mean cluster length (arithmetic): {}.'
                       .format(stat_dict['a_mean_size']))
    stat_string.append('Mean cluster length (harmonic): {}.'
                       .format(stat_dict['h_mean_size']))
    stat_string.append('Number of clusters: {}.'.format(stat_dict['clusters']))
    stat_string.append('Tuples gained through merging: {}.'
                       .format(stat_dict['pipe_gain']))
    stat_string.append('Sense inventory:')
    for hub in stat_dict['hubs'].keys():
        stat_string.append('-> {}: {}.'.format(
            hub, ", ".join(stat_dict['hubs'][hub])))

    print('\n[A] ' + '\n[A] '.join(stat_string) + '\n')

    with open('statistics.txt', 'a') as stat_file:
        stat_file.write('\n'.join(stat_string) + '\n\n')

    write_header = not os.path.exists('.statistics.tsv')
    with open('.statistics.tsv', 'a') as stat_file:
        if write_header:
            stat_file.write('\t'.join(key_list) + '\n')
        stat_file.write('\t'.join([str(stat_dict[key]) for key in key_list])
                        + '\n')


##############################
#         Statistics         #
##############################

def global_clustering_coefficient(graph: nx.Graph) -> float:
    """Calculates global clustering coefficient from graph.
@@ -918,6 +928,56 @@ def characteristic_path_length(graph: nx.Graph) -> float:

    return np.mean(path_length_list)


def print_stats(stat_dict: dict) -> None:
    """Prints various statistics and logs them to file.

    Args:
        stat_dict: Dictionary with various statistics.
    """

    stat_string = []

    ts = time.gmtime()

    key_list = ['target', 'nodes', 'edges', 'L', 'C', 'L_rand', 'C_rand',
                'clusters', 'a_mean_size', 'h_mean_size', 'pipe_gain']

    stat_string.append('Topic: {}.'.format(stat_dict['target']))
    stat_string.append('Processed {} at {}.'.format(
        time.strftime("%Y-%m-%d", ts), time.strftime("%H:%M:%S", ts)))
    stat_string.append('Nodes: {}\tEdges: {}.'.format(stat_dict['nodes'],
                                                      stat_dict['edges']))
    stat_string.append('Characteristic path length: {}.'
                       .format(stat_dict['L']))
    stat_string.append('Global clustering coefficient: {}.'
                       .format(stat_dict['C']))
    stat_string.append('Mean cluster length (arithmetic): {}.'
                       .format(stat_dict['a_mean_size']))
    stat_string.append('Mean cluster length (harmonic): {}.'
                       .format(stat_dict['h_mean_size']))
    stat_string.append('Number of clusters: {}.'.format(stat_dict['clusters']))
    stat_string.append('Tuples gained through merging: {}.'
                       .format(stat_dict['pipe_gain']))
    stat_string.append('Sense inventory:')
    for hub in stat_dict['hubs'].keys():
        stat_string.append('-> {}: {}.'.format(
            hub, ", ".join(stat_dict['hubs'][hub])))

    print('\n[A] ' + '\n[A] '.join(stat_string) + '\n')

    with open('statistics.txt', 'a') as stat_file:
        stat_file.write('\n'.join(stat_string) + '\n\n')

    write_header = not os.path.exists('.statistics.tsv')
    with open('.statistics.tsv', 'a') as stat_file:
        if write_header:
            stat_file.write('\t'.join(key_list) + '\n')
        stat_file.write('\t'.join([str(stat_dict[key]) for key in key_list])
                        + '\n')
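The .statistics.tsv block at the end of print_stats is a small append-with-header-once pattern that is easy to reuse. The same idea in isolation, with hypothetical column names and values:

import os

def append_stats_row(path: str, key_list: list, stat_dict: dict) -> None:
    # Header goes in once, on first creation; every call appends one row.
    write_header = not os.path.exists(path)
    with open(path, 'a') as stat_file:
        if write_header:
            stat_file.write('\t'.join(key_list) + '\n')
        stat_file.write('\t'.join(str(stat_dict[key]) for key in key_list)
                        + '\n')

append_stats_row('example.tsv', ['target', 'nodes', 'edges'],
                 {'target': 'java', 'nodes': 4242, 'edges': 12345})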
##############################
#            main            #
##############################

def main(topic_id: int, topic_name: str, result_dict: dict) -> None:
    """Calls induction and disambiguation functions, performs main task.
@@ -955,7 +1015,7 @@ def main(topic_id: int, topic_name: str, result_dict: dict) -> None:

     stat_dict['C_rand'] = 2 * mean_degree / node_count

-    colour_rank = config.colour_rank
+    propagation_rank = config.colour_rank
     mst_rank = config.mst_rank

     #Merges Mappings according to pipeline
@@ -963,10 +1023,10 @@ def main(topic_id: int, topic_name: str, result_dict: dict) -> None:

     #matches senses to clusters
     print('[a]', 'Disambiguating results.\t(' + topic_name + ')')

-    if colour_rank != 0:
-        print('[a]', 'Colouring graph.\t(' + topic_name + ')')
-        mapping_dict[colour_rank] = disambiguate_colour(graph, root_hub_list,
+    if propagation_rank != 0:
+        print('[a]', 'Propagating through graph.\t(' + topic_name + ')')
+        mapping_dict[propagation_rank] = disambiguate_propagation(
+            graph, root_hub_list,
             result_dict[topic_id])

     if mst_rank != 0: