Skip to content
Snippets Groups Projects
Commit cacea6a9 authored by dimitrova's avatar dimitrova
Browse files

Added a CLI for the new project structure.

parent b40d1a06
No related branches found
No related tags found
No related merge requests found
import click
import sys
import os
@click.group()
@click.pass_context
def main(ctx):
    """Entry point of the command line interface; subcommands attach to this group."""
@main.command()
@click.option(
    '--input', '-i', type=str, help='The path to the pickled networkx graph.', default='data/wordnet/graphs/wn_graph.pkl', show_default=True,
)
@click.option(
    '--output', '-o', type=str, help='The output directory.', default='data/wordnet/', show_default=True,
)
@click.option(
    # Defaults are now real ints/floats matching the declared type, instead of
    # strings that click had to coerce (e.g. default='200' with type=int).
    '--epochs', '-n', type=int, help='The number of epochs.', default=200, show_default=True,
)
@click.option(
    '--margin', '-m', type=int, help='The margin of the ranking loss function.', default=20, show_default=True,
)
@click.option(
    '--batch_size', '-b', type=int, help='The minibatch size.', default=128, show_default=True,
)
@click.option(
    '--emb_size', '-e', type=int, help='The size of the learnt embeddings', default=128, show_default=True,
)
@click.option(
    '--rate', '-t', type=float, help='The starting learning rate of the Adam Optimizer.', default=0.001, show_default=True,
)
@click.option(
    # An immutable tuple avoids the shared-mutable-default pitfall.
    '--skip', '-s', multiple=True, default=(), show_default=True, help='Label types to skip during training.',
)
@click.pass_context
def embedding_propagation(ctx, batch_size, epochs, margin, emb_size, rate, input, output, skip):
    """
    Trains the EP-SP algorithm (reimplementation of "Learning graph representations with embedding propagation", Duran and Niepert, 2017)
    with the given input graph and hyperparameters.
    """
    # Deferred import so that unrelated subcommands start quickly.
    from scripts.embedding_propagation import epsp

    # makedirs(exist_ok=True) also creates missing parent directories and does
    # not crash if the directory already exists (os.mkdir did both).
    os.makedirs(output, exist_ok=True)
    ep = epsp.EmbeddingPropagation(margin=margin, epochs=epochs, batch_size=batch_size, learning_rate=rate,
                                   embedding_size=emb_size, filename=input, output_dir=output, skip=skip)
    ep.train()
@main.command()
@click.option(
    '--graph', '-g', type=str, help='The path to the Cora graph, output by the cli.py cora command.', default='data/cora/graphs/cora_graph.pkl', show_default=True,
)
@click.option(
    '--embeddings', '-e', type=str, help='The path to the concatenated Cora node embeddings.', default='data/cora/embeddings/merged_node_embeddings.pkl', show_default=True,
)
@click.option(
    '--seed', '-s', type=int, help='The random seed for the experiment, != 0.', default=0, show_default=True,
)
@click.option(
    # Defaults are now real ints/floats matching the declared type, instead of
    # strings that click had to coerce (e.g. default='10' with type=int).
    '--iterations', '-i', type=int, help='The number of runs. Each run has a different random seed. Ignored if the -s option is used.', default=10, show_default=True,
)
@click.option(
    '--num_instances', '-n', type=int, help='The number of instances per class for training.', default=20, show_default=True,
)
@click.option(
    '--num_test', '-t', type=int, help='The number of random test instances.', default=1000, show_default=True,
)
@click.option(
    '--regularization', '-c', type=float, help='Inverse of regularization strength.', default=0.1, show_default=True,
)
@click.pass_context
def node_classification(ctx, graph, embeddings, seed, iterations, num_instances, num_test, regularization):
    """
    Runs the node classification experiment described in Section 2.3.
    """
    # Deferred import so that unrelated subcommands start quickly.
    from scripts.node_classification import nc_experiment as nc

    # seed == 0 (the default) means "no fixed seed": run the multi-iteration
    # experiment with random seeds instead of a single seeded run.
    if seed:
        nc.node_classification(path_graph=graph, path_embeddings=embeddings, seed=seed, num_per_class=num_instances, C=regularization)
    else:
        nc.node_classification_random_seeds(path_graph=graph, path_embeddings=embeddings, num_test_instances=num_test,
                                            num_per_class=num_instances, iterations=iterations, C=regularization)
@main.command()
@click.option(
    '--input', '-i', type=str, help='The path to the raw Cora files', default='data/cora/raw/', show_default=True,
)
@click.option(
    '--output', '-o', type=str, help='The output path + filename for the pickled graph', default='data/cora/graphs/graph.pkl', show_default=True,
)
@click.pass_context
def process_cora(ctx, input, output):
    """
    Creates and pickles the Cora graph.
    """
    # Deferred import so that unrelated subcommands start quickly.
    from scripts.preprocessing.cora import cora

    # os.path.join avoids the doubled separator that '{}/cora.content'.format()
    # produced with the trailing-slash default for --input.
    path_nodes = os.path.join(input, 'cora.content')
    path_edges = os.path.join(input, 'cora.cites')
    cora.write_pickle_graph_file(path_nodes=path_nodes, path_edges=path_edges, output_path=output)
@main.command()
@click.option(
    '--senseval', '-s', type=click.Choice(['2', '3']), help='Whether to run the experiment on SensEval 2 or 3.', default='3', show_default=True,
)
@click.option(
    '--embeddings', '-e', type=str, help='Embeddings file to load.', default='data/wordnet/embeddings/epoch_500/wsd_node_embeddings.pkl', show_default=True,
)
@click.option(
    '--id_map', '-i', type=str, help='The path to the .json id mapping.', default='data/wordnet/mappings/json/id_mapping.json', show_default=True,
)
@click.option(
    '--lemma_map', '-l', type=str, help='The path to the .txt lemma mapping.', default='data/wordnet/mappings/txt/lemmata_mapping2.txt', show_default=True,
)
@click.option(
    '--sense_key', '-k', type=str, help='The path to the wn30->wn17 mapping.', default='data/wordnet/mappings/txt/wn30_wn17_long-wn17_pos.txt', show_default=True,
)
@click.option(
    '--output', '-o', type=str, help='Name of the answer file. No path please.', default='wsd1', show_default=True,
)
@click.pass_context
def wsd_1(ctx, senseval, embeddings, id_map, lemma_map, sense_key, output):
    """
    Runs Method 1 of the Word Sense Disambiguation experiment, as described in Section 3.3.2.
    """
    # Deferred import so that unrelated subcommands start quickly.
    from scripts.wsd import wsd_method1 as wsd

    senseval_path = 'data/senseval{}/'.format(senseval)
    input_file = os.path.join(senseval_path, 'processed/ambig_sents.json')
    output_file = '{}wsd_answers/{}'.format(senseval_path, output)

    # Refuse to clobber an existing answer file.
    if os.path.exists(output_file):
        print('File {} already exists. Please delete old file or give another filename (-o).'.format(output_file))
        sys.exit(1)

    # Load embeddings and the three mappings needed to resolve senses.
    node_embeddings = wsd.open_embedding_file(embeddings)
    id_mapping = wsd.open_mapping(id_map)
    lemmata_mapping = wsd.open_lem_map(lemma_map)
    sense_key_mapping = wsd.open_sense_keys(sense_key)

    # Disambiguate every ambiguous sentence and persist the answers.
    solutions = wsd.iterate_over(input_file, node_embeddings, lemmata_mapping, id_mapping, sense_key_mapping)
    wsd.write_answer_to_file(solutions, output_file)
    print('WSD answers saved under', output_file)
@main.command()
@click.option(
    '--context', '-c', type=int, help='The size of the context window.', default=5, show_default=True,
)
@click.option(
    '--senseval', '-s', type=click.Choice(['2', '3']), help='Whether to run the experiment on SensEval 2 or 3.', default='3', show_default=True,
)
@click.option(
    '--output', '-o', type=str, help='Name of the answer file. No path please.', default='wsd2', show_default=True,
)
@click.option(
    '--mappings', '-m', type=str, help='The path to the pickled WordNet mappings.', default='data/wordnet/mappings/pickled/', show_default=True,
)
@click.option(
    '--embeddings', '-e', type=str, help='The path to the pickled WordNet node embeddings.', default='data/wordnet/embeddings/epoch_500/', show_default=True,
)
@click.option(
    '--first_sense', '-fs', is_flag=True, help='Whether the "First WordNet Sense" baseline should be used.',
)
@click.option(
    '--info', '-i', is_flag=True, help='Whether to print some info on the console.',
)
@click.pass_context
def wsd_2(ctx, context, output, senseval, mappings, embeddings, first_sense, info):
    """
    Runs Method 2 of the Word Sense Disambiguation experiment, as described in Section 3.3.2.
    """
    # Deferred import so that unrelated subcommands start quickly.
    from scripts.wsd import wsd_method2 as wsd, wsd2_exp as exp

    senseval_path = 'data/senseval{}/'.format(senseval)
    output_file = '{}wsd_answers/{}_{}'.format(senseval_path, output, context)
    # Refuse to clobber an existing answer file.
    if os.path.exists(output_file):
        print('File {} already exists. Please delete old file or give another filename (-o).'.format(output_file))
        sys.exit(1)

    # Load every pickled, preprocessed SensEval document.
    input_path = senseval_path + 'processed/'
    docs = []
    for file in sorted(os.listdir(input_path)):
        if not file.endswith('.pkl'):
            continue
        print('Processing {}...'.format(file))
        docs.append(wsd.read_pkl(os.path.join(input_path, file)))

    # Get all required mappings. os.path.join makes these robust to a missing
    # trailing slash in -m; the old mixed concatenation ('/id_mapping.pkl' vs
    # 'reverse_id.pkl') broke whenever the directory did not end with '/'.
    id_map = wsd.read_pkl(os.path.join(mappings, 'id_mapping.pkl'))
    reverse_id_map = wsd.read_pkl(os.path.join(mappings, 'reverse_id.pkl'))
    gloss_map = wsd.read_pkl(os.path.join(mappings, 'gloss_mapping.pkl'))
    lemma_map = wsd.read_pkl(os.path.join(mappings, 'lemma_mapping.pkl'))
    pos_map = wsd.read_pkl(os.path.join(mappings, 'pos_mapping.pkl'))
    wordnet_map = wsd.read_pkl(os.path.join(mappings, 'pickled_wn_mapping.pkl'))

    # Get required embeddings; same robustness fix for -e.
    gloss_emb = wsd.read_pkl(os.path.join(embeddings, 'embedding_matrix_gloss'))  # dummy gloss embeddings
    lemma_emb = wsd.read_pkl(os.path.join(embeddings, 'embedding_matrix_lemma'))
    pos_emb = wsd.read_pkl(os.path.join(embeddings, 'embedding_matrix_pos'))
    node_emb = wsd.read_pkl(os.path.join(embeddings, 'wsd_node_embeddings.pkl'))

    disambiguator = wsd.WSD(id_map, reverse_id_map, gloss_map, gloss_emb, lemma_map,
                            lemma_emb, pos_map, pos_emb, node_emb, wordnet_map)
    for doc in docs:
        exp.run_experiment(window_size=context, document=doc, disambiguator=disambiguator,
                           output_file=output_file, mfs=first_sense, info=info)
    print('WSD answers saved under', output_file)
@main.command()
@click.option(
    '--answers', '-a', type=str, help='Name of the answer file. No path please.', default='wsd1', show_default=True,
)
@click.option(
    '--senseval', '-s', type=click.Choice(['2', '3']), help='Whether to use the SensEval 2 or 3 answer key.', default='3', show_default=True,
)
@click.pass_context
def score_wsd(ctx, answers, senseval):
    """
    Calls the official SensEval Scorer on some system output.
    """
    import subprocess

    senseval_path = 'data/senseval{}/'.format(senseval)
    answers = senseval_path + 'wsd_answers/' + answers
    # click.Choice yields strings, so compare against '2'. The old check
    # `senseval == 2` was always False, so the SensEval 2 key was never used.
    if senseval == '2':
        key = senseval_path + 'raw/key_senseval2'
    else:
        key = senseval_path + 'raw/EnglishAW.test.key'
    # List form (shell=False) keeps the paths from being shell-interpreted.
    command = ['scripts/scoring/scorer2', answers, key]
    subprocess.check_call(command)
# Script entry point: dispatch to the click command group.
if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment