From 9038e14b1a9c351c92f6e0eae1ce2b1978248135 Mon Sep 17 00:00:00 2001 From: Utaemon Toyota <toyota@cl.uni-heidelberg.de> Date: Tue, 26 Feb 2019 20:36:52 +0100 Subject: [PATCH] add README and add argparse to cora.py --- EP/Cora_node_classification/README.md | 6 +++++- EP/Cora_node_classification/cora.py | 25 +++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/EP/Cora_node_classification/README.md b/EP/Cora_node_classification/README.md index 526942e..adfa8e5 100644 --- a/EP/Cora_node_classification/README.md +++ b/EP/Cora_node_classification/README.md @@ -8,6 +8,7 @@ Graph Embedding Propagation # Cora Node Classification To evaluate the trained graph and the embeddings the task of node classification will be executed. First, the data of cora will be imported into a networkX graph, which will be saved in a pickle file to use it for the training of the embeddings with our EP-SP algorithm. Afterwards the trained embedding will be evaluated with LibLinear L2-Logistic Regression provided from sklearn. +Graph building is provided by cora.py, the evaluation by node_classification.py. # Required Data - Cora dataset saved in cora_data for building the graph @@ -33,7 +34,10 @@ For node_classification.py # Running instructions For cora.py -... +python3 cora.py [-n] [-e] [-o] + -n / --nodes Path to cora file containing nodes + -e / --edges Path to cora file containing edges + -o / --output Path where the graph should be saved For node_classification.py python3 node_classification.py [-g] [-e] [-s] [-i] [-n] diff --git a/EP/Cora_node_classification/cora.py b/EP/Cora_node_classification/cora.py index 95c21df..1eed9f8 100644 --- a/EP/Cora_node_classification/cora.py +++ b/EP/Cora_node_classification/cora.py @@ -1,14 +1,20 @@ """ +@project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics +@requirements: cora data, numpy, networkX, pickle @info -Getting a networkx graph from Cora. Graph can be saved in txt file. 
CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable). +Getting a networkx graph from Cora. Graph will be saved in a pickle file. @usage -get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") --> return graph with nodes and edges -To write the graph informations in file: -def write_graph_to_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = "") +python3 cora.py [-n] [-e] [-o] + -n / --nodes Path to cora file containing nodes + -e / --edges Path to cora file containing edges + -o / --output Path where the graph should be saved + +As a module (used in node_classification.py) you can access the graph with +read_pickle_graph("path_to_cora_graph") """ +import argparse import networkx as nx import numpy as np import pickle as pkl @@ -116,8 +122,11 @@ def read_pickle_graph(path = "graph.pkl"): graph = pkl.load(f) return graph - if __name__ == "__main__": # execute only if run as a script - get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") - get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433) + parser = argparse.ArgumentParser(description="Skript for building cora graph.") + parser.add_argument("-n", "--nodes", default="/cora_data/cora.content", help="path to file containing cora nodes") + parser.add_argument("-e", "--edges", default="/cora_data/cora.cites", help="path to file containing edges/citations") + parser.add_argument("-o", "--output", default="", help="path where the graph should be saved") + args = parser.parse_args() + write_pickle_graph_file(path_nodes=args.nodes, path_edges=args.edges, path_output_graph=args.output) -- GitLab