From 9038e14b1a9c351c92f6e0eae1ce2b1978248135 Mon Sep 17 00:00:00 2001
From: Utaemon Toyota <toyota@cl.uni-heidelberg.de>
Date: Tue, 26 Feb 2019 20:36:52 +0100
Subject: [PATCH] add README and add argparse to cora.py

---
 EP/Cora_node_classification/README.md |  6 +++++-
 EP/Cora_node_classification/cora.py   | 25 +++++++++++++++++--------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/EP/Cora_node_classification/README.md b/EP/Cora_node_classification/README.md
index 526942e..adfa8e5 100644
--- a/EP/Cora_node_classification/README.md
+++ b/EP/Cora_node_classification/README.md
@@ -8,6 +8,7 @@ Graph Embedding Propagation
 
 # Cora Node Classification
 To evaluate the trained graph and the embeddings the task of node classification will be executed. First, the data of cora will be imported into a networkX graph, which will be saved in a pickle file to use it for the training of the embeddings with our EP-SP algorithm. Afterwards the trained embedding will be evaluated with LibLinear L2-Logistic Regression provided from sklearn.
+Graph building is provided by cora.py, the evaluation by node_classification.py.
 
 # Required Data
 - Cora dataset saved in cora_data for building the graph
@@ -33,7 +34,10 @@ For node_classification.py
 
 # Running instructions
 For cora.py
-...
+python3 cora.py [-n] [-e] [-o]
+	-n / --nodes	Path to cora file containing nodes
+	-e / --edges	Path to cora file containing edges
+	-o / --output	Path where the graph should be saved
 
 For node_classification.py
 python3 node_classification.py [-g] [-e] [-s] [-i] [-n]
diff --git a/EP/Cora_node_classification/cora.py b/EP/Cora_node_classification/cora.py
index 95c21df..1eed9f8 100644
--- a/EP/Cora_node_classification/cora.py
+++ b/EP/Cora_node_classification/cora.py
@@ -1,14 +1,20 @@
 """
+@project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics
+@requirements: cora data, numpy, networkX, pickle
 @info
-Getting a networkx graph from Cora. Graph can be saved in txt file. CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable).
+Getting a networkx graph from Cora. Graph will be saved in a pickle file.
 
 @usage
-get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites")
--> return graph with nodes and edges
-To write the graph informations in file:
-def write_graph_to_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = "")
+python3 cora.py [-n] [-e] [-o]
+	-n / --nodes	Path to cora file containing nodes
+	-e / --edges	Path to cora file containing edges
+	-o / --output	Path where the graph should be saved
+
+As a module (used in node_classification.py), you can load the saved graph with
+read_pickle_graph("path_to_cora_graph")
 """
 
+import argparse
 import networkx as nx
 import numpy as np
 import pickle as pkl
@@ -116,8 +122,11 @@ def read_pickle_graph(path = "graph.pkl"):
         graph = pkl.load(f)
     return graph
 
-
 if __name__ == "__main__":
     # execute only if run as a script
-    get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites")
-    get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433)
+    parser = argparse.ArgumentParser(description="Script for building cora graph.")
+    parser.add_argument("-n", "--nodes", default="/cora_data/cora.content", help="path to file containing cora nodes")
+    parser.add_argument("-e", "--edges", default="/cora_data/cora.cites", help="path to file containing edges/citations")
+    parser.add_argument("-o", "--output", default="", help="path where the graph should be saved")
+    args = parser.parse_args()
+    write_pickle_graph_file(path_nodes=args.nodes, path_edges=args.edges, path_output_graph=args.output)
-- 
GitLab