diff --git a/EP/Cora_node_classification/README.md b/EP/Cora_node_classification/README.md index 526942e7f3c2f27eca99c640c67becddd632ef99..adfa8e5867a984d3ba8e160eb771ff765c568818 100644 --- a/EP/Cora_node_classification/README.md +++ b/EP/Cora_node_classification/README.md @@ -8,6 +8,7 @@ Graph Embedding Propagation # Cora Node Classification To evaluate the trained graph and the embeddings the task of node classification will be executed. First, the data of cora will be imported into a networkX graph, which will be saved in a pickle file to use it for the training of the embeddings with our EP-SP algorithm. Afterwards the trained embedding will be evaluated with LibLinear L2-Logistic Regression provided from sklearn. +Graph building is provided in cora.py, the evaluation in node_classification.py. # Required Data - Cora dataset saved in cora_data for building the graph @@ -33,7 +34,10 @@ For node_classification.py # Running instructions For cora.py -... +python3 cora.py [-n] [-e] [-o] + -n / --nodes Path to cora file containing nodes + -e / --edges Path to cora file containing edges + -o / --output Path where the graph should be saved For node_classification.py python3 node_classification.py [-g] [-e] [-s] [-i] [-n] diff --git a/EP/Cora_node_classification/cora.py b/EP/Cora_node_classification/cora.py index 95c21dfdba73ffa25f34f8cfa1a83aa79f4478c5..1eed9f8dc3a0835a6775d9f9052754f4d5a02327 100644 --- a/EP/Cora_node_classification/cora.py +++ b/EP/Cora_node_classification/cora.py @@ -1,14 +1,20 @@ """ +@project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics +@requirements: cora data, numpy, networkX, pickle @info -Getting a networkx graph from Cora. Graph can be saved in txt file. CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable). +Getting a networkx graph from Cora. Graph will be saved in a pickle file. 
@usage -get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") --> return graph with nodes and edges -To write the graph informations in file: -def write_graph_to_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = "") +python3 cora.py [-n] [-e] [-o] + -n / --nodes Path to cora file containing nodes + -e / --edges Path to cora file containing edges + -o / --output Path where the graph should be saved + +As a module (used in node_classification.py) you can access the graph with +read_pickle_graph("path_to_cora_graph") """ +import argparse import networkx as nx import numpy as np import pickle as pkl @@ -116,8 +122,11 @@ def read_pickle_graph(path = "graph.pkl"): graph = pkl.load(f) return graph - if __name__ == "__main__": # execute only if run as a script - get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") - get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433) + parser = argparse.ArgumentParser(description="Script for building cora graph.") + parser.add_argument("-n", "--nodes", default="/cora_data/cora.content", help="path to file containing cora nodes") + parser.add_argument("-e", "--edges", default="/cora_data/cora.cites", help="path to file containing edges/citations") + parser.add_argument("-o", "--output", default="", help="path where the graph should be saved") + args = parser.parse_args() + write_pickle_graph_file(path_nodes=args.nodes, path_edges=args.edges, path_output_graph=args.output)