diff --git a/EP/Cora_node_classification/README.md b/EP/Cora_node_classification/README.md
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..526942e7f3c2f27eca99c640c67becddd632ef99 100644
--- a/EP/Cora_node_classification/README.md
+++ b/EP/Cora_node_classification/README.md
@@ -0,0 +1,45 @@
+# AUTHORS
+Lyuba Dimitrova, Nadia Arslan, Nicolas Weber, Utaemon Toyota
+
+# PROJECT
+Softwareprojekt WS2018/19
+Supervisor: Prof. Dr. Anette Frank
+Graph Embedding Propagation
+
+# Cora Node Classification
+To evaluate the trained graph and the embeddings, the task of node classification is performed. First, the Cora data is imported into a NetworkX graph, which is saved as a pickle file and used to train the embeddings with our EP-SP algorithm. Afterwards, the trained embeddings are evaluated with the LibLinear L2 logistic regression provided by sklearn.
+
+# Required Data
+- Cora dataset saved in cora_data for building the graph
+- Embeddings for node classification
+
+# Dependencies
+For cora.py
+- networkx for building the graph
+- numpy to save one-hot vocabulary vectors
+- pickle to save data in a pickle file
+
+For node_classification.py
+- cora.py
+- random_nodes_for_node_classification.py for getting random node sets for the test, training and validation sets
+- pickle
+- numpy
+- sklearn for evaluation
+- random for getting random test, training and validation sets
+- sys
+- argparse
+- heapq for getting a heatmap from the confusion matrix
+- sklearn for the confusion matrix and f1 score
+
+# Running instructions
+For cora.py
+...
+
+For node_classification.py
+python3 node_classification.py [-g] [-e] [-s] [-i] [-n] [-c]
+    -g / --graph            Path to the pickled NetworkX graph
+    -e / --embeddings       Path to the pickled embeddings
+    -s / --seed             Seed for randomization. If this argument is given, node classification is run only for this specific seed
+    -i / --iterations       Number of iterations of node classification. The iteration counter is used as the random seed
+    -n / --number           Number of instances per class for training
+    -c / --regularization   Inverse of regularization strength
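The README leaves the running instructions for cora.py open ("..."). As a rough, hypothetical sketch only, the pickled graph could be produced with the functions that cora.py exposes in the diff below; the relative paths and the call sequence here are assumptions, not the project's documented invocation.

```python
# Hypothetical usage sketch for cora.py (not the project's documented invocation).
# get_graph and write_pickle_graph_file appear in the diff below; the relative
# paths are placeholders for wherever the Cora files actually live.
import cora

# Build the NetworkX graph from the Cora content and citation files.
graph = cora.get_graph(path_nodes="cora_data/cora.content",
                       path_edges="cora_data/cora.cites")
print(graph.number_of_nodes(), "nodes,", graph.number_of_edges(), "edges")

# Write graph.pkl into the current directory (path_output_graph is used as a prefix).
cora.write_pickle_graph_file(path_nodes="cora_data/cora.content",
                             path_edges="cora_data/cora.cites",
                             path_output_graph="")
```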
diff --git a/EP/Cora_node_classification/cora.py b/EP/Cora_node_classification/cora.py
index a8cfc3c075d14c02a836e5b399ca3e811b1891c1..95c21dfdba73ffa25f34f8cfa1a83aa79f4478c5 100644
--- a/EP/Cora_node_classification/cora.py
+++ b/EP/Cora_node_classification/cora.py
@@ -1,21 +1,16 @@
 """
 @info Getting a networkx graph from Cora. Graph can be saved in txt file.
 CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable).
-Initialize Embeddings for n dimensions with initialize-module.
-Arrays are initialized in normal or uniform random format (default = normal).
 @usage get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") -> return graph with nodes and edges
 To write the graph informations in file:
 def write_graph_to_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = "")
-To write the dictionary with initalizing Embeddings in file:
-def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "")
 """
 import networkx as nx
 import numpy as np
-import initialize
 import pickle as pkl
 
 def list_of_classes():
@@ -111,9 +106,6 @@ def add_max_values_to_graph(path_nodes, path_edges): #update
     Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges)
     return Cora_graph
 
-def get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433):
-    return initialize.get_embeddings(rand_type=rand_type, dimension = dimension, quantity=quantity)
-
 def write_pickle_graph_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = ""):
     g = add_max_values_to_graph(path_nodes, path_edges)
     with open(path_output_graph + "graph.pkl", "wb") as output:
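For orientation, the evaluation step described in the README (LibLinear L2 logistic regression over the pickled embeddings) might look roughly like the sketch below. The file names, the embedding dictionary layout, the label attribute, and the train/test split are assumptions; node_classification.py's per-class sampling and seed handling are the authoritative procedure.

```python
# Hypothetical sketch of the evaluation the README describes. Assumptions:
# graph.pkl / embeddings.pkl exist, embeddings is a {node_id: vector} dict,
# and each node stores its class label under a "class" attribute.
import pickle
import random
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

with open("graph.pkl", "rb") as f:
    graph = pickle.load(f)
with open("embeddings.pkl", "rb") as f:
    embeddings = pickle.load(f)

random.seed(0)  # the real script ties the seed to the iteration counter
nodes = [n for n in graph.nodes() if n in embeddings]
random.shuffle(nodes)

X = np.array([embeddings[n] for n in nodes])
y = np.array([graph.nodes[n]["class"] for n in nodes])

# Placeholder 20/80 split instead of the per-class sampling the real script uses.
cut = int(0.2 * len(nodes))
clf = LogisticRegression(penalty="l2", solver="liblinear", C=1.0)
clf.fit(X[:cut], y[:cut])
pred = clf.predict(X[cut:])
print("accuracy:", clf.score(X[cut:], y[cut:]))
print("macro F1:", f1_score(y[cut:], pred, average="macro"))
```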