diff --git a/scripts/node_classification/README.md b/scripts/node_classification/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2e9668e6f13e4b904d81c51b9e2a2b3cae25bdee --- /dev/null +++ b/scripts/node_classification/README.md @@ -0,0 +1,34 @@ +# AUTHORS +Lyuba Dimitrova, Nadia Arslan, Nicolas Weber, Utaemon Toyota + +# PROJECT +Softwareprojekt WS2018/19 +Betreuerin: Prof. Dr. Anette Frank +Graph Embedding Propagation + +# Cora Node Classification +To evaluate the trained graph and the embeddings, a node classification task is performed. First, the Cora data is imported into a networkX graph, which is saved in a pickle file so that it can be used for training the embeddings with our EP-SP algorithm. Afterwards, the trained embeddings are evaluated with LibLinear L2-Logistic Regression provided by sklearn in a transductive setting with 1000 random nodes for validation, 1000 random nodes for testing and 20 random nodes per class for training. For each iteration, in which the sets are split anew, the random seed is set to the iteration number. +Graph building is provided in cora.py, the evaluation in node_classification.py. + +# Required Data +- Cora Graph saved in data/cora/graph/ +- Embeddings for node classification saved in data/cora/embeddings/ + +# Dependencies +- pickle +- numpy +- sklearn for evaluation +- random for getting random test, training and validation sets +- sys +- argparse +- heapq for getting a heatmap from the confusion matrix +- sklearn for confusion matrix and F1 score + +# Running instructions +python3 nc_experiments.py [-g] [-e] [-s] [-i] [-n] [-c] + -g / --graph Path to pickled networkX-graph + -e / --embeddings Path to pickled embeddings + -s / --seed Seed for randomization. If this argument is given, node classification is executed only for this specific seed + -i / --iterations Number of iterations of node classification. 
The iteration counter is equal to the random seed + -n / --number Number of instances per class for training + -c / --regularization Inverse of regularization strength diff --git a/scripts/preprocessing/cora/README.md b/scripts/preprocessing/cora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6b7d3cc621fb01d5d1b340b3d5184b9a94e264ae --- /dev/null +++ b/scripts/preprocessing/cora/README.md @@ -0,0 +1,25 @@ +# AUTHORS +Lyuba Dimitrova, Nadia Arslan, Nicolas Weber, Utaemon Toyota + +# PROJECT +Softwareprojekt WS2018/19 +Betreuerin: Prof. Dr. Anette Frank +Graph Embedding Propagation + +# Building Cora Graph +With this script, a networkX graph is created from the raw data. +Because numpy is used for the bag-of-words array representations, the data has to be saved in pickle format rather than, e.g., JSON. + +# Required Data +- Cora raw data saved in /data/cora/raw/ + +# Dependencies +- networkx for building the graph +- numpy to save one-hot vocabulary vectors +- pickle to save data in a pickle file + +# Running instructions +python3 cora.py [-n] [-e] [-o] + -n / --nodes Path to cora file containing nodes + -e / --edges Path to cora file containing edges + -o / --output Path where the graph should be saved diff --git a/scripts/preprocessing/cora/cora.py b/scripts/preprocessing/cora/cora.py index 5b931c78a41ed09cddb100c052f61eb9ba352d40..f423b0a5d3ec141c74215ef2fd9dc6df447a43da 100644 --- a/scripts/preprocessing/cora/cora.py +++ b/scripts/preprocessing/cora/cora.py @@ -5,12 +5,10 @@ Arrays are initialized in normal or uniform random format (default = normal). 
#Usage -get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites") +get_graph(path_nodes="/../../data/cora/raw/cora.content", path_edges="/../../data/cora/raw/cora.cites") -> return graph with nodes and edges To write the graph informations in file: -def write_graph_to_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/") -To write the dictionary with initalizing Embeddings in file: -def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "/home/utaemon/SP/") +def write_graph_to_file(path_nodes="/../../data/cora/raw/cora.content", path_edges="/../../data/cora/raw/cora.cites", path_output_graph = "/../../data/cora/graph/") """ import networkx as nx