diff --git a/EP/Cora_node_classification/README.md b/EP/Cora_node_classification/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/EP/Cora_node_classification/cora.py b/EP/Cora_node_classification/cora.py index cb8520a9592145f63bd5801d8436e15d58058cc1..a8cfc3c075d14c02a836e5b399ca3e811b1891c1 100644 --- a/EP/Cora_node_classification/cora.py +++ b/EP/Cora_node_classification/cora.py @@ -1,16 +1,16 @@ """ +@info Getting a networkx graph from Cora. Graph can be saved in txt file. CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable). Initialize Embeddings for n dimensions with initialize-module. Arrays are initialized in normal or uniform random format (default = normal). - -#Usage -get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites") +@usage +get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") -> return graph with nodes and edges To write the graph informations in file: -def write_graph_to_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/") +def write_graph_to_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = "") To write the dictionary with initalizing Embeddings in file: -def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "/home/utaemon/SP/") +def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "") """ import networkx as nx @@ -21,7 +21,7 @@ import pickle as pkl def list_of_classes(): return ["Case_Based", "Genetic_Algorithms", "Neural_Networks", "Probabilistic_Methods", "Reinforcement_Learning", "Rule_Learning", "Theory"] -def read_file_and_get_nodes(graph_name, path="/home/utaemon/SP/cora/cora.content"): +def read_file_and_get_nodes(graph_name, path="/cora_data/cora.content"): class_list = list_of_classes() max_bow_len = 0 node_mapping = {} @@ -49,7 +49,7 @@ def read_file_and_get_nodes(graph_name, path="/home/utaemon/SP/cora/cora.content graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))} return node_mapping -def read_file_and_get_edges(graph_name, node_mapping, path="/home/utaemon/SP/cora/cora.cites"): +def read_file_and_get_edges(graph_name, node_mapping, path="/cora_data/cora.cites"): with open(path) as file: for line in file.readlines(): a, b = line.split() @@ -114,12 +114,12 @@ def add_max_values_to_graph(path_nodes, path_edges): #update def get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433): return initialize.get_embeddings(rand_type=rand_type, dimension = dimension, quantity=quantity) -def write_pickle_graph_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/"): +def write_pickle_graph_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = ""): g = add_max_values_to_graph(path_nodes, path_edges) with open(path_output_graph + "graph.pkl", "wb") as output: pkl.dump(g, output) -def read_pickle_graph(path = "/home/utaemon/SP/graph.pkl"): +def read_pickle_graph(path = "graph.pkl"): with open(path, 'rb') as f: graph = pkl.load(f) return graph @@ -127,5 +127,5 @@ def read_pickle_graph(path = "/home/utaemon/SP/graph.pkl"): if __name__ == "__main__": # execute only if run as a script - get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites") + get_graph(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites") get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433) diff --git a/EP/Cora_node_classification/node_classification.py b/EP/Cora_node_classification/node_classification.py index ee430ae94089cf105cb1e07d9c84362ba34358b1..1ab131557a918ca1caecbe2a294c4ac6fffa1cb8 100644 --- a/EP/Cora_node_classification/node_classification.py +++ b/EP/Cora_node_classification/node_classification.py @@ -4,6 +4,7 @@ @author: Utaemon Toyota @date: 31.1.2019 @project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics +@requirements: cora.py and random_nodes_for_node_classification.py as well as the cora data @usage: python3 node_classification.py [-g] [-e] [-s] [-i] [-n] -g / --graph Path to pickled networkX-graph -e / --embeddings Path to pickled embeddings diff --git a/EP/Cora_node_classification/random_nodes_for_node_classification.py b/EP/Cora_node_classification/random_nodes_for_node_classification.py index 59212b91ca26a340dae12ecbf094292a58793645..b4ae51dd2f2eb182d662b9733add75291a55c8a2 100644 --- a/EP/Cora_node_classification/random_nodes_for_node_classification.py +++ b/EP/Cora_node_classification/random_nodes_for_node_classification.py @@ -1,3 +1,9 @@ +""" +@requirements: cora.py +@info: Will be imported from node_classification. +""" + + import cora import pickle as pkl import random @@ -21,7 +27,7 @@ def get_random_num_nodes(set_elm, num, seed): random.seed(seed) return set(random.sample(set_elm, num)) -def get_num_random_nodes_for_all_classes_read(path = "/home/utaemon/SP/graph.pkl", num = 20, seed = 1): +def get_num_random_nodes_for_all_classes_read(path = "graph.pkl", num = 20, seed = 1): """get specific number of nodes per class, same number for all classes""" cora_dict = dict_of_node_classes_read(path) sampled_random_id_set = set() @@ -29,17 +35,3 @@ def get_num_random_nodes_for_all_classes_read(path = "/home/utaemon/SP/graph.pkl for id in get_random_num_nodes(cora_dict[key], num, seed): sampled_random_id_set.add(id) return sampled_random_id_set - - -#unused -''' -def get_num_of_random_nodes(path = "/home/utaemon/SP/graph.pkl", seed=0, num = 3): - """Get random nodes.""" - random.seed(seed) - cora_nodes = set(read_graph(path).nodes) - return set(random.sample(cora_nodes, num)) - -def pickle_output(method = get_num_random_nodes_for_all_classes_read(), output = "random_nodes.pkl"): - with open(output, "wb") as file: - pkl.dump(method, file) -''' \ No newline at end of file diff --git a/EP/cora_embeddings_2.pkl b/EP/cora_embeddings_2.pkl deleted file mode 100644 index adaa623ac5ff1f21584b7675263c197f34443b58..0000000000000000000000000000000000000000 Binary files a/EP/cora_embeddings_2.pkl and /dev/null differ diff --git a/EP/cora_embeddings_norm.pkl b/EP/cora_embeddings_norm.pkl deleted file mode 100644 index 41e8144fb8472ca1fffa492e1593e6d334b8e8c8..0000000000000000000000000000000000000000 Binary files a/EP/cora_embeddings_norm.pkl and /dev/null differ diff --git a/EP/node_classification_1000.txt b/EP/node_classification_1000.txt deleted file mode 100644 index 795ab1dd26085081773fd854d5795d71b89c95d2..0000000000000000000000000000000000000000 --- a/EP/node_classification_1000.txt +++ /dev/null @@ -1,145 +0,0 @@ -C=0.1 - -max -0.652 -avg -0.5900960000000008 - -[0.561, 0.574, 0.566, 0.595, 0.594, 0.578, 0.596, 0.578, 0.566, 0.54, 0.582, 0.591, 0.559, 0.581, 0.555, 0.618, 0.615, 0.6, 0.579, 0.572, 0.554, 0.599, 0.626, 0.614, 0.566, 0.585, 0.566, 0.597, 0.612, 0.613, 0.566, 0.579, 0.611, 0.562, 0.627, 0.624, 0.618, 0.599, 0.61, 0.612, 0.613, 0.551, 0.627, 0.581, 0.613, 0.587, 0.593, 0.574, 0.593, 0.572, 0.616, 0.606, 0.59, 0.576, 0.615, 0.589, 0.605, 0.572, 0.558, 0.553, 0.613, 0.597, 0.614, 0.575, 0.593, 0.621, 0.587, 0.55, 0.595, 0.597, 0.56, 0.599, 0.563, 0.6, 0.569, 0.584, 0.581, 0.574, 0.572, 0.618, 0.635, 0.591, 0.583, 0.549, 0.571, 0.593, 0.561, 0.594, 0.58, 0.597, 0.607, 0.6, 0.61, 0.571, 0.584, 0.61, 0.579, 0.583, 0.616, 0.608, 0.578, 0.584, 0.613, 0.617, 0.583, 0.557, 0.581, 0.595, 0.564, 0.562, 0.621, 0.613, 0.54, 0.6, 0.592, 0.613, 0.625, 0.577, 0.609, 0.606, 0.571, 0.611, 0.559, 0.62, 0.588, 0.604, 0.614, 0.565, 0.617, 0.604, 0.601, 0.62, 0.565, 0.585, 0.642, 0.553, 0.574, 0.599, 0.586, 0.599, 0.582, 0.627, 0.569, 0.608, 0.634, 0.618, 0.604, 0.565, 0.583, 0.564, 0.595, 0.587, 0.564, 0.564, 0.589, 0.589, 0.582, 0.594, 0.624, 0.595, 0.586, 0.629, 0.561, 0.568, 0.563, 0.6, 0.576, 0.575, 0.627, 0.591, 0.628, 0.636, 0.579, 0.573, 0.599, 0.633, 0.589, 0.578, 0.568, 0.623, 0.559, 0.585, 0.567, 0.622, 0.632, 0.617, 0.615, 0.592, 0.618, 0.596, 0.596, 0.588, 0.578, 0.564, 0.571, 0.572, 0.652, 0.55, 0.571, 0.638, 0.583, 0.558, 0.567, 0.608, 0.597, 0.538, 0.579, 0.601, 0.603, 0.593, 0.603, 0.598, 0.572, 0.604, 0.578, 0.595, 0.628, 0.603, 0.603, 0.588, 0.598, 0.557, 0.59, 0.586, 0.568, 0.582, 0.608, 0.589, 0.593, 0.59, 0.582, 0.576, 0.603, 0.577, 0.606, 0.579, 0.609, 0.615, 0.591, 0.559, 0.549, 0.591, 0.614, 0.607, 0.576, 0.586, 0.617, 0.594, 0.57, 0.606, 0.593, 0.553, 0.592, 0.581, 0.58, 0.613, 0.609, 0.579, 0.582, 0.615, 0.619, 0.569, 0.579, 0.599, 0.591, 0.61, 0.592, 0.572, 0.626, 0.619, 0.573, 0.608, 0.615, 0.579, 0.583, 0.574, 0.604, 0.603, 0.602, 0.618, 0.551, 0.616, 0.597, 0.576, 0.587, 0.586, 0.593, 0.581, 0.584, 0.589, 0.574, 0.594, 0.58, 0.585, 0.598, 0.597, 0.581, 0.62, 0.587, 0.626, 0.617, 0.613, 0.579, 0.57, 0.616, 0.594, 0.58, 0.616, 0.631, 0.558, 0.589, 0.598, 0.587, 0.562, 0.572, 0.594, 0.59, 0.587, 0.583, 0.6, 0.609, 0.588, 0.601, 0.568, 0.6, 0.624, 0.604, 0.564, 0.569, 0.618, 0.639, 0.566, 0.624, 0.609, 0.556, 0.579, 0.62, 0.59, 0.591, 0.521, 0.607, 0.597, 0.557, 0.597, 0.619, 0.573, 0.574, 0.608, 0.537, 0.603, 0.601, 0.587, 0.607, 0.635, 0.573, 0.603, 0.572, 0.59, 0.572, 0.619, 0.617, 0.552, 0.586, 0.594, 0.584, 0.604, 0.621, 0.591, 0.605, 0.567, 0.606, 0.601, 0.57, 0.606, 0.552, 0.572, 0.584, 0.577, 0.587, 0.612, 0.542, 0.584, 0.581, 0.589, 0.596, 0.588, 0.619, 0.559, 0.575, 0.602, 0.607, 0.616, 0.576, 0.631, 0.623, 0.595, 0.597, 0.568, 0.574, 0.602, 0.568, 0.613, 0.608, 0.599, 0.595, 0.632, 0.588, 0.589, 0.585, 0.596, 0.598, 0.634, 0.582, 0.613, 0.609, 0.632, 0.588, 0.597, 0.553, 0.56, 0.575, 0.567, 0.609, 0.531, 0.584, 0.579, 0.584, 0.568, 0.601, 0.601, 0.603, 0.573, 0.594, 0.564, 0.608, 0.593, 0.584, 0.581, 0.556, 0.584, 0.587, 0.585, 0.586, 0.625, 0.603, 0.615, 0.631, 0.611, 0.611, 0.591, 0.585, 0.559, 0.58, 0.573, 0.614, 0.612, 0.6, 0.605, 0.557, 0.575, 0.574, 0.576, 0.62, 0.624, 0.609, 0.571, 0.592, 0.58, 0.596, 0.613, 0.581, 0.595, 0.58, 0.587, 0.576, 0.602, 0.58, 0.585, 0.604, 0.623, 0.575, 0.603, 0.565, 0.596, 0.563, 0.604, 0.608, 0.593, 0.578, 0.603, 0.625, 0.598, 0.583, 0.605, 0.581, 0.551, 0.59, 0.556, 0.59, 0.597, 0.605, 0.585, 0.598, 0.618, 0.589, 0.576, 0.601, 0.591, 0.617, 0.565, 0.57, 0.568, 0.593, 0.591, 0.566, 0.581, 0.604, 0.61, 0.594, 0.595, 0.586, 0.607, 0.552, 0.614, 0.604, 0.619, 0.57, 0.564, 0.612, 0.639, 0.601, 0.626, 0.602, 0.564, 0.588, 0.592, 0.577, 0.56, 0.585, 0.579, 0.578, 0.583, 0.561, 0.609, 0.549, 0.598, 0.625, 0.607, 0.611, 0.587, 0.61, 0.614, 0.572, 0.602, 0.594, 0.601, 0.617, 0.579, 0.602, 0.583, 0.589, 0.608, 0.559, 0.604, 0.618, 0.611, 0.588, 0.614, 0.594, 0.601, 0.58, 0.567, 0.603, 0.531, 0.579, 0.646, 0.573, 0.598, 0.565, 0.58, 0.596, 0.587, 0.553, 0.586, 0.589, 0.599, 0.58, 0.6, 0.595, 0.563, 0.611, 0.618, 0.575, 0.586, 0.588, 0.588, 0.598, 0.563, 0.564, 0.594, 0.637, 0.58, 0.568, 0.6, 0.605, 0.609, 0.569, 0.589, 0.607, 0.588, 0.6, 0.602, 0.615, 0.577, 0.583, 0.613, 0.622, 0.576, 0.576, 0.616, 0.596, 0.57, 0.609, 0.555, 0.614, 0.601, 0.578, 0.569, 0.625, 0.576, 0.601, 0.57, 0.591, 0.569, 0.575, 0.593, 0.62, 0.557, 0.578, 0.559, 0.55, 0.586, 0.598, 0.565, 0.596, 0.577, 0.593, 0.557, 0.598, 0.562, 0.621, 0.606, 0.556, 0.593, 0.591, 0.577, 0.544, 0.588, 0.57, 0.585, 0.575, 0.596, 0.619, 0.567, 0.585, 0.58, 0.593, 0.559, 0.576, 0.587, 0.613, 0.594, 0.624, 0.603, 0.549, 0.585, 0.543, 0.585, 0.586, 0.596, 0.584, 0.578, 0.571, 0.601, 0.59, 0.557, 0.583, 0.584, 0.569, 0.575, 0.569, 0.595, 0.589, 0.597, 0.592, 0.556, 0.57, 0.558, 0.588, 0.635, 0.604, 0.634, 0.59, 0.62, 0.566, 0.58, 0.636, 0.565, 0.545, 0.617, 0.599, 0.611, 0.605, 0.562, 0.593, 0.571, 0.588, 0.627, 0.557, 0.549, 0.583, 0.615, 0.596, 0.62, 0.554, 0.604, 0.598, 0.589, 0.598, 0.614, 0.547, 0.61, 0.547, 0.582, 0.588, 0.578, 0.597, 0.613, 0.581, 0.549, 0.579, 0.553, 0.543, 0.584, 0.632, 0.568, 0.592, 0.612, 0.58, 0.567, 0.61, 0.562, 0.574, 0.551, 0.589, 0.603, 0.621, 0.576, 0.63, 0.613, 0.599, 0.599, 0.602, 0.578, 0.546, 0.596, 0.583, 0.622, 0.641, 0.575, 0.547, 0.589, 0.569, 0.601, 0.602, 0.567, 0.628, 0.584, 0.58, 0.567, 0.578, 0.599, 0.586, 0.58, 0.636, 0.593, 0.63, 0.582, 0.589, 0.605, 0.519, 0.585, 0.588, 0.595, 0.605, 0.585, 0.606, 0.587, 0.581, 0.579, 0.608, 0.54, 0.621, 0.618, 0.599, 0.614, 0.617, 0.609, 0.564, 0.639, 0.57, 0.577, 0.573, 0.61, 0.599, 0.55, 0.586, 0.52, 0.576, 0.617, 0.587, 0.61, 0.591, 0.597, 0.561, 0.558, 0.586, 0.596, 0.551, 0.509, 0.614, 0.587, 0.573, 0.616, 0.608, 0.57, 0.606, 0.601, 0.585, 0.587, 0.594, 0.528, 0.57, 0.625, 0.607, 0.603, 0.571, 0.62, 0.595, 0.593, 0.614, 0.604, 0.566, 0.592, 0.574, 0.594, 0.592, 0.604, 0.602, 0.603, 0.582, 0.615, 0.588, 0.589, 0.625, 0.603, 0.574, 0.574, 0.595, 0.588, 0.592, 0.565, 0.604, 0.562, 0.589, 0.565, 0.559, 0.572, 0.581, 0.557, 0.613, 0.584, 0.57, 0.592, 0.581, 0.589, 0.597, 0.577, 0.637, 0.59, 0.599, 0.568, 0.612, 0.634, 0.586, 0.586, 0.609, 0.581, 0.608, 0.58, 0.565, 0.595, 0.575, 0.584, 0.563, 0.56, 0.624, 0.589, 0.621, 0.597, 0.575, 0.57, 0.597, 0.566, 0.582, 0.575, 0.584, 0.618, 0.586, 0.63, 0.584, 0.613, 0.649, 0.603, 0.575, 0.574, 0.575, 0.577, 0.605, 0.608, 0.622, 0.582, 0.58, 0.573, 0.581, 0.584, 0.542, 0.572, 0.589, 0.577, 0.592, 0.577, 0.584, 0.6, 0.581, 0.628, 0.616, 0.564, 0.578, 0.574, 0.602, 0.587, 0.602, 0.596, 0.602, 0.57, 0.606, 0.572, 0.595, 0.554, 0.578, 0.591, 0.549, 0.598, 0.613, 0.545, 0.573, 0.635, 0.587, 0.611, 0.589, 0.598, 0.611, 0.611, 0.576, 0.564, 0.575, 0.566, 0.601, 0.642, 0.584, 0.616, 0.565, 0.608, 0.606, 0.585, 0.587, 0.592, 0.612, 0.583, 0.593, 0.612, 0.6, 0.583, 0.629, 0.616, 0.594, 0.598, 0.58, 0.572] - - -C Bestimmung -/usr/bin/python3 /home/utaemon/PycharmProjects/Softwareprojekt/node_classification.py -Iteration/Random Seed: 0 -C = 0.01 -0.558 -C = 0.1 -0.564 -C = 0.5 -0.564 -C = 1.0 -0.561 -C = 5.0 -0.542 -C = 10.0 -0.544 -Iteration/Random Seed: 1 -C = 0.01 -0.642 -C = 0.1 -0.635 -C = 0.5 -0.628 -C = 1.0 -0.619 -C = 5.0 -0.611 -C = 10.0 -0.61 -Iteration/Random Seed: 2 -C = 0.01 -0.549 -C = 0.1 -0.57 -C = 0.5 -0.553 -C = 1.0 -0.548 -C = 5.0 -0.539 -C = 10.0 -0.538 -Iteration/Random Seed: 3 -C = 0.01 -0.629 -C = 0.1 -0.624 -C = 0.5 -0.608 -C = 1.0 -0.603 -C = 5.0 -0.578 -C = 10.0 -0.575 -Iteration/Random Seed: 4 -C = 0.01 -0.62 -C = 0.1 -0.638 -C = 0.5 -0.632 -C = 1.0 -0.632 -C = 5.0 -0.629 -C = 10.0 -0.631 -Iteration/Random Seed: 5 -C = 0.01 -0.595 -C = 0.1 -0.615 -C = 0.5 -0.62 -C = 1.0 -0.622 -C = 5.0 -0.616 -C = 10.0 -0.61 -Iteration/Random Seed: 6 -C = 0.01 -0.607 -C = 0.1 -0.626 -C = 0.5 -0.618 -C = 1.0 -0.61 -C = 5.0 -0.598 -C = 10.0 -0.589 -Iteration/Random Seed: 7 -C = 0.01 -0.596 -C = 0.1 -0.603 -C = 0.5 -0.587 -C = 1.0 -0.581 -C = 5.0 -0.563 -C = 10.0 -0.561 -Iteration/Random Seed: 8 -C = 0.01 -0.602 -C = 0.1 -0.619 -C = 0.5 -0.617 -C = 1.0 -0.611 -C = 5.0 -0.608 -C = 10.0 -0.605 -Iteration/Random Seed: 9 -C = 0.01 -0.58 -C = 0.1 -0.595 -C = 0.5 -0.585 -C = 1.0 -0.577 -C = 5.0 -0.563 -C = 10.0 -0.56 - -Process finished with exit code 0 -