Skip to content
Snippets Groups Projects
Commit 3dba9dfb authored by toyota's avatar toyota
Browse files

undo

parent 7a37af64
No related branches found
No related tags found
No related merge requests found
""" """
@project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics Getting a networkx graph from Cora. Graph can be saved in txt file. CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable).
@requirements: cora data, numpy, networkX, pickle Initialize Embeddings for n dimensions with initialize-module.
@info Arrays are initialized in normal or uniform random format (default = normal).
Getting a networkx graph from Cora. Graph will be saved in a pickle file.
@usage #Usage
python3 cora.py [-n] [-e] [-o] get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
-n / --nodes Path to cora file containing nodes -> return graph with nodes and edges
-e / --edges Path to cora file containing edges To write the graph information to a file:
-o / --output Path where the graph should be saved def write_graph_to_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/")
To write the dictionary of initialized embeddings to a file:
def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "/home/utaemon/SP/")
""" """
import argparse
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import pickle as pkl import pickle as pkl
import os
def list_of_classes():
    """Return the seven class names known to this module, in a fixed order.

    A fresh list is built on every call, so callers may modify the result
    without affecting later calls.
    """
    # NOTE(review): presumably the Cora topic labels -- confirm against cora.content.
    return [
        "Case_Based",
        "Genetic_Algorithms",
        "Neural_Networks",
        "Probabilistic_Methods",
        "Reinforcement_Learning",
        "Rule_Learning",
        "Theory",
    ]
def read_file_and_get_nodes(graph_name, path="/../../data/cora/raw/cora.content"): def read_file_and_get_nodes(graph_name, path):
class_list = list_of_classes() class_list = list_of_classes()
max_bow_len = 0 max_bow_len = 0
node_mapping = {} node_mapping = {}
...@@ -47,7 +49,7 @@ def read_file_and_get_nodes(graph_name, path="/../../data/cora/raw/cora.content" ...@@ -47,7 +49,7 @@ def read_file_and_get_nodes(graph_name, path="/../../data/cora/raw/cora.content"
graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))} graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))}
return node_mapping return node_mapping
def read_file_and_get_edges(graph_name, node_mapping, path="/../../data/cora/raw/cora.cites"): def read_file_and_get_edges(graph_name, node_mapping, path):
with open(path) as file: with open(path) as file:
for line in file.readlines(): for line in file.readlines():
a, b = line.split() a, b = line.split()
...@@ -55,6 +57,7 @@ def read_file_and_get_edges(graph_name, node_mapping, path="/../../data/cora/raw ...@@ -55,6 +57,7 @@ def read_file_and_get_edges(graph_name, node_mapping, path="/../../data/cora/raw
#---------------------create graph-------------- #---------------------create graph--------------
def get_graph(path_nodes, path_edges): def get_graph(path_nodes, path_edges):
Cora_graph = nx.Graph() Cora_graph = nx.Graph()
node_mapping = read_file_and_get_nodes(Cora_graph, path_nodes) node_mapping = read_file_and_get_nodes(Cora_graph, path_nodes)
...@@ -108,21 +111,26 @@ def add_max_values_to_graph(path_nodes, path_edges): #update ...@@ -108,21 +111,26 @@ def add_max_values_to_graph(path_nodes, path_edges): #update
Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges) Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges)
return Cora_graph return Cora_graph
def write_pickle_graph_file(path_nodes="/../../data/cora/raw/cora.content", path_edges="/../../data/cora/raw/cora.cites", path_output_graph = "/../../data/cora/graph/"): # not used, initialization happens in EP
'''
def get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433):
return initialize.get_embeddings(rand_type=rand_type, dimension = dimension, quantity=quantity)
'''
def write_pickle_graph_file(path_nodes, path_edges, output_path):
    """Build the graph with ``add_max_values_to_graph`` and pickle it to ``output_path``.

    Args:
        path_nodes: Passed through to ``add_max_values_to_graph``.
        path_edges: Passed through to ``add_max_values_to_graph``.
        output_path: Full path (including file name) of the pickle file to write.

    Missing parent directories of ``output_path`` are created first.
    """
    g = add_max_values_to_graph(path_nodes, path_edges)
    directory = os.path.dirname(output_path)
    # A bare file name has an empty directory part; os.mkdir("") would raise,
    # so only create directories when there is something to create.
    if directory:
        # makedirs handles nested paths and tolerates an already existing directory.
        os.makedirs(directory, exist_ok=True)
    with open(output_path, "wb") as output:
        pkl.dump(g, output)
def read_pickle_graph(path="graph.pkl"):
    """Load and return the object pickled in the file at ``path``.

    Intended to be used from node_classification.py for accessing the graph.

    Args:
        path: Location of the pickle file (default ``"graph.pkl"``).

    Returns:
        The object that was stored in the pickle file.

    Raises:
        FileNotFoundError: If ``path`` does not exist (raised by ``open``).
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # this project wrote itself, never files from an untrusted source.
    with open(path, "rb") as f:
        return pkl.load(f)
'''
if __name__ == "__main__": if __name__ == "__main__":
# execute only if run as a script # execute only if run as a script
parser = argparse.ArgumentParser(description="Skript for building cora graph.") get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
parser.add_argument("-n", "--nodes", default="/../../data/cora/raw/cora.content", help="path to file containing cora nodes") # get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433)
parser.add_argument("-e", "--edges", default="/../../data/cora/raw/cora.cites", help="path to file containing edges/citations") '''
parser.add_argument("-o", "--output", default="/../../data/cora/graph/", help="path where the graph should be saved")
args = parser.parse_args()
write_pickle_graph_file(path_nodes=args.nodes, path_edges=args.edges, path_output_graph=args.output)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment