Skip to content
Snippets Groups Projects
Commit 3dba9dfb authored by toyota's avatar toyota
Browse files

undo

parent 7a37af64
No related branches found
No related tags found
No related merge requests found
"""
@project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics
@requirements: cora data, numpy, networkX, pickle
@info
Getting a networkx graph from Cora. Graph will be saved in a pickle file.
@usage
python3 cora.py [-n] [-e] [-o]
-n / --nodes Path to cora file containing nodes
-e / --edges Path to cora file containing edges
-o / --output Path where the graph should be saved
Builds a networkx graph from Cora. The graph can be saved to a txt file. NOTE: numpy arrays are converted to lists because a NumPy array is not JSON serializable.
Initialize Embeddings for n dimensions with initialize-module.
Arrays are initialized in normal or uniform random format (default = normal).
#Usage
get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
-> return graph with nodes and edges
To write the graph information to a file:
def write_graph_to_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/")
To write the dictionary with the initialized embeddings to a file:
def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "/home/utaemon/SP/")
"""
import argparse
import networkx as nx
import numpy as np
import pickle as pkl
import os
def list_of_classes():
    """Return the class names used for the Cora data.

    A new list is built on every call, so callers may modify the result
    without affecting later calls.

    Returns:
        list of str: The seven class names, in a fixed order.
    """
    return [
        "Case_Based",
        "Genetic_Algorithms",
        "Neural_Networks",
        "Probabilistic_Methods",
        "Reinforcement_Learning",
        "Rule_Learning",
        "Theory",
    ]
def read_file_and_get_nodes(graph_name, path="/../../data/cora/raw/cora.content"):
def read_file_and_get_nodes(graph_name, path):
class_list = list_of_classes()
max_bow_len = 0
node_mapping = {}
......@@ -47,7 +49,7 @@ def read_file_and_get_nodes(graph_name, path="/../../data/cora/raw/cora.content"
graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))}
return node_mapping
def read_file_and_get_edges(graph_name, node_mapping, path="/../../data/cora/raw/cora.cites"):
def read_file_and_get_edges(graph_name, node_mapping, path):
with open(path) as file:
for line in file.readlines():
a, b = line.split()
......@@ -55,6 +57,7 @@ def read_file_and_get_edges(graph_name, node_mapping, path="/../../data/cora/raw
#---------------------create graph--------------
def get_graph(path_nodes, path_edges):
Cora_graph = nx.Graph()
node_mapping = read_file_and_get_nodes(Cora_graph, path_nodes)
......@@ -108,21 +111,26 @@ def add_max_values_to_graph(path_nodes, path_edges): #update
Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges)
return Cora_graph
def write_pickle_graph_file(path_nodes="/../../data/cora/raw/cora.content", path_edges="/../../data/cora/raw/cora.cites", path_output_graph = "/../../data/cora/graph/"):
# not used, initialization happens in EP
'''
def get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433):
return initialize.get_embeddings(rand_type=rand_type, dimension = dimension, quantity=quantity)
'''
def write_pickle_graph_file(path_nodes, path_edges, output_path):
    """Build the Cora graph and pickle it to ``output_path``.

    Args:
        path_nodes: Path to the Cora file containing the nodes.
        path_edges: Path to the Cora file containing the edges.
        output_path: Full path of the pickle file to write. Missing parent
            directories are created first.
    """
    g = add_max_values_to_graph(path_nodes, path_edges)
    directory = os.path.dirname(output_path)
    # A bare filename has an empty directory part; creating "" would raise,
    # so only create directories when there is one. makedirs also handles
    # nested missing directories, which a single mkdir cannot.
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(output_path, "wb") as output:
        pkl.dump(g, output)
def read_pickle_graph(path="graph.pkl"):  # will be used on node_classification.py for accessing the graph
    """Load and return the object pickled in the file at ``path``.

    Args:
        path: Path to the pickle file. Defaults to ``"graph.pkl"``.

    Returns:
        The unpickled object stored in the file.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that come
    # from a trusted source.
    with open(path, "rb") as f:
        return pkl.load(f)
# NOTE(review): as shown here, the script entry point below sits inside a
# triple-quoted string, so none of it is executed.
# Before re-enabling it, confirm the call to write_pickle_graph_file: it passes
# path_output_graph=..., while the later definition of that function names its
# third parameter output_path.
'''
if __name__ == "__main__":
# execute only if run as a script
parser = argparse.ArgumentParser(description="Skript for building cora graph.")
parser.add_argument("-n", "--nodes", default="/../../data/cora/raw/cora.content", help="path to file containing cora nodes")
parser.add_argument("-e", "--edges", default="/../../data/cora/raw/cora.cites", help="path to file containing edges/citations")
parser.add_argument("-o", "--output", default="/../../data/cora/graph/", help="path where the graph should be saved")
args = parser.parse_args()
write_pickle_graph_file(path_nodes=args.nodes, path_edges=args.edges, path_output_graph=args.output)
get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
# get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433)
'''
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment