Commit ec9bba65 authored by toyota

bug fix

parent 0b0c3aa7
"""
Getting a networkx graph from Cora. Graph can be saved in pickle file.
Getting a networkx graph from Cora. Graph can be saved in txt file. CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable).
Initialize Embeddings for n dimensions with initialize-module.
Arrays are initialized in normal or uniform random format (default = normal).
......@@ -21,39 +21,50 @@ import pickle as pkl
def list_of_classes():
return ["Case_Based", "Genetic_Algorithms", "Neural_Networks", "Probabilistic_Methods", "Reinforcement_Learning", "Rule_Learning", "Theory"]
def read_file_and_get_nodes(graph_name, path="cora/cora.content"):
def read_file_and_get_nodes(graph_name, path="/home/utaemon/SP/cora/cora.content"):
class_list = list_of_classes()
max_bow_len = 0
node_mapping = {}
length_array = np.empty((0))
with open(path) as file:
counter = 0
for line in file.readlines():
split_line = np.array(line.split())
paper_id, split_line = split_line[0], split_line[1:]
node_mapping[paper_id] = counter
paper_id = counter
counter += 1
paper_class, all_bow_of_paper = split_line[-1], split_line[:-1]
paper_bow = np.where(all_bow_of_paper == "1") #get indices which words occur
if len(paper_bow) > max_bow_len:
max_bow_len = len(paper_bow[0]
paper_bow = np.where(all_bow_of_paper == "1") #get indices which words occur
length_array = np.append(length_array, len(paper_bow[0]))
if len(paper_bow[0]) > max_bow_len:
max_bow_len = len(paper_bow[0])
paper_class = class_list.index(paper_class) #get index of class to numeralize
#add infos to Graph
graph_name.add_node(int(paper_id))
graph_name.node[int(paper_id)]["class"] = paper_class
graph_name.node[int(paper_id)]["bow"] = paper_bow
graph_name.graph["max_bow"] = max_bow_len
graph_name.graph["len_bow"] = 1433
graph_name.node[int(paper_id)]["bow"] = paper_bow[0]
graph_name.node[int(paper_id)]["paper_id"] = [paper_id]
graph_name.graph["bow"]={"maxlen": max_bow_len, "vocab": 1433, "lengths": length_array}
graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))}
return node_mapping
def read_file_and_get_edges(graph_name, path="cora/cora.cites"):
def read_file_and_get_edges(graph_name, node_mapping, path="/home/utaemon/SP/cora/cora.cites"):
with open(path) as file:
for line in file.readlines():
a, b = line.split()
graph_name.add_edge(int(a),int(b))
graph_name.add_edge(node_mapping[a],node_mapping[b])
#initialize Graph
def get_graph(path_nodes="cora/cora.content", path_edges="cora/cora.cites"):
#---------------------create graph--------------
def get_graph(path_nodes, path_edges):
Cora_graph = nx.Graph()
read_file_and_get_nodes(Cora_graph, path_nodes)
read_file_and_get_edges(Cora_graph, path_edges)
node_mapping = read_file_and_get_nodes(Cora_graph, path_nodes)
read_file_and_get_edges(Cora_graph, node_mapping, path_edges)
return Cora_graph
#for getting maxima
#----------for getting maxima--------
def get_neighbours_dict(path_nodes, path_edges):
dict_neighbours = {}
for edge in get_graph(path_nodes, path_edges).edges:
......@@ -100,9 +111,11 @@ def add_max_values_to_graph(path_nodes, path_edges): #update
Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges)
return Cora_graph
#write graph into pickle file
def get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433):
return initialize.get_embeddings(rand_type=rand_type, dimension = dimension, quantity=quantity)
def write_pickle_graph_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/"):
g = get_graph(path_nodes, path_edges)
g = add_max_values_to_graph(path_nodes, path_edges)
with open(path_output_graph + "graph.pkl", "wb") as output:
pkl.dump(g, output)
......@@ -111,6 +124,8 @@ def read_pickle_graph(path = "/home/utaemon/SP/graph.pkl"):
graph = pkl.load(f)
return graph
if __name__ == "__main__":
# execute only if run as a script
get_graph(path_nodes="cora/cora.content", path_edges="cora/cora.cites")
get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
get_init_emb(rand_type="normal_random", dimension = 128, quantity=1433)
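The central fix in the hunk above: np.where on a 1-D array returns a tuple of index arrays, so len(paper_bow) is always 1 (the number of axes), whereas len(paper_bow[0]) is the number of words that actually occur in the paper. A minimal standalone sketch of the difference (not part of the commit):

import numpy as np

row = np.array(["0", "1", "1", "0"])
hits = np.where(row == "1")   # tuple of index arrays: (array([1, 2]),)
print(len(hits))              # 1 -- the old comparison counted axes, not words
print(len(hits[0]))           # 2 -- the fixed comparison counts occurring words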
"""
Getting a networkx graph from Cora. Graph can be saved in txt file. CARE: numpy-arrays are converted to lists due to errors (NumPy array is not JSON serializable).
Initialize Embeddings for n dimensions with initialize-module.
Arrays are initialized in normal or uniform random format (default = normal).
#Usage
get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
-> return graph with nodes and edges
To write the graph informations in file:
def write_graph_to_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph = "/home/utaemon/SP/")
To write the dictionary with initalizing Embeddings in file:
def write_dict_to_file(rand_type="normal_random", dimension = 128, quantity=1433, path_output_emb = "/home/utaemon/SP/")
"""
import networkx as nx
import numpy as np
import initialize
import pickle as pkl

def list_of_classes():
    return ["Case_Based", "Genetic_Algorithms", "Neural_Networks", "Probabilistic_Methods", "Reinforcement_Learning", "Rule_Learning", "Theory"]

def read_file_and_get_nodes(graph_name, path="/home/utaemon/SP/cora/cora.content"):
    # NOTE: graph_name.node[...] requires networkx < 2.4; newer releases renamed
    # the attribute view to graph_name.nodes[...].
    class_list = list_of_classes()
    max_bow_len = 0
    node_mapping = {}
    length_array = np.empty((0))
    with open(path) as file:
        counter = 0
        for line in file.readlines():
            split_line = np.array(line.split())
            paper_id, split_line = split_line[0], split_line[1:]
            node_mapping[paper_id] = counter    # map the original paper id to a consecutive index
            paper_id = counter
            counter += 1
            paper_class, all_bow_of_paper = split_line[-1], split_line[:-1]
            paper_bow = np.where(all_bow_of_paper == "1")    # indices of the words that occur in this paper
            length_array = np.append(length_array, len(paper_bow[0]))
            if len(paper_bow[0]) > max_bow_len:
                max_bow_len = len(paper_bow[0])
            paper_class = class_list.index(paper_class)    # index of the class, to numericalize it
            # add the information to the graph
            graph_name.add_node(int(paper_id))
            graph_name.node[int(paper_id)]["class"] = paper_class
            graph_name.node[int(paper_id)]["bow"] = paper_bow[0]
            graph_name.node[int(paper_id)]["paper_id"] = [paper_id]
    graph_name.graph["bow"] = {"maxlen": max_bow_len, "vocab": 1433, "lengths": length_array}
    graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))}
    return node_mapping

def read_file_and_get_edges(graph_name, node_mapping, path="/home/utaemon/SP/cora/cora.cites"):
    with open(path) as file:
        for line in file.readlines():
            a, b = line.split()
            graph_name.add_edge(node_mapping[a], node_mapping[b])

#---------------------create graph--------------
def get_graph(path_nodes, path_edges):
    Cora_graph = nx.Graph()
    node_mapping = read_file_and_get_nodes(Cora_graph, path_nodes)
    read_file_and_get_edges(Cora_graph, node_mapping, path_edges)
    return Cora_graph

#----------for getting maxima--------
def get_neighbours_dict(path_nodes, path_edges):
    dict_neighbours = {}
    for edge in get_graph(path_nodes, path_edges).edges:
        if edge[0] not in dict_neighbours:
            dict_neighbours[edge[0]] = {edge[1]}
        else:
            dict_neighbours[edge[0]].add(edge[1])
        if edge[1] not in dict_neighbours:
            dict_neighbours[edge[1]] = {edge[0]}
        else:
            dict_neighbours[edge[1]].add(edge[0])
    return dict_neighbours

def get_node_bow_len(path_nodes, path_edges):
    bow_len_dict = {}
    nodes = get_graph(path_nodes, path_edges).nodes(data=True)
    for node in nodes:
        bow_len_dict[node[0]] = len(node[1]["bow"])    # node[0] = paper id, node[1] holds the node attributes
    return bow_len_dict

def get_max_neighbours(path_nodes, path_edges):
    neighbours_dict = get_neighbours_dict(path_nodes, path_edges)
    max_neighbours = 0
    for key in neighbours_dict:
        if len(neighbours_dict[key]) > max_neighbours:
            max_neighbours = len(neighbours_dict[key])
    return max_neighbours

def get_max_bow_neighbours(path_nodes, path_edges):
    # largest total bag-of-words length summed over all neighbours of a node
    node_bow_len_dict = get_node_bow_len(path_nodes, path_edges)
    neighbours_dict = get_neighbours_dict(path_nodes, path_edges)
    max_neighbour_bow_len = 0
    for key in neighbours_dict:
        temp_bow_max_len = 0
        for neighbour in neighbours_dict[key]:
            temp_bow_max_len += node_bow_len_dict[neighbour]
        if temp_bow_max_len > max_neighbour_bow_len:
            max_neighbour_bow_len = temp_bow_max_len
    return max_neighbour_bow_len

def add_max_values_to_graph(path_nodes, path_edges):    # update the graph with the max values
    Cora_graph = get_graph(path_nodes, path_edges)
    Cora_graph.graph["bow"]["maxlen_neighbours"] = get_max_bow_neighbours(path_nodes, path_edges)
    Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges)
    return Cora_graph

def get_init_emb(rand_type="normal_random", dimension=128, quantity=1433):
    return initialize.get_embeddings(rand_type=rand_type, dimension=dimension, quantity=quantity)

def write_pickle_graph_file(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites", path_output_graph="/home/utaemon/SP/"):
    g = add_max_values_to_graph(path_nodes, path_edges)
    with open(path_output_graph + "graph.pkl", "wb") as output:
        pkl.dump(g, output)

def read_pickle_graph(path="/home/utaemon/SP/graph.pkl"):
    with open(path, 'rb') as f:
        graph = pkl.load(f)
    return graph

if __name__ == "__main__":
    # execute only if run as a script
    get_graph(path_nodes="/home/utaemon/SP/cora/cora.content", path_edges="/home/utaemon/SP/cora/cora.cites")
    get_init_emb(rand_type="normal_random", dimension=128, quantity=1433)
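A short usage sketch for the module above, assuming it is saved as cora_graph.py (the file name is not shown on this page) and that a local copy of the Cora dataset sits under cora/ (not part of the commit):

from cora_graph import write_pickle_graph_file, read_pickle_graph

# build the graph with the neighbourhood maxima attached, pickle it, read it back
write_pickle_graph_file(path_nodes="cora/cora.content", path_edges="cora/cora.cites", path_output_graph="./")
g = read_pickle_graph(path="./graph.pkl")
print(len(g))                                     # 2708 nodes for the standard Cora release
print(g.graph["bow"]["maxlen"])                   # longest bag of words of a single paper
print(g.graph["paper_id"]["maxlen_neighbours"])   # highest node degree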
File added
File added
"""
Initialize Embeddings for n dimensions.
Arrays are initialized in normal or uniform random format.
The seed has the default value 1.
#Usage
Embeddings.normal_random(dimension = 128, seed = 1)
Embeddings.uniform_random(dimension = 128, seed = 1)
or to get a set of Embeddings:
get_embeddings(rand_type="normal_random", dimension = 128, quantity=1)
"""
import numpy as np
class Embeddings:
def normal_random(dimension = 128, seed = 1):
np.random.seed(seed)
return np.random.normal(size=dimension)
def uniform_random(dimension = 128, seed = 1):
np.random.seed(seed)
return np.random.uniform(size=dimension)
def get_embeddings(rand_type="normal_random", dimension = 128, quantity=1):
emb_dict = {}
if rand_type == "normal_random":
for i in range(0, quantity):
emb_dict[i] = Embeddings.normal_random(dimension = dimension, seed = i)
elif rand_type == "uniform_random":
for i in range(0, quantity):
emb_dict[i] = Embeddings.uniform_random(dimension = dimension, seed = i)
return emb_dict
\ No newline at end of file
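A quick sketch of what get_embeddings returns (not part of the commit): a dict mapping each index 0..quantity-1 to one random vector, and because the index is also the seed, repeated calls reproduce the same vectors.

from initialize import Embeddings, get_embeddings

emb = get_embeddings(rand_type="uniform_random", dimension=4, quantity=2)
print(sorted(emb))   # [0, 1]
print(emb[1])        # four uniform values drawn with seed 1
# reproducible: index i reseeds numpy with i before drawing
assert (emb[1] == Embeddings.uniform_random(dimension=4, seed=1)).all()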
@@ -18,7 +18,7 @@ import pickle as pkl
 import numpy as np
 from sklearn.linear_model import LogisticRegression
 import random
-import random_nodes
+import random_nodes_for_node_classification
 import sys
 import argparse
@@ -121,7 +121,7 @@ if __name__ == "__main__":
     parser.add_argument("-i", "--iterations", type=int, default = 10, help="number of iterations of node classification. Counter of iteration is random seed.")
     parser.add_argument("-n", "--number", type=int, default = 20, help="number of instances per class for training")
     parser.add_argument("-t", "--testset", type=int, default = 1000, help="number of random instances in testset")
-    parser.add_argument("-c", "--regularization", type=int|float, default=0.1, help="Inverse of regularization strength")
+    parser.add_argument("-c", "--regularization", type=float, default=0.1, help="Inverse of regularization strength")
     args = parser.parse_args()
     if "-s" in sys.argv[1:]:
         node_classification(path_graph=args.graph, path_embeddings=args.embeddings, seed= args.seed, num_per_class=args.number, C=args.regularization)
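The second hunk above repairs an argparse crash: type must be a callable that argparse applies to the raw argument string, and int|float is not one (before Python 3.10 the expression itself raises TypeError; from 3.10 on it builds a types.UnionType, which cannot be called). Plain float also accepts integer input, which covers the int-or-float intent. A minimal standalone check (not part of the commit):

import argparse

parser = argparse.ArgumentParser()
# float is applied to the raw string, so both "3" and "0.1" parse cleanly
parser.add_argument("-c", "--regularization", type=float, default=0.1,
                    help="Inverse of regularization strength")
args = parser.parse_args(["-c", "3"])
print(args.regularization)   # 3.0 -- integer input is accepted and widened to float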
File added
File added
File added