From 7ed4414ff5d5e8a526f1eaf1315dd5f02bf217b4 Mon Sep 17 00:00:00 2001
From: Utaemon Toyota <toyota@cl.uni-heidelberg.de>
Date: Wed, 27 Feb 2019 21:21:42 +0100
Subject: [PATCH] Change default data paths to the shared data/ tree

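All default input and output paths previously pointed at ad-hoc
locations such as /cora_data/ or a hard-coded home directory. They now
point into the shared data/ tree at the repository root via relative
defaults (../../data/...).

These relative defaults resolve against the current working directory,
so the scripts are expected to be run from the directory they live in.
A minimal sketch of a more robust alternative (hypothetical, not part
of this patch; HERE and CORA_CONTENT are illustrative names) would
anchor the defaults to the script location instead:

    import os

    # Resolve the data tree relative to this file rather than to the
    # process working directory.
    HERE = os.path.dirname(os.path.abspath(__file__))
    CORA_CONTENT = os.path.join(HERE, "../../data/cora/raw/cora.content")

With the defaults as committed, a typical invocation from
EP/Cora_node_classification/ is:

    python3 cora.py -n ../../data/cora/raw/cora.content \
                    -e ../../data/cora/raw/cora.cites \
                    -o ../../data/cora/graph/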
---
 EP/Cora_node_classification/cora.py            | 12 ++++++------
 .../node_classification.py                     | 14 ++++++--------
 .../random_nodes_for_node_classification.py    |  2 +-
 Senseval_Prep/senseval_preprocessing.py        | 18 ++++++++----------
 4 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/EP/Cora_node_classification/cora.py b/EP/Cora_node_classification/cora.py
index c1a3d95..5f192dd 100644
--- a/EP/Cora_node_classification/cora.py
+++ b/EP/Cora_node_classification/cora.py
@@ -22,7 +22,7 @@ import pickle as pkl
 def list_of_classes():
     return ["Case_Based", "Genetic_Algorithms", "Neural_Networks", "Probabilistic_Methods", "Reinforcement_Learning", "Rule_Learning", "Theory"]
 
-def read_file_and_get_nodes(graph_name, path="/cora_data/cora.content"):
+def read_file_and_get_nodes(graph_name, path="../../data/cora/raw/cora.content"):
     class_list = list_of_classes()
     max_bow_len = 0
     node_mapping = {}
@@ -50,7 +50,7 @@ def read_file_and_get_nodes(graph_name, path="/cora_data/cora.content"):
     graph_name.graph["paper_id"] = {"maxlen": 1, "vocab": (len(graph_name)), "lengths": np.ones(len(graph_name))}
     return node_mapping
 
-def read_file_and_get_edges(graph_name, node_mapping, path="/cora_data/cora.cites"):
+def read_file_and_get_edges(graph_name, node_mapping, path="../../data/cora/raw/cora.cites"):
     with open(path) as file:
         for line in file.readlines():
             a, b = line.split()
@@ -112,7 +112,7 @@ def add_max_values_to_graph(path_nodes, path_edges):                    #update
     Cora_graph.graph["paper_id"]["maxlen_neighbours"] = get_max_neighbours(path_nodes, path_edges)
     return Cora_graph
 
-def write_pickle_graph_file(path_nodes="/cora_data/cora.content", path_edges="/cora_data/cora.cites", path_output_graph = ""):
+def write_pickle_graph_file(path_nodes="../../data/cora/raw/cora.content", path_edges="../../data/cora/raw/cora.cites", path_output_graph = "../../data/cora/graph/"):
     g = add_max_values_to_graph(path_nodes, path_edges)
     with open(path_output_graph + "graph.pkl", "wb") as output:
         pkl.dump(g, output)
@@ -125,8 +125,8 @@ def read_pickle_graph(path = "graph.pkl"):				#will be used on node_classificati
 if __name__ == "__main__":
     # execute only if run as a script
     parser = argparse.ArgumentParser(description="Script for building the Cora graph.")
-    parser.add_argument("-n", "--nodes", default="/cora_data/cora.content", help="path to file containing cora nodes")
-    parser.add_argument("-e", "--edges", default="/home/utaemon/SP/cora/cora.cites", help="path to file containing edges/citations")
-    parser.add_argument("-o", "--output", default="", help="path where the graph should be saved")
+    parser.add_argument("-n", "--nodes", default="../../data/cora/raw/cora.content", help="path to file containing cora nodes")
+    parser.add_argument("-e", "--edges", default="../../data/cora/raw/cora.cites", help="path to file containing edges/citations")
+    parser.add_argument("-o", "--output", default="../../data/cora/graph/", help="path where the graph should be saved")
     args = parser.parse_args()
     write_pickle_graph_file(path_nodes=args.nodes, path_edges=args.edges, path_output_graph=args.output)
diff --git a/EP/Cora_node_classification/node_classification.py b/EP/Cora_node_classification/node_classification.py
index 1ab1315..8588350 100644
--- a/EP/Cora_node_classification/node_classification.py
+++ b/EP/Cora_node_classification/node_classification.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python3
 
 """
-@author: Utaemon Toyota
-@date: 31.1.2019
 @project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics
 @requirements: cora.py and random_nodes_for_node_classification.py as well as the cora data
 @usage: python3 node_classification.py [-g] [-e] [-s] [-i] [-n]
@@ -45,7 +43,7 @@ def get_class_list(path):
     return np.array(class_list)
 
 #------------------------classification
-def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "graph.pkl", path_emb = "cora_embeddings_uniform_m20.pkl"):
+def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "../../data/cora/graph/graph.pkl", path_emb = "../../data/cora/embeddings/cora_embeddings_uniform_m20.pkl"):
     training_nodes = training(path_graph, seed=seed, num = num)
     emb = get_embeddings(path_emb)
     cl = get_class_list(path_graph)
@@ -72,7 +70,7 @@ def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "gra
     print (score)
     return score
 
-def classify_func(range_seeds = 10, num = 20, num_test_instances = 1000, path_graph = "graph.pkl", path_emb = "cora_embeddings_uniform_m20.pkl"):
+def classify_func(range_seeds = 10, num = 20, num_test_instances = 1000, path_graph = "../../data/cora/graph/graph.pkl", path_emb = "../../data/cora/embeddings/cora_embeddings_uniform_m20.pkl"):
     C_li = [0.01, 0.1, 0.5, 1.0, 5.0, 10.0]
     for i in range(range_seeds):
         print ("Iteration/Random Seed:", i)
@@ -80,7 +78,7 @@ def classify_func(range_seeds = 10, num = 20, num_test_instances = 1000, path_gr
             classify(C, seed=i, num = num, num_test_instances = num_test_instances, path_graph = path_graph, path_emb = path_emb)
 
 #------------------------Node Classification
-def node_classification(path_graph = "graph.pkl", path_embeddings = "cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, seed=20, num_per_class = 20, C = 0.1):
+def node_classification(path_graph = "../../data/cora/graph/graph.pkl", path_embeddings = "../../data/cora/embeddings/cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, seed=20, num_per_class = 20, C = 0.1):
     logisticRegr = LogisticRegression(C=C, solver='liblinear', multi_class='ovr')
     training_nodes = training(path_graph, seed=seed, num = num_per_class)
     emb = get_embeddings(path_embeddings)
@@ -113,7 +111,7 @@ def node_classification(path_graph = "graph.pkl", path_embeddings = "cora_embedd
     #print ("Confusion Matrix:\n", conf_matrix)
     return score_macro, conf_matrix
 
-def node_classification_random_seeds(path_graph = "graph.pkl", path_embeddings = "cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, num_per_class = 20, iterations = 50, C = 0.1):
+def node_classification_random_seeds(path_graph = "../../data/cora/graph/graph.pkl", path_embeddings = "../../data/cora/embeddings/cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, num_per_class = 20, iterations = 50, C = 0.1):
     scores = []
     for i in range (0,iterations):
         scores.append(node_classification(path_graph = path_graph, path_embeddings = path_embeddings, num_test_instances = num_test_instances, seed=i, num_per_class = num_per_class, C=C)[0])
@@ -133,8 +131,8 @@ if __name__ == "__main__":
     # execute only if run as a script
 
     parser = argparse.ArgumentParser(description="Node Classification script.")
-    parser.add_argument("-g", "--graph", default = "graph.pkl", help="path to graph")
-    parser.add_argument("-e", "--embeddings", default = "cora_embeddings_uniform_m20.pkl", help="path to embeddings")
+    parser.add_argument("-g", "--graph", default = "../../data/cora/graph/graph.pkl", help="path to graph")
+    parser.add_argument("-e", "--embeddings", default = "../../data/cora/embeddings/cora_embeddings_uniform_m20.pkl", help="path to embeddings")
     parser.add_argument("-s", "--seed", type=int, help="random seed for a single node classification run. If specified, node_classification() is always executed.")
     parser.add_argument("-i", "--iterations", type=int, default = 10, help="number of node classification iterations; the iteration counter is used as the random seed.")
     parser.add_argument("-n", "--number", type=int, default = 20, help="number of instances per class for training")
diff --git a/EP/Cora_node_classification/random_nodes_for_node_classification.py b/EP/Cora_node_classification/random_nodes_for_node_classification.py
index b4ae51d..9cea5b7 100644
--- a/EP/Cora_node_classification/random_nodes_for_node_classification.py
+++ b/EP/Cora_node_classification/random_nodes_for_node_classification.py
@@ -27,7 +27,7 @@ def get_random_num_nodes(set_elm, num, seed):
     random.seed(seed)
     return set(random.sample(set_elm, num))
 
-def get_num_random_nodes_for_all_classes_read(path = "graph.pkl", num = 20, seed = 1):
+def get_num_random_nodes_for_all_classes_read(path = "../../data/cora/graph/graph.pkl", num = 20, seed = 1):
     """get specific number of nodes per class, same number for all classes"""
     cora_dict = dict_of_node_classes_read(path)
     sampled_random_id_set = set()
diff --git a/Senseval_Prep/senseval_preprocessing.py b/Senseval_Prep/senseval_preprocessing.py
index c24bf89..65c9ae9 100644
--- a/Senseval_Prep/senseval_preprocessing.py
+++ b/Senseval_Prep/senseval_preprocessing.py
@@ -1,10 +1,8 @@
 #!/usr/bin/env python3
 
 """
-@author: Utaemon Toyota
-@date: 25.2.2019
 @project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics
-@members: Nadia Arslan, Lyuba Dimitrova, Nicolas Weber, Utaemon Toyota
+@members: Nadia Arslan, Lyuba Dimitrova, Utaemon Toyota, Nicolas Weber
 @required data: Senseval english-all-word test data and their penn treebank files in the same directory.
 @usage: python3 senseval_preprocessing.py [-s] [-g] [-v]
         -s / --stopwords    Path to txt-file with stopwords
@@ -18,11 +16,11 @@ import pickle as pkl
 from nltk.stem import WordNetLemmatizer
 wnl = WordNetLemmatizer()
 
-file_path2 = "Senseval2/eng-all-words_seneval2.test.xml"           #senseval2
-file_path3 = "Senseval3/english-all-words.xml"                    #senseval3
+file_path2 = "../../data/senseval2/raw/eng-all-words_seneval2.test.xml"           #senseval2
+file_path3 = "../../data/senseval3/raw/english-all-words.xml"                    #senseval3
 
-tree_paths2 = {"d00": "Senseval2/wsj_0089.mrg", "d01": "Senseval2/wsj_0465.mrg", "d02": "Senseval2/wsj_1286.mrg"}      #senseval2
-tree_paths3 = {"d000": "Senseval3/cl23.mrg", "d001": "Senseval3/wsj_1695.mrg", "d002":"Senseval3/wsj_1778.mrg"}       #senseval3
+tree_paths2 = {"d00": "../../data/senseval2/raw/wsj_0089.mrg", "d01": "../../data/senseval2/raw/wsj_0465.mrg", "d02": "../../data/senseval2/raw/wsj_1286.mrg"}      #senseval2
+tree_paths3 = {"d000": "../../data/senseval3/raw/cl23.mrg", "d001": "../../data/senseval3/raw/wsj_1695.mrg", "d002": "../../data/senseval3/raw/wsj_1778.mrg"}       #senseval3
 
 def get_stopword_list(stop_path):
     with open (stop_path, "r") as f:
@@ -246,7 +244,7 @@ def get_sats(tokens, info):
             new_info.append(info[idx])
     return [new_tokens, new_info]
 
-def write_pkl(version = 3, stop_path="stopwords.txt", gloss_path = "gloss_mapping.txt"):
+def write_pkl(version = 3, stop_path="../../data/other/stopwords.txt", gloss_path = "../../data/wordnet/mappings/gloss_mapping.txt"):
     file_path = ""
     tree_path = ""
     if version == 2:
@@ -264,8 +262,8 @@ def write_pkl(version = 3, stop_path="stopwords.txt", gloss_path = "gloss_mappin
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Senseval Preprocessing script.")
-    parser.add_argument("-s", "--stopwords", default="stopwords.txt", help="path to stopwords-txt-file")
-    parser.add_argument("-g", "--gloss", default="gloss_mapping.txt", help = "path to gloss mapping txt-file")
+    parser.add_argument("-s", "--stopwords", default="../../data/other/stopwords.txt", help="path to stopwords-txt-file")
+    parser.add_argument("-g", "--gloss", default="../../data/wordnet/mappings/gloss_mapping.txt", help = "path to gloss mapping txt-file")
     parser.add_argument("-v", "--version", default = 3, help="2 or 3 for senseval version")
     args = parser.parse_args()
     write_pkl(version=int(args.version), stop_path=args.stopwords, gloss_path=args.gloss)
-- 
GitLab