accuracy -> f1-score

f3ccf17e · dimitrova · d49980a5 · f3ccf17e
Commit f3ccf17e authored 6 years ago by dimitrova
--- a/EP/Cora_node_classification/node_classification.py
+++ b/EP/Cora_node_classification/node_classification.py
@@ -22,7 +22,7 @@ import random_nodes_for_node_classification
 import sys
 import argparse
 from heapq import nlargest as nmax
-from sklearn.metrics import confusion_matrix
+from sklearn.metrics import confusion_matrix, f1_score


 def training(path_graph, seed=0, num= 20):
@@ -44,7 +44,7 @@ def get_class_list(path):
    return np.array(class_list)

 #------------------------classification
-def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "/home/utaemon/SP/graph.pkl", path_emb = "cora_embeddings_uniform_m20.pkl"):
+def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "graph.pkl", path_emb = "cora_embeddings_uniform_m20.pkl"):
    training_nodes = training(path_graph, seed=seed, num = num)
    emb = get_embeddings(path_emb)
    cl = get_class_list(path_graph)
@@ -80,7 +80,7 @@ def classify_func(range_seeds = 10, num = 20, num_test_instances = 1000, path_gr

 #------------------------Node Classification
 def node_classification(path_graph = "graph.pkl", path_embeddings = "cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, seed=20, num_per_class = 20, C = 0.1):
-    logisticRegr = LogisticRegression(C=C)
+    logisticRegr = LogisticRegression(C=C, solver='liblinear', multi_class='ovr')
    training_nodes = training(path_graph, seed=seed, num = num_per_class)
    emb = get_embeddings(path_embeddings)
    cl = get_class_list(path_graph)
@@ -98,12 +98,19 @@ def node_classification(path_graph = "graph.pkl", path_embeddings = "cora_embedd

    logisticRegr.fit(train_emb, train_labels)

-    predicted_labels = logisticRegr.score(test_emb, test_labels)
+    #predicted_labels = logisticRegr.score(test_emb, test_labels)
    predictions = logisticRegr.predict(test_emb)
+
+    # Report macro- and micro-averaged F1 instead of the plain score() value
+    score_macro = f1_score(test_labels, predictions, average='macro')
+    score_micro = f1_score(test_labels, predictions, average='micro')
+
    conf_matrix = confusion_matrix(test_labels, predictions)
-    print ("Node Classification on random seed ", seed, "->", predicted_labels)
-    print ("Confusion Matrix:\n", conf_matrix)
-    return predicted_labels, conf_matrix
+    print("Micro F1-score on random seed ", seed, "->", score_micro)
+    print("Macro F1-score on random seed ", seed, "->", score_macro)
+    #print ("Node Classification on random seed ", seed, "->", predicted_labels)
+    #print ("Confusion Matrix:\n", conf_matrix)
+    return score_macro, conf_matrix

 def node_classification_random_seeds(path_graph = "graph.pkl", path_embeddings = "cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, num_per_class = 20, iterations = 50, C = 0.1):
    scores = []
@@ -115,7 +122,7 @@ def node_classification_random_seeds(path_graph = "graph.pkl", path_embeddings =
    print ("Maximum: ", scores[ten_max[0]], "Seed: ", ten_max[0])
    avg = sum(scores)/len(scores)
    print ("Average: ",avg)
-    print ("10 Higest Scores:")
+    print ("10 Highest Scores:")
    for i in range(10):
        print (i + 1, ":", scores[ten_max[i]], "on Seed", ten_max[i])
    #return scores, ten_max[0][0], avg
@@ -127,13 +134,13 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Node Classification script.")
    parser.add_argument("-g", "--graph", default = "graph.pkl", help="path to graph")
    parser.add_argument("-e", "--embeddings", default = "cora_embeddings_uniform_m20.pkl", help="path to embeddings")
-    parser.add_argument("-s", "--seed", type=int, default = 0, help="random seed for one node classification. If this will be specified, always the function node_classification() will be executed.")
+    parser.add_argument("-s", "--seed", type=int, help="random seed for one node classification. If this will be specified, always the function node_classification() will be executed.")
    parser.add_argument("-i", "--iterations", type=int, default = 10, help="number of iterations of node classification. Counter of iteration is random seed.")
    parser.add_argument("-n", "--number", type=int, default = 20, help="number of instances per class for training")
    parser.add_argument("-t", "--testset", type=int, default = 1000, help="number of random instances in testset")
    parser.add_argument("-c", "--regularization", type=float, default=0.1, help="Inverse of regularization strength")
    args = parser.parse_args()
-    if "-s" in sys.argv[1:]:
+    if args.seed:
        node_classification(path_graph=args.graph, path_embeddings=args.embeddings, seed= args.seed, num_per_class=args.number, C=args.regularization)
    else:
        node_classification_random_seeds(path_graph=args.graph,