Skip to content
Snippets Groups Projects
Commit f3ccf17e authored by dimitrova
Browse files

accuracy -> f1-score

parent d49980a5
No related branches found
No related tags found
No related merge requests found
......@@ -22,7 +22,7 @@ import random_nodes_for_node_classification
import sys
import argparse
from heapq import nlargest as nmax
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, f1_score
def training(path_graph, seed=0, num= 20):
......@@ -44,7 +44,7 @@ def get_class_list(path):
return np.array(class_list)
#------------------------classification
def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "/home/utaemon/SP/graph.pkl", path_emb = "cora_embeddings_uniform_m20.pkl"):
def classify(C, seed = 0, num = 20, num_test_instances = 1000, path_graph = "graph.pkl", path_emb = "cora_embeddings_uniform_m20.pkl"):
training_nodes = training(path_graph, seed=seed, num = num)
emb = get_embeddings(path_emb)
cl = get_class_list(path_graph)
......@@ -80,7 +80,7 @@ def classify_func(range_seeds = 10, num = 20, num_test_instances = 1000, path_gr
#------------------------Node Classification
def node_classification(path_graph = "graph.pkl", path_embeddings = "cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, seed=20, num_per_class = 20, C = 0.1):
logisticRegr = LogisticRegression(C=C)
logisticRegr = LogisticRegression(C=C, solver='liblinear', multi_class='ovr')
training_nodes = training(path_graph, seed=seed, num = num_per_class)
emb = get_embeddings(path_embeddings)
cl = get_class_list(path_graph)
......@@ -98,12 +98,19 @@ def node_classification(path_graph = "graph.pkl", path_embeddings = "cora_embedd
logisticRegr.fit(train_emb, train_labels)
predicted_labels = logisticRegr.score(test_emb, test_labels)
#predicted_labels = logisticRegr.score(test_emb, test_labels)
predictions = logisticRegr.predict(test_emb)
# new
score_macro = f1_score(test_labels, predictions, average='macro')
score_micro = f1_score(test_labels, predictions, average='micro')
conf_matrix = confusion_matrix(test_labels, predictions)
print ("Node Classification on random seed ", seed, "->", predicted_labels)
print ("Confusion Matrix:\n", conf_matrix)
return predicted_labels, conf_matrix
print("Micro F1-score on random seed ", seed, "->", score_micro)
print("Macro F1-score on random seed ", seed, "->", score_macro)
#print ("Node Classification on random seed ", seed, "->", predicted_labels)
#print ("Confusion Matrix:\n", conf_matrix)
return score_macro, conf_matrix
def node_classification_random_seeds(path_graph = "graph.pkl", path_embeddings = "cora_embeddings_uniform_m20.pkl", num_test_instances = 1000, num_per_class = 20, iterations = 50, C = 0.1):
scores = []
......@@ -115,7 +122,7 @@ def node_classification_random_seeds(path_graph = "graph.pkl", path_embeddings =
print ("Maximum: ", scores[ten_max[0]], "Seed: ", ten_max[0])
avg = sum(scores)/len(scores)
print ("Average: ",avg)
print ("10 Higest Scores:")
print ("10 Highest Scores:")
for i in range(10):
print (i + 1, ":", scores[ten_max[i]], "on Seed", ten_max[i])
#return scores, ten_max[0][0], avg
......@@ -127,13 +134,13 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Node Classification script.")
parser.add_argument("-g", "--graph", default = "graph.pkl", help="path to graph")
parser.add_argument("-e", "--embeddings", default = "cora_embeddings_uniform_m20.pkl", help="path to embeddings")
parser.add_argument("-s", "--seed", type=int, default = 0, help="random seed for one node classification. If this will be specified, always the function node_classification() will be executed.")
parser.add_argument("-s", "--seed", type=int, help="random seed for one node classification. If this will be specified, always the function node_classification() will be executed.")
parser.add_argument("-i", "--iterations", type=int, default = 10, help="number of iterations of node classification. Counter of iteration is random seed.")
parser.add_argument("-n", "--number", type=int, default = 20, help="number of instances per class for training")
parser.add_argument("-t", "--testset", type=int, default = 1000, help="number of random instances in testset")
parser.add_argument("-c", "--regularization", type=float, default=0.1, help="Inverse of regularization strength")
args = parser.parse_args()
if "-s" in sys.argv[1:]:
if args.seed:
node_classification(path_graph=args.graph, path_embeddings=args.embeddings, seed= args.seed, num_per_class=args.number, C=args.regularization)
else:
node_classification_random_seeds(path_graph=args.graph,
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment