From 3fd007818efc150f43917f0dd24fbc7f525dd743 Mon Sep 17 00:00:00 2001 From: Maximilian Blunck <blunck@cl.uni-heidelberg.de> Date: Sun, 7 Jan 2018 18:33:34 +0100 Subject: [PATCH] F1 score output & adaptation to renamed pos_feature --- training_testing.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/training_testing.py b/training_testing.py index b6efc5c..58e2f81 100644 --- a/training_testing.py +++ b/training_testing.py @@ -6,7 +6,7 @@ import numpy as np from sklearn import svm from sklearn import tree from sklearn.model_selection import cross_val_score -import postagger +import pos_feature def create_vector(corpus_instance, vocabulary=None, pos_vocabulary=None): @@ -17,12 +17,9 @@ def create_vector(corpus_instance, vocabulary=None, pos_vocabulary=None): Example for corpus instance: OrderedDict([('LABEL', '0'), ('FILENAME', '36_19_RPRRQDRSHDV6J.txt'), ('STARS', '5.0'), ('TITLE', etc. """ f1 = ngram_feature.extract(corpus_instance, vocabulary) - f2 = postagger.extract(corpus_instance, pos_vocabulary) + f2 = pos_feature.extract(corpus_instance, pos_vocabulary) f4 = sent_rating_feature.extract(corpus_instance) - print(f2) - print(len(f2)) - return np.concatenate((f1, f2, f4)) @@ -44,8 +41,7 @@ if __name__ == '__main__': bigram_vocab = ngram_feature.get_vocabulary(train_set, 2) # pos_bags - pos_bigram_vocab = postagger.get_pos_vocabulary(train_set) - #print(pos_bigram_vocab) #already lookin' good + pos_bigram_vocab = pos_feature.get_pos_vocabulary(train_set) # inputs: train_inputs = [create_vector(el, unigram_vocab, pos_bigram_vocab) @@ -75,12 +71,15 @@ if __name__ == '__main__': train_multiple([svm_clf, tree_clf], train_inputs, train_labels) # validation - svm_score = cross_val_score(svm_clf, train_inputs, train_labels, cv=5).mean()#, scoring='f1') - tree_score = cross_val_score(tree_clf, train_inputs, train_labels, cv=5).mean()#, scoring='f1') + svm_acc = cross_val_score(svm_clf, train_inputs, train_labels, cv=5, scoring='accuracy').mean() + tree_acc = cross_val_score(tree_clf, train_inputs, train_labels, cv=5, scoring='accuracy').mean() + + svm_f1 = cross_val_score(svm_clf, train_inputs, train_labels, cv=5, scoring='f1').mean() + tree_f1 = cross_val_score(tree_clf, train_inputs, train_labels, cv=5, scoring='f1').mean() print("\n--Cross Validation Scores-- ") - print("\nSVM: {}".format(svm_score)) - print("\nTree: {}".format(tree_score)) + print("\nSVM: Accuracy: {}, F1-Score: {}".format(svm_acc, svm_f1)) + print("\nTree: Accuracy: {}, F1-Score: {}".format(tree_acc, tree_f1)) # testing # print("\nSVM: Score on test Data:") -- GitLab