Skip to content
Snippets Groups Projects
Commit 08e8a4d2 authored by blunck's avatar blunck
Browse files

Added comments for future development

parent 71b30bfa
No related branches found
No related tags found
No related merge requests found
......@@ -119,6 +119,16 @@ def to_bag_of_words(corpus):
#fun fact: len(bag_of_words) is 25325 for corpus.csv
def extract(corpus_instance, pos_vocab):
    """Take a single corpus instance (a dict) and return its feature vector.

    According to the original note, the returned vector is meant to have
    size len(bag-bigram); presumably that bag is the vocabulary passed in
    as ``pos_vocab`` -- TODO confirm once implemented.

    NOTE: placeholder for future development. The body is only ``pass``,
    so the function currently returns None.
    """
    pass
def get_pos_vocabulary(corpus):
    """Iterate over the whole corpus and return the bigram bag.

    NOTE: placeholder for future development. The body is only ``pass``,
    so the function currently returns None.
    """
    pass
if __name__ == '__main__':
corpus = read_corpus("minicorpus.csv")
tagged_corpus = corpus_pos_tagger(corpus)
......@@ -130,3 +140,4 @@ if __name__ == '__main__':
for vector in corpus_vector:
print(vector)
......@@ -5,6 +5,7 @@ import ngram_feature
import numpy as np
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
import postagger
def create_vector(corpus_instance, vocabulary=None):
......@@ -14,6 +15,7 @@ def create_vector(corpus_instance, vocabulary=None):
single corpus instance)
"""
f1 = ngram_feature.extract(corpus_instance, vocabulary)
# f2 = postagger.to_bigram_vector(corpus_instance, pos_vocab)
f4 = sent_rating_feature.extract(corpus_instance)
return np.concatenate((f1,f4))
......@@ -31,6 +33,10 @@ if __name__ == '__main__':
# vocabularies
unigram_vocab = ngram_feature.get_vocabulary(train_set, 1)
bigram_vocab = ngram_feature.get_vocabulary(train_set, 2)
# pos_bags
# bigram_pos_vocab = postagger.get_pos_vocabulary(train_set) (train_set corresponds to the corpus argument in postagger.py)
# inputs:
train_inputs = [create_vector(el, unigram_vocab)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment