diff --git a/contrast_feature.py b/contrast_feature.py index e2ef84ad8c25389ef80755e023b66435d78b7219..224a77c5b3077e2bae76b74488a87540a218c9ab 100644 --- a/contrast_feature.py +++ b/contrast_feature.py @@ -49,10 +49,9 @@ def extract(corpus_instance): sent_verb = analyser.polarity_scores(verb)['compound'] sent_situation = analyser.polarity_scores(situation)['compound'] - if (sent_verb > 0.0 and sent_situation < 0.0) or (sent_verb < 0.0 and sent_situation > 0.0): - print("phrase: {} {} sent verb: {} sent situation: {}".format(verb, situation, sent_verb, sent_situation)) - - + #if (sent_verb > 0.0 and sent_situation < 0.0) or (sent_verb < 0.0 and sent_situation > 0.0): + print("phrase: {} {} sent verb: {} sent situation: {}".format(verb, situation, sent_verb, sent_situation)) + if __name__ == '__main__': corpus = corpus.read_corpus("corpus_shuffled.csv")[:1000] diff --git a/ngram_feature.py b/ngram_feature.py index 2a5354d1c1bfa25c611d7108e2abfa7e87cc649b..7a940d99f3d9a9adc6c07164dd2f1fc0aaba87b2 100644 --- a/ngram_feature.py +++ b/ngram_feature.py @@ -15,7 +15,7 @@ def extract(corpus_instance, corpus_dict_key, vocabulary): return vector.toarray()[0] -def get_vocabulary(corpus, corpus_dict_key, n): +def get_vocabulary(corpus, corpus_dict_key, n_range): """ Creates vocabulary based on given corpus. """ @@ -23,7 +23,7 @@ def get_vocabulary(corpus, corpus_dict_key, n): for line in corpus: all_reviews.append(line[corpus_dict_key]) - vectorizer = CountVectorizer(ngram_range=(n, n)) + vectorizer = CountVectorizer(ngram_range=n_range) vectorizer.fit(all_reviews) return vectorizer.vocabulary_