Newer
Older
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
class ContrastFeature(Feature):
"""
Feature f6, based on Riloff et al. (2013).
The extract method returns a feature vector of length 1 containing the number
of contrasts found in a review.
"""
def get_feature_names(self):
    """Return the list of feature names produced by this extractor."""
    feature_names = ['riloff-contrast']
    return feature_names
def extract(self, corpus_instance):
    """
    Count the verb/situation sentiment contrasts in one corpus instance.

    The tokens are POS-tagged with NLTK and every tag is reduced to its
    first letter. Starting at each position, the 4-, 3- and 2-tag windows
    are compared (longest first) with the patterns built below; the first
    match yields one candidate phrase via ``__get_phrase``. A candidate
    counts as a contrast when the TextBlob polarity of its verb part and of
    its situation part have strictly opposite signs.

    corpus_instance: mapping whose 'TOKENS' entry is passed to
        ``nltk.pos_tag`` (presumably a list of token strings -- confirm
        against the caller).

    Returns a numpy array of length 1 holding the number of contrasts.
    """
    tagged = nltk.pos_tag(corpus_instance['TOKENS'])
    tokens_only = [token for (token, _) in tagged]
    # Only the first letter of each POS tag is used for pattern matching.
    tags_only = [tag[0] for (_, tag) in tagged]

    # Tag of the (positive) sentiment verb that opens every phrase.
    verb_tag = "V"
    # Tag sequences allowed for the situation that follows the verb.
    situation_patterns = {
        "V",
        "VV", "VR", "RV", "TV", "VN", "VP", "VJ",
        "VVV", "VVR", "VRV", "VRR", "RVV", "VNR", "VIN", "VTV", "VIP",
    }
    # Situations that only count when the tag after them is not N.
    excl_n_situation_patterns = {"VVN", "VNN", "VJN", "VDN", "RVN"}
    # Situations that only count when the tag after them is neither J nor N.
    excl_jn_situation_patterns = {"VRJ", "VVJ", "RVJ"}

    # Sets give O(1) membership tests and drop the duplicate entries.
    phrase_patterns = {verb_tag + p for p in situation_patterns}
    excl_n_phrase_patterns = {verb_tag + p for p in excl_n_situation_patterns}
    excl_jn_phrase_patterns = {verb_tag + p for p in excl_jn_situation_patterns}

    # Collect all phrases matching the patterns.
    # TODO: eliminate doubles
    tag_count = len(tags_only)
    candidates = []
    for i in range(tag_count):
        fourgram = "".join(tags_only[i:i + 4])
        trigram = "".join(tags_only[i:i + 3])
        bigram = "".join(tags_only[i:i + 2])
        # Tag directly after the 4-gram; None when the 4-gram ends the text,
        # in which case the exclusion rules cannot reject the phrase.
        following_tag = tags_only[i + 4] if i + 4 < tag_count else None

        if fourgram in phrase_patterns:
            length = 4
        elif fourgram in excl_n_phrase_patterns:
            # Compare the following *tag* (not the token) with the excluded tag.
            length = 4 if following_tag != 'N' else None
        elif fourgram in excl_jn_phrase_patterns:
            length = 4 if following_tag not in ('N', 'J') else None
        elif trigram in phrase_patterns:
            length = 3
        elif bigram in phrase_patterns:
            length = 2
        else:
            length = None

        if length is not None:
            candidates.append(self.__get_phrase(i, length, tokens_only, tags_only))

    # Determine the sentiment of each extracted phrase.
    contrasts = 0
    for phrase in candidates:
        verb, situation = phrase[0], phrase[1]
        sent_verb = TextBlob(verb).sentiment.polarity
        sent_situation = TextBlob(situation).sentiment.polarity
        # Verb and situation contrast when their polarities have opposite signs.
        if (sent_verb > 0.0 and sent_situation < 0.0) or (sent_verb < 0.0 and sent_situation > 0.0):
            contrasts += 1
    return np.array([contrasts])
def __get_phrase(self, i, n, tokens_only, tags_only):
# builds phrase corresponding to the matched POS-tag-combo
try:
pos_sent_phrase = tokens_only[i]
neg_situation_phrase = " ".join(tokens_only[(i+1):(i+n)])
if tags_only[i-1] == 'R':
pos_sent_phrase = tokens_only[i-1] +" "+ pos_sent_phrase
return (pos_sent_phrase, neg_situation_phrase)
return (pos_sent_phrase, neg_situation_phrase)