Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
softwareprojektws17
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Steffen Knapp
softwareprojektws17
Commits
9cfb91e3
Commit
9cfb91e3
authored
7 years ago
by
blunck
Browse files
Options
Downloads
Patches
Plain Diff
Completed Riloff contrast feature
parent
5156a102
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
contrast_feature.py
+88
-34
88 additions, 34 deletions
contrast_feature.py
with
88 additions
and
34 deletions
contrast_feature.py
+
88
−
34
View file @
9cfb91e3
import
corpus
import
nltk
import
numpy
as
np
from
vaderSentiment.vaderSentiment
import
SentimentIntensityAnalyzer
def get_phrase(i, n, tokens_only, tags_only):
    """Split the n-gram starting at index ``i`` into (sentiment phrase, situation phrase).

    The sentiment phrase is the token at ``i``; if the token directly before
    it carries the tag ``'R'``, that token is prepended. The situation phrase
    is the following ``n - 1`` tokens joined by single spaces (fewer if the
    sequence ends earlier), e.g. ``n=4`` for a fourgram.

    Args:
        i: Index of the first token of the n-gram.
        n: Length of the n-gram, counted from ``i``.
        tokens_only: Sequence of token strings.
        tags_only: Sequence of tag strings aligned with ``tokens_only``.

    Returns:
        A ``(pos_sent_phrase, neg_situation_phrase)`` tuple of strings, or
        ``None`` when ``i`` is not a valid index into ``tokens_only``.
    """
    try:
        pos_sent_phrase = tokens_only[i]
    except IndexError:
        # Kept from the original contract: an out-of-range start yields None.
        return None

    # Slicing never raises; it is simply clipped at the end of the sequence.
    neg_situation_phrase = " ".join(tokens_only[i + 1:i + n])

    # Only look at a real predecessor. Without the ``0 < i`` guard, i == 0
    # would index tags_only[-1] and wrongly pick up the *last* token.
    if 0 < i <= len(tags_only) and tags_only[i - 1] == 'R':
        pos_sent_phrase = tokens_only[i - 1] + " " + pos_sent_phrase

    return (pos_sent_phrase, neg_situation_phrase)
def extract(corpus_instance):
    """Count sentiment contrasts between a verb phrase and its following situation.

    The tokens of the instance are POS-tagged, each tag is reduced to its
    first letter, and every position whose tag n-gram matches one of the
    patterns below yields a (verb phrase, situation phrase) candidate via
    ``get_phrase``. A candidate counts as a contrast when the VADER compound
    scores of its two parts have opposite signs.

    Args:
        corpus_instance: Mapping whose ``'TOKENS'`` entry holds the token
            sequence that is passed to ``nltk.pos_tag``.

    Returns:
        A one-element ``numpy`` array containing the number of contrasts.
    """
    tokens = corpus_instance['TOKENS']
    tagged = nltk.pos_tag(tokens)

    # Work on the first letter of each tag only.
    tags_only = [tag[0] for (_, tag) in tagged]
    tokens_only = [token for (token, _) in tagged]

    # Tag(s) that start a sentiment phrase.
    verb_phrase_list = ["V"]

    # Tag combinations that may follow the verb as the "situation".
    uni_pos_list = ["V"]
    bi_pos_list = ["VV", "VR", "RV", "TV", "VN", "VP", "VJ"]
    tri_pos_list = ["VVV", "VVR", "VRV", "VRR", "RVV", "VNR", "VIN", "VTV", "VIP"]
    # Only valid when the tag after the fourgram is not N.
    excl_N_tri_pos_list = ["VVN", "VNN", "VJN", "VDN", "RVN"]
    # Only valid when the tag after the fourgram is neither J nor N.
    excl_JN_tri_pos_list = ["VRJ", "VVJ", "RVJ"]

    # Generate the full tag patterns: verb tag + situation tags. Sets are
    # enough because the patterns are only used for membership tests.
    phrase_patterns = {
        verb + situation
        for verb in verb_phrase_list
        for situation in uni_pos_list + bi_pos_list + tri_pos_list
    }
    excl_N_phrase_patterns = {
        verb + situation
        for verb in verb_phrase_list
        for situation in excl_N_tri_pos_list
    }
    excl_JN_phrase_patterns = {
        verb + situation
        for verb in verb_phrase_list
        for situation in excl_JN_tri_pos_list
    }

    contrasts = 0
    candidates = []

    # Collect all phrases matching the patterns, longest match first.
    # TODO: eliminate duplicates (overlapping matches at neighbouring positions).
    for i in range(len(tags_only)):
        fourgram = "".join(tags_only[i:i + 4])
        trigram = "".join(tags_only[i:i + 3])
        bigram = "".join(tags_only[i:i + 2])
        # Tag right after the fourgram; None at the end of the sequence, in
        # which case the exclusion patterns are accepted unconditionally.
        # The check must look at the *tag*, not the token text.
        next_tag = tags_only[i + 4] if i + 4 < len(tags_only) else None

        if fourgram in phrase_patterns:
            candidates.append(get_phrase(i, 4, tokens_only, tags_only))
        elif fourgram in excl_N_phrase_patterns:
            if next_tag != 'N':
                candidates.append(get_phrase(i, 4, tokens_only, tags_only))
        elif fourgram in excl_JN_phrase_patterns:
            if next_tag not in ('N', 'J'):
                candidates.append(get_phrase(i, 4, tokens_only, tags_only))
        elif trigram in phrase_patterns:
            candidates.append(get_phrase(i, 3, tokens_only, tags_only))
        elif bigram in phrase_patterns:
            candidates.append(get_phrase(i, 2, tokens_only, tags_only))

    # Determine the sentiment of the extracted phrases.
    if candidates:
        # Build the analyser once per call instead of once per candidate.
        analyser = SentimentIntensityAnalyzer()
        for verb, situation in candidates:
            sent_verb = analyser.polarity_scores(verb)['compound']
            sent_situation = analyser.polarity_scores(situation)['compound']
            # A contrast is a strictly opposite polarity of the two parts.
            if (sent_verb > 0.0 and sent_situation < 0.0) or \
                    (sent_verb < 0.0 and sent_situation > 0.0):
                contrasts += 1

    return np.array([contrasts])
if __name__ == '__main__':
    # Run the feature extraction over every instance of the shuffled corpus.
    # The loaded data gets its own name so the imported ``corpus`` module is
    # not shadowed by its own return value.
    instances = corpus.read_corpus("corpus_shuffled.csv")
    for instance in instances:
        extract(instance)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment