Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
Data Augmentation for Metonymy Resolution
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
friebolin
Data Augmentation for Metonymy Resolution
Commits
655d7e11
Commit
655d7e11
authored
2 years ago
by
friebolin
Browse files
Options
Downloads
Patches
Plain Diff
Update inference
parent
4466681b
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
Code/inference.py
+50
-15
50 additions, 15 deletions
Code/inference.py
documentation/.DS_Store
+0
-0
0 additions, 0 deletions
documentation/.DS_Store
with
50 additions
and
15 deletions
Code/inference.py
+
50
−
15
View file @
655d7e11
"""
Demo for inference: User enters a sentence and our trained BERT model predicts if the target word is literal or non-literal
"""
import
json
import
torch
import
preprocess
...
...
@@ -8,34 +10,67 @@ import train
from
torch.utils.data
import
DataLoader
,
RandomSampler
# Get user input
print
(
"
Enter a sentence:
"
)
print
(
"
Enter a sentence
and enclose the target word(s) between asteriks (e.g.
\"
I love *New York*
\"
)
:
"
)
sentence
=
input
()
sentence
=
sentence
.
split
()
print
(
"
Specify the target word position using square brackets (e.g. [0,2])
"
)
target_pos
=
input
()
target_json
=
json
.
loads
(
target_pos
)
print
(
type
(
target_json
))
print
(
"
Enter the label: 0 for literal, 1 for non-literal
"
)
def extract_target_words(input_string):
    """Return every asterisk-delimited span found in *input_string*.

    Each span is the shortest run of characters between a pair of ``*``
    markers (non-greedy match), with surrounding whitespace stripped.
    The spans are returned in order of appearance; the list is empty
    when the string contains no complete ``*...*`` pair.
    """
    return [span.strip() for span in re.findall(r'\*(.*?)\*', input_string)]
target_word = extract_target_words(sentence)
if not target_word:
    # Without an asterisk-delimited span there is nothing to classify;
    # indexing the empty list below would only produce a bare IndexError.
    raise SystemExit(
        "No target word found: enclose the target word(s) between asterisks."
    )
# Only the first marked span is used; split it into its individual words.
split_target = target_word[0].split()
def remove_asterisks_and_split(input_string):
    """Strip all ``*`` characters from *input_string* and split it into words.

    The asterisks are deleted (not replaced by spaces), then the result is
    split on runs of whitespace. Returns the list of words.
    """
    without_markers = re.sub(r"\*", "", input_string)
    return without_markers.split()
split_sentence
=
remove_asterisks_and_split
(
sentence
)
def find_target_position(split_sentence, split_target):
    """Locate the first occurrence of *split_target* inside *split_sentence*.

    Both arguments are sequences of words. The sentence is scanned left to
    right, comparing a window of ``len(split_target)`` words at each index.

    Returns a two-element list ``[start, stop]`` where ``start`` is the index
    of the first matching word and ``stop`` is one past the last matching
    word (i.e. a half-open slice range). When no window matches, the result
    is ``[-1, 0]``.
    """
    width = len(split_target)
    for offset, _ in enumerate(split_sentence):
        window = split_sentence[offset:offset + width]
        if window == split_target:
            # First hit wins; stop is exclusive.
            return [offset, offset + width]
    # No match: same sentinel pair the caller has always received.
    return [-1, 0]
pos = find_target_position(split_sentence, split_target)
# find_target_position already returns a Python list [start, stop];
# json.loads() only accepts str/bytes/bytearray and would raise TypeError
# here, so use the list directly (copied so the two names stay independent).
target_json = list(pos)
print
(
f
"
The target word is
{
target_word
}
and at the position
{
pos
}
.
"
)
print
(
"
Now enter the label: 0 for literal, 1 for non-literal
"
)
label
=
int
(
input
())
#label_json=json.loads(label)
print
(
"
Is this your target word:
"
,
sentence
[
target_json
[
0
]:
target_json
[
1
]])
# Convert to data sample for BERT
data_sample
=
[{
"
sentence
"
:
sentence
,
"
pos
"
:
target_json
,
"
label
"
:
label
}]
print
(
data_sample
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
"
bert-base-uncased
"
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
"
bert-base-uncased
"
)
input_as_dataset
=
preprocess
.
tokenizer_new
(
tokenizer
,
data_sample
,
max_length
=
512
)
# Load model
device
=
torch
.
device
(
'
cuda
'
if
torch
.
cuda
.
is_available
()
else
'
cpu
'
)
model
=
models
.
WordClassificationModel
.
from_pretrained
(
"
bert-base-uncased
"
)
model_path
=
"
saved_models/bert_.pth
"
model_path
=
"
saved_models/bert.pth
"
model
=
torch
.
load
(
model_path
,
map_location
=
device
)
model
.
eval
()
model
.
eval
()
train_sampler
=
RandomSampler
(
data_sample
)
train_dataloader
=
DataLoader
(
data_sample
,
sampler
=
train_sampler
,
batch_size
=
1
)
...
...
@@ -52,9 +87,9 @@ for batch in train_dataloader:
end_positions
=
batch
[
4
]
outputs
=
model
(
**
inputs
)
prediction
=
torch
.
argmax
(
outputs
[
0
])
if
predic
i
ton
==
1
:
if
predict
i
on
==
1
:
print
(
"
metonymy
"
)
elif
predic
i
ton
==
0
:
elif
predict
i
on
==
0
:
print
(
"
literal
"
)
#print("Outputs: ",
This diff is collapsed.
Click to expand it.
documentation/.DS_Store
+
0
−
0
View file @
655d7e11
No preview for this file type
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment