Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
Data Augmentation for Metonymy Resolution
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
friebolin
Data Augmentation for Metonymy Resolution
Commits
ecb8e50c
Commit
ecb8e50c
authored
2 years ago
by
kulcsar
Browse files
Options
Downloads
Patches
Plain Diff
add interpolation
parent
31541e44
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
Code/main.py
+39
-19
39 additions, 19 deletions
Code/main.py
Code/models.py
+1
-1
1 addition, 1 deletion
Code/models.py
Code/train.py
+48
-7
48 additions, 7 deletions
Code/train.py
with
88 additions
and
27 deletions
Code/main.py
+
39
−
19
View file @
ecb8e50c
...
...
@@ -67,17 +67,17 @@ def run(raw_args):
#train...
print
(
"
training..
"
)
if
args
.
train_loop
==
"
swp
"
:
evaluation_test
,
evaluation_train
=
train
.
train
(
model
,
args
.
architecture
,
args
.
random_seed
,
train_dataset
,
test_dataset
,
args
.
epochs
,
args
.
learning_rate
,
args
.
batch_size
,
args
.
test_batch_size
)
evaluation_test
,
evaluation_train
=
train
.
train
(
model
,
args
.
architecture
,
args
.
random_seed
,
args
.
gradient_accumulation_steps
,
args
.
mix_up
,
train_dataset
,
test_dataset
,
args
.
epochs
,
args
.
learning_rate
,
args
.
batch_size
,
args
.
test_batch_size
)
elif
args
.
train_loop
==
"
salami
"
:
evaluation_test
=
train
.
train_salami
(
model
,
args
.
random_seed
,
train_dataset
,
test_dataset
,
args
.
batch_size
,
args
.
test_batch_size
,
args
.
learning_rate
,
args
.
epochs
)
else
:
print
(
"
no eligible train loop selected
"
)
#(evaluate... is done internally) but could maybe be implemented here to make average over multiple random seeds
with
open
(
args
.
save_directory
,
"
x
"
)
as
f
:
f
.
write
(
str
(
args
))
f
.
write
(
str
(
evaluation_test
))
f
.
write
(
str
(
evaluation_train
))
if
isinstance
(
args
.
save_directory
,
str
):
with
open
(
args
.
save_directory
,
"
x
"
)
as
f
:
f
.
write
(
str
(
args
))
f
.
write
(
str
(
evaluation_test
))
f
.
write
(
str
(
evaluation_train
))
print
(
"
saved and done
"
)
if
__name__
==
"
__main__
"
:
...
...
@@ -111,28 +111,30 @@ if __name__ == "__main__":
"
--tokenizer
"
,
choices
=
[
"
salami
"
,
"
li
"
,
"
swp
"
],
help
=
"
Which tokenizer to use when preprocessing the datasets
"
)
parser
.
add_argument
(
"
-tc
"
,
"
--tcontext
"
,
#
action="store_
fals
e",
default
=
False
,
type
=
bool
,
help
=
"
whe
a
ther or not to preprocess train set with context
"
)
action
=
"
store_
tru
e
"
,
#
default=False,
#
type=bool,
help
=
"
whether or not to preprocess train set with context
"
)
parser
.
add_argument
(
"
-vc
"
,
"
--vcontext
"
,
default
=
False
,
type
=
bool
,
help
=
"
wheather or not to preprocess the test set with context
"
)
#default=False,
#type=bool,
action
=
"
store_true
"
,
help
=
"
whether or not to preprocess the test set with context
"
)
parser
.
add_argument
(
"
-m
"
,
"
--masking
"
,
default
=
False
,
type
=
bool
,
#
action="store_
fals
e",
help
=
"
whe
a
ther or not to mask the target word
"
)
#
default=False,
#
type=bool,
action
=
"
store_
tru
e
"
,
help
=
"
whether or not to mask the target word
"
)
parser
.
add_argument
(
"
-max
"
,
"
--max_length
"
,
...
...
@@ -145,28 +147,46 @@ if __name__ == "__main__":
"
--train_loop
"
,
choices
=
[
"
salami
"
,
"
swp
"
],
help
=
"
Which Train loop to use
"
)
parser
.
add_argument
(
"
-e
"
,
"
--epochs
"
,
type
=
int
,
help
=
"
Number of epochs for training
"
)
parser
.
add_argument
(
"
-lr
"
,
"
--learning_rate
"
,
type
=
float
,
help
=
"
Learning rate for training
"
)
parser
.
add_argument
(
"
-rs
"
,
"
--random_seed
"
,
type
=
int
,
default
=
42
,
help
=
"
Random seed for initialization of model
"
)
parser
.
add_argument
(
"
-b
"
,
"
--batch_size
"
,
help
=
"
The batch size for the training process
"
,
type
=
int
,
default
=
16
)
default
=
32
)
parser
.
add_argument
(
"
-gras
"
,
"
--gradient_accumulation_steps
"
,
help
=
"
gradient accumulation steps for training
"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"
-mixup
"
,
"
--mix_up
"
,
help
=
"
whether or not to apply mixup during training
"
,
action
=
"
store_true
"
)
#Test arguments
parser
.
add_argument
(
...
...
@@ -174,7 +194,7 @@ if __name__ == "__main__":
"
--test_batch_size
"
,
help
=
"
The batch size for the training process
"
,
type
=
int
,
default
=
6
4
)
default
=
1
6
)
#Save and Organisation
parser
.
add_argument
(
...
...
This diff is collapsed.
Click to expand it.
Code/models.py
+
1
−
1
View file @
ecb8e50c
...
...
@@ -62,7 +62,7 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
span_output
[
i
]
=
output
[
i
][
start_position
[
i
]:
end_position
[
i
]].
mean
(
dim
=
0
)
logits
=
self
.
classifier
(
span_output
)
outputs
=
(
logits
,)
+
outputs
[
2
:]
outputs
=
(
logits
,)
+
outputs
[:
2
]
if
labels
is
not
None
:
loss_fct
=
CrossEntropyLoss
()
...
...
This diff is collapsed.
Click to expand it.
Code/train.py
+
48
−
7
View file @
ecb8e50c
...
...
@@ -22,13 +22,16 @@ torch.cuda.empty_cache()
def
train
(
model
,
seed
,
train_dataset
,
test_dataset
,
num_epochs
,
learning_rate
,
batch_size
,
test_batch_size
):
def
train
(
model
,
name
,
seed
,
gradient_accumulation_steps
,
mixup
,
train_dataset
,
test_dataset
,
num_epochs
,
learning_rate
,
batch_size
,
test_batch_size
):
"""
Write Train loop for model with certain train dataset
"""
#set_seed(seed)
#if model_name[0] == "b":
# model=BertForWordClassification.from_pretrained(model_name).to("cuda")
#elif model_name[0] == "r":
# model=RobertaForWordClassification.from_pretrained(model_name),to("cuda")
print
(
"
batch size:
"
,
batch_size
)
print
(
"
test batch size:
"
,
test_batch_size
)
print
(
"
mix up:
"
,
mixup
)
model
.
train
().
to
(
"
cuda
"
)
train_sampler
=
RandomSampler
(
train_dataset
)
train_dataloader
=
DataLoader
(
train_dataset
,
sampler
=
train_sampler
,
batch_size
=
batch_size
)
...
...
@@ -43,24 +46,38 @@ def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, b
for
epoch
in
range
(
num_epochs
):
#for param_tensor in model.state_dict():
# print(param_tensor, "\t", model.state_dict()[param_tensor])
index
=
0
for
batch
in
train_dataloader
:
if
model
.
name_or_path
[
0
]
==
"
b
"
:
if
name
[
0
]
==
"
b
"
:
inputs
=
{
'
input_ids
'
:
batch
[
0
],
'
attention_mask
'
:
batch
[
1
],
'
token_type_ids
'
:
batch
[
2
],
'
start_position
'
:
batch
[
3
],
'
end_position
'
:
batch
[
4
],
'
labels
'
:
batch
[
5
]}
if
model
.
name_or_path
[
0
]
==
"
r
"
:
labels
=
batch
[
5
]
if
name
[
0
]
==
"
r
"
:
inputs
=
{
'
input_ids
'
:
batch
[
0
],
'
attention_mask
'
:
batch
[
1
],
'
start_position
'
:
batch
[
2
],
'
end_position
'
:
batch
[
3
],
'
labels
'
:
batch
[
4
]}
labels
=
batch
[
4
]
outputs
=
model
(
**
inputs
)
#print("outputs: ", outputs)
#print("outputs 0: ", outputs[0])
loss
=
outputs
[
0
]
print
(
"
length of outputs;
"
,
len
(
outputs
))
for
i
in
range
(
len
(
outputs
)):
print
(
"
outputs {0}: {1}
"
.
format
(
i
,
outputs
[
i
].
size
()))
if
mixup
==
True
:
#print("length of outputs: ", len(outputs))
mixup_function
(
outputs
[
2
],
labels
)
#print(outputs[2].size())
#print(outputs[0].size())
loss
.
backward
()
#if (index+1)%gradient_accumulation_steps==0:
optimizer
.
step
()
lr_scheduler
.
step
()
optimizer
.
zero_grad
()
...
...
@@ -69,8 +86,8 @@ def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, b
#print("one epoch done")
#print(model_name)
evaluation_test
=
evaluation
.
evaluate_model
(
model
,
test_dataset
,
learning_rate
,
test_batch_size
)
evaluation_train
=
evaluation
.
evaluate_model
(
model
,
train_dataset
,
learning_rate
,
test_batch_size
)
evaluation_test
=
evaluation
.
evaluate_model
(
model
,
name
,
test_dataset
,
learning_rate
,
test_batch_size
)
evaluation_train
=
evaluation
.
evaluate_model
(
model
,
name
,
train_dataset
,
learning_rate
,
test_batch_size
)
print
(
"
DEV:
"
,
evaluation_test
)
print
(
"
TRAIN:
"
,
evaluation_train
)
...
...
@@ -79,7 +96,31 @@ def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, b
def
mixup_function
(
batch_of_matrices
,
batch_of_labels
):
runs
=
math
.
floor
(
batch_of_matrices
.
size
()[
0
]
/
2
)
counter
=
0
for
i
in
range
(
runs
):
print
(
"
doing interpolation...
"
)
matrix1
=
batch_of_matrices
[
counter
]
label1
=
batch_of_labels
[
counter
]
matrix2
=
batch_of_matrices
[
counter
+
1
]
label2
=
batch_of_labels
[
counter
+
1
]
interpolate
(
matrix1
,
label1
,
matrix2
,
label2
,
0.4
,
0.05
)
counter
+=
2
print
(
"
mixup done
"
)
def interpolate(matrix1, label1, matrix2, label2, l, threshold):
    """Linearly interpolate two examples and snap the mixed label to 0/1.

    Computes the mixup combination ``l * x1 + (1 - l) * x2`` for both the
    matrices and the labels, then binarizes the mixed label:

    * > 0.5 + threshold  -> 1
    * < 0.5 - threshold  -> 0
    * otherwise the label is ambiguous ("undefined zone") and the pair
      is rejected.

    Args:
        matrix1, matrix2: the two feature matrices (anything supporting
            ``*`` with a float and ``+``, e.g. torch tensors).
        label1, label2: numeric class labels (expected 0/1).
        l: interpolation coefficient, weight of the first example.
        threshold: half-width of the rejection band around 0.5.

    Returns:
        tuple: ``(new_matrix, new_label)`` with ``new_label`` in {0, 1},
        or ``None`` when the mixed label lies inside the rejection band.
    """
    new_matrix = (matrix1 * l) + (matrix2 * (1 - l))
    new_label = (label1 * l) + (label2 * (1 - l))
    if new_label > 0.5 + threshold:
        new_label = 1
    elif new_label < 0.5 - threshold:
        new_label = 0
    else:
        # typo fixed in the user-facing message ("undefinded" -> "undefined")
        print("in undefined zone")
        return None
    return new_matrix, new_label
def
train_salami
(
model
,
seed
,
train_set
,
test_set
,
batch_size
,
test_batch_size
,
learning_rate
,
epochs
):
results
=
[]
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment