Commit ecb8e50c authored by kulcsar

add interpolation

parent 31541e44
@@ -67,17 +67,17 @@ def run(raw_args):
#train...
print("training..")
if args.train_loop=="swp":
- evaluation_test, evaluation_train = train.train(model,args.architecture,args.random_seed, train_dataset, test_dataset, args.epochs, args.learning_rate, args.batch_size, args.test_batch_size)
+ evaluation_test, evaluation_train = train.train(model, args.architecture,args.random_seed, args.gradient_accumulation_steps, args.mix_up, train_dataset, test_dataset, args.epochs, args.learning_rate, args.batch_size, args.test_batch_size)
elif args.train_loop=="salami":
evaluation_test = train.train_salami(model,args.random_seed, train_dataset, test_dataset, args.batch_size, args.test_batch_size, args.learning_rate, args.epochs)
else:
print("no eligible train loop selected")
#(evaluate... is done internally) but could maybe be implemented here to make average over multiple random seeds
- with open(args.save_directory, "x") as f:
-     f.write(str(args))
-     f.write(str(evaluation_test))
-     f.write(str(evaluation_train))
+ if isinstance(args.save_directory, str):
+     with open(args.save_directory, "x") as f:
+         f.write(str(args))
+         f.write(str(evaluation_test))
+         f.write(str(evaluation_train))
print("saved and done")
if __name__ == "__main__":
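In this hunk, train.train() is now handed args.gradient_accumulation_steps and args.mix_up (matching the widened signature further down), and the result dump only runs when --save_directory was actually supplied; open(..., "x") is exclusive-create mode, so an existing results file raises FileExistsError instead of being silently overwritten. Because the two new values are spliced into the middle of a long positional argument list, keyword arguments would make the call site harder to mis-order; a minimal sketch (parameter names taken from the new train() signature below, not from the committed call):

evaluation_test, evaluation_train = train.train(
    model,
    args.architecture,                 # received as `name`
    args.random_seed,                  # received as `seed`
    gradient_accumulation_steps=args.gradient_accumulation_steps,
    mixup=args.mix_up,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    num_epochs=args.epochs,
    learning_rate=args.learning_rate,
    batch_size=args.batch_size,
    test_batch_size=args.test_batch_size,
)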
@@ -111,28 +111,30 @@ if __name__ == "__main__":
"--tokenizer",
choices=["salami", "li", "swp"],
help="Which tokenizer to use when preprocessing the datasets")
parser.add_argument(
"-tc",
"--tcontext",
#action="store_false",
default=False,
type=bool,
help="wheather or not to preprocess train set with context")
action="store_true",
#default=False,
#type=bool,
help="whether or not to preprocess train set with context")
parser.add_argument(
"-vc",
"--vcontext",
- default=False,
- type=bool,
- help="wheather or not to preprocess the test set with context")
+ #default=False,
+ #type=bool,
+ action="store_true",
+ help="whether or not to preprocess the test set with context")
parser.add_argument(
"-m",
"--masking",
- default=False,
- type=bool,
- #action="store_false",
- help="wheather or not to mask the target word")
+ #default=False,
+ #type=bool,
+ action="store_true",
+ help="whether or not to mask the target word")
parser.add_argument(
"-max",
"--max_length",
@@ -145,28 +147,46 @@
"--train_loop",
choices=["salami", "swp"],
help="Which Train loop to use")
parser.add_argument(
"-e",
"--epochs",
type=int,
help="Number of epochs for training")
parser.add_argument(
"-lr",
"--learning_rate",
type=float,
help="Learning rate for training")
parser.add_argument(
"-rs",
"--random_seed",
type=int,
default=42,
help="Random seed for initialization of model")
parser.add_argument(
"-b",
"--batch_size",
help="The batch size for the training process",
type=int,
- default=16)
+ default=32)
+ parser.add_argument(
+ "-gras",
+ "--gradient_accumulation_steps",
+ help="gradient accumulation steps for training",
+ type=int,
+ default=1)
+ parser.add_argument(
+ "-mixup",
+ "--mix_up",
+ help="whether or not to apply mixup during training",
+ action="store_true")
#Test arguments
parser.add_argument(
@@ -174,7 +194,7 @@
"--test_batch_size",
help="The batch size for the training process",
type=int,
- default=64)
+ default=16)
#Save and Organisation
parser.add_argument(
...
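Two defaults also move in this file: --batch_size goes from 16 to 32 and --test_batch_size from 64 to 16, and the new --gradient_accumulation_steps lets gradients from several small batches accumulate before one optimizer step, so the effective batch size is batch_size * gradient_accumulation_steps. A small worked example (dataset size and accumulation value are illustrative only):

# Relate the new defaults to optimizer updates; numbers are hypothetical.
num_train_examples = 10_000
batch_size = 32                      # new default for --batch_size
gradient_accumulation_steps = 2      # --gradient_accumulation_steps (default is 1)

batches_per_epoch = num_train_examples // batch_size                  # 312
updates_per_epoch = batches_per_epoch // gradient_accumulation_steps  # 156
effective_batch_size = batch_size * gradient_accumulation_steps       # 64

Note that the accumulation gating is still commented out in the train loop below, so in this commit every batch still triggers an optimizer step.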
@@ -62,7 +62,7 @@ class WordClassificationModel(torch.nn.Module): # use AutoModel from the library
span_output[i] = output[i][start_position[i]:end_position[i]].mean(dim=0)
logits = self.classifier(span_output)
- outputs = (logits,) + outputs[2:]
+ outputs = (logits,) + outputs[:2]
if labels is not None:
loss_fct = CrossEntropyLoss()
...
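This one-line change decides what ends up in the tuple the train loop receives. A hedged reading, assuming the wrapped encoder still returns a plain tuple whose first two entries are the token-level hidden states and the pooled representation (as the pre-return_dict transformers API did): outputs[2:] only holds optional extras (hidden states/attentions) and is usually empty, so the mixup call on outputs[2] in the train loop below would have had nothing to index; outputs[:2] keeps the encoder outputs alongside the logits. A toy reproduction with stand-in strings instead of tensors:

# Stand-ins for tensors, purely to show the tuple layout before and after the change.
encoder_outputs = ("last_hidden_state", "pooled_output")
logits = "logits"

old = (logits,) + encoder_outputs[2:]   # ('logits',)  -> no outputs[2] available
new = (logits,) + encoder_outputs[:2]   # ('logits', 'last_hidden_state', 'pooled_output')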
@@ -22,13 +22,16 @@ torch.cuda.empty_cache()
- def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
+ def train(model, name, seed,gradient_accumulation_steps,mixup, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
"""Write Train loop for model with certain train dataset"""
#set_seed(seed)
#if model_name[0] == "b":
# model=BertForWordClassification.from_pretrained(model_name).to("cuda")
#elif model_name[0] == "r":
# model=RobertaForWordClassification.from_pretrained(model_name),to("cuda")
print("batch size: ", batch_size)
print("test batch size: ", test_batch_size)
print("mix up: ", mixup)
model.train().to("cuda")
train_sampler = RandomSampler(train_dataset)
train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
@@ -43,24 +46,38 @@ def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, b
for epoch in range(num_epochs):
#for param_tensor in model.state_dict():
# print(param_tensor, "\t", model.state_dict()[param_tensor])
+ index=0
for batch in train_dataloader:
- if model.name_or_path[0] == "b":
+ if name[0] == "b":
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'token_type_ids': batch[2],
'start_position': batch[3],
'end_position': batch[4],
'labels': batch[5]}
- if model.name_or_path[0] == "r":
+ labels=batch[5]
+ if name[0] == "r":
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'start_position': batch[2],
'end_position': batch[3],
'labels': batch[4]}
+ labels = batch[4]
outputs=model(**inputs)
#print("outputs: ", outputs)
#print("outputs 0: ", outputs[0])
loss=outputs[0]
print("length of outputs; ", len(outputs))
for i in range(len(outputs)):
print("outputs {0}: {1}".format(i, outputs[i].size()))
if mixup == True:
#print("length of outputs: ", len(outputs))
mixup_function(outputs[2], labels)
#print(outputs[2].size())
#print(outputs[0].size())
loss.backward()
+ #if (index+1)%gradient_accumulation_steps==0:
optimizer.step()
lr_scheduler.step()
optimizer.zero_grad()
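gradient_accumulation_steps is now threaded into train(), but the gating line above is still commented out, so the optimizer continues to step after every batch (and index is initialised but not visibly incremented in this excerpt). A typical wiring, sketched under the assumption that loss is already a mean over the batch:

# Sketch only; index is the running batch counter initialised at the top of the epoch.
loss = loss / gradient_accumulation_steps     # keep gradient magnitudes comparable
loss.backward()
if (index + 1) % gradient_accumulation_steps == 0:
    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()
index += 1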
@@ -69,8 +86,8 @@ def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, b
#print("one epoch done")
#print(model_name)
- evaluation_test = evaluation.evaluate_model(model, test_dataset, learning_rate, test_batch_size)
- evaluation_train = evaluation.evaluate_model(model, train_dataset, learning_rate, test_batch_size)
+ evaluation_test = evaluation.evaluate_model(model, name, test_dataset, learning_rate, test_batch_size)
+ evaluation_train = evaluation.evaluate_model(model, name, train_dataset, learning_rate, test_batch_size)
print("DEV: ", evaluation_test)
print("TRAIN: ", evaluation_train)
@@ -79,7 +96,31 @@ def train(model, seed, train_dataset, test_dataset, num_epochs, learning_rate, b
+ def mixup_function(batch_of_matrices, batch_of_labels):
+     runs = math.floor(batch_of_matrices.size()[0]/2)
+     counter=0
+     for i in range(runs):
+         print("doing interpolation...")
+         matrix1=batch_of_matrices[counter]
+         label1=batch_of_labels[counter]
+         matrix2=batch_of_matrices[counter+1]
+         label2=batch_of_labels[counter+1]
+         interpolate(matrix1, label1, matrix2, label2, 0.4, 0.05)
+         counter+=2
+     print("mixup done")

+ def interpolate(matrix1, label1, matrix2, label2, l, threshold):
+     new_matrix=(matrix1*l)+(matrix2 * (1-l))
+     new_label=(label1*l)+(label2*(1-l))
+     if new_label > 0.5+threshold:
+         new_label=1
+     elif new_label < 0.5-threshold:
+         new_label=0
+     else:
+         print("in undefinded zone")
+         return None
+     return new_matrix, new_label
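As committed, mixup_function calls interpolate but discards its return value, and the train loop above ignores mixup_function's result, so the interpolated examples never reach the classifier or the loss. A hypothetical variant (name, defaults, and the collecting behaviour are invented here) that hands the usable mixed pairs back to the caller, skipping pairs whose mixed label falls in the undefined zone:

def mixup_collect(batch_of_matrices, batch_of_labels, l=0.4, threshold=0.05):
    mixed_inputs, mixed_labels = [], []
    # walk the batch two examples at a time, as mixup_function above does
    for i in range(0, batch_of_matrices.size()[0] - 1, 2):
        result = interpolate(batch_of_matrices[i], batch_of_labels[i],
                             batch_of_matrices[i + 1], batch_of_labels[i + 1],
                             l, threshold)
        if result is not None:
            mixed_inputs.append(result[0])
            mixed_labels.append(result[1])
    return mixed_inputs, mixed_labels

For reference, the original mixup formulation draws the mixing coefficient per pair from a Beta(alpha, alpha) distribution rather than fixing it at 0.4, which could be a later refinement.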
def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
results=[]
...