Commit 101221a6 authored by kulcsar

mixup epoch 4 test

parent 1d068c58
Namespace(architecture='bert-base-uncased', model_type='one', train_dataset='../datasets/data_splits/semeval_train.txt', test_dataset='../datasets/data_splits/semeval_test.txt', tokenizer='swp', tcontext=False, vcontext=False, masking=False, max_length=512, train_loop='swp', epochs=5, learning_rate=5e-05, random_seed=1, batch_size=10, gradient_accumulation_steps=15, mix_up=True, threshold=0.1, lambda_value=0.2, mixup_epoch=4, test_batch_size=10, save_directory='./Results/bert_base_rs1_mixup_threshold_01_lambda_02_semeval_epoch4.txt')
{'accuracy': 0.8964757709251101, 'f1': 0.7267441860465117, 'precision': 0.7961783439490446, 'recall': 0.6684491978609626}
{'accuracy': 1.0, 'f1': 1.0, 'precision': 1.0, 'recall': 1.0}

Namespace(architecture='bert-base-uncased', model_type='one', train_dataset='../datasets/data_splits/semeval_train.txt', test_dataset='../datasets/data_splits/semeval_test.txt', tokenizer='swp', tcontext=False, vcontext=False, masking=False, max_length=512, train_loop='swp', epochs=5, learning_rate=5e-05, random_seed=38, batch_size=10, gradient_accumulation_steps=15, mix_up=True, threshold=0.1, lambda_value=0.2, mixup_epoch=4, test_batch_size=10, save_directory='./Results/bert_base_rs38_mixup_threshold_01_lambda_02_semeval_epoch4.txt')
{'accuracy': 0.8799559471365639, 'f1': 0.6646153846153846, 'precision': 0.782608695652174, 'recall': 0.5775401069518716}
{'accuracy': 1.0, 'f1': 1.0, 'precision': 1.0, 'recall': 1.0}

Namespace(architecture='bert-base-uncased', model_type='one', train_dataset='../datasets/data_splits/semeval_train.txt', test_dataset='../datasets/data_splits/semeval_test.txt', tokenizer='swp', tcontext=False, vcontext=False, masking=False, max_length=512, train_loop='swp', epochs=5, learning_rate=5e-05, random_seed=42, batch_size=10, gradient_accumulation_steps=15, mix_up=True, threshold=0.1, lambda_value=0.2, mixup_epoch=4, test_batch_size=10, save_directory='./Results/bert_base_rs42_mixup_threshold_01_lambda_02_semeval_epoch4.txt')
{'accuracy': 0.8832599118942731, 'f1': 0.6936416184971098, 'precision': 0.7547169811320755, 'recall': 0.6417112299465241}
{'accuracy': 1.0, 'f1': 1.0, 'precision': 1.0, 'recall': 1.0}

Namespace(architecture='bert-base-uncased', model_type='one', train_dataset='../datasets/data_splits/semeval_train.txt', test_dataset='../datasets/data_splits/semeval_test.txt', tokenizer='swp', tcontext=False, vcontext=False, masking=False, max_length=512, train_loop='swp', epochs=5, learning_rate=5e-05, random_seed=666, batch_size=10, gradient_accumulation_steps=15, mix_up=True, threshold=0.1, lambda_value=0.2, mixup_epoch=4, test_batch_size=10, save_directory='./Results/bert_base_rs666_mixup_threshold_01_lambda_02_semeval_epoch4.txt')
{'accuracy': 0.8887665198237885, 'f1': 0.705539358600583, 'precision': 0.7756410256410257, 'recall': 0.6470588235294118}
{'accuracy': 1.0, 'f1': 1.0, 'precision': 1.0, 'recall': 1.0}

Namespace(architecture='bert-base-uncased', model_type='one', train_dataset='../datasets/data_splits/semeval_train.txt', test_dataset='../datasets/data_splits/semeval_test.txt', tokenizer='swp', tcontext=False, vcontext=False, masking=False, max_length=512, train_loop='swp', epochs=5, learning_rate=5e-05, random_seed=99, batch_size=10, gradient_accumulation_steps=15, mix_up=True, threshold=0.1, lambda_value=0.2, mixup_epoch=4, test_batch_size=10, save_directory='./Results/bert_base_rs99_mixup_threshold_01_lambda_02_semeval_epoch4.txt')
{'accuracy': 0.8931718061674009, 'f1': 0.717201166180758, 'precision': 0.7884615384615384, 'recall': 0.6577540106951871}
{'accuracy': 1.0, 'f1': 1.0, 'precision': 1.0, 'recall': 1.0}
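
Across the five seeds above (1, 38, 42, 666, 99), the first metrics dict of each run appears to be the test split and the second the train split, matching the evaluation_test, evaluation_train return order in the diff below; the second dict is 1.0 across the board, i.e. every run fits its training split perfectly. A minimal sketch aggregating the test numbers copied from these result files:

from statistics import mean, stdev

# Test metrics copied verbatim from the result files above, keyed by random seed.
runs = {
    1:   {'accuracy': 0.8964757709251101, 'f1': 0.7267441860465117},
    38:  {'accuracy': 0.8799559471365639, 'f1': 0.6646153846153846},
    42:  {'accuracy': 0.8832599118942731, 'f1': 0.6936416184971098},
    666: {'accuracy': 0.8887665198237885, 'f1': 0.705539358600583},
    99:  {'accuracy': 0.8931718061674009, 'f1': 0.717201166180758},
}

for metric in ("accuracy", "f1"):
    values = [run[metric] for run in runs.values()]
    # accuracy: mean ~0.8883; f1: mean ~0.7015
    print(f"{metric}: mean={mean(values):.4f}, sd={stdev(values):.4f}")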
@@ -67,7 +67,7 @@ def run(raw_args):
     #train...
     print("training..")
     if args.train_loop=="swp":
-        evaluation_test, evaluation_train = train.train(model, args.architecture,args.random_seed, args.gradient_accumulation_steps, args.mix_up, args.threshold, args.lambda_value, train_dataset, test_dataset, args.epochs, args.learning_rate, args.batch_size, args.test_batch_size)
+        evaluation_test, evaluation_train = train.train(model, args.architecture,args.random_seed, args.gradient_accumulation_steps, args.mix_up, args.threshold, args.lambda_value, args.mixup_epoch, train_dataset, test_dataset, args.epochs, args.learning_rate, args.batch_size, args.test_batch_size)
     elif args.train_loop=="salami":
         evaluation_test = train.train_salami(model,args.random_seed, train_dataset, test_dataset, args.batch_size, args.test_batch_size, args.learning_rate, args.epochs)
     else:
@@ -201,6 +201,13 @@ if __name__ == "__main__":
         type=float,
         default=0.4)
+    parser.add_argument(
+        "-mixup_epoch",
+        "--mixup_epoch",
+        help="specify the epoch(s) in which to apply mixup",
+        type=int,
+        default=1)
     #Test arguments
     parser.add_argument(
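With the new argument wired through, a hedged usage sketch of the flag added in this commit (run(raw_args) is the entry point shown in the first hunk; that it forwards raw_args to parser.parse_args() is an assumption, as is the string form of --mix_up):

# Hedged usage sketch; only --mixup_epoch is new in this commit, the other
# flags mirror the Namespace dumps in the result files above.
run([
    "--train_loop", "swp",
    "--mix_up", "True",
    "--threshold", "0.1",
    "--lambda_value", "0.2",
    "--mixup_epoch", "4",   # new flag; defaults to 1 if omitted
])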
@@ -22,7 +22,7 @@ torch.cuda.empty_cache()
-def train(model, name, seed,gradient_accumulation_steps,mixup, threshold, lambda_value, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
+def train(model, name, seed,gradient_accumulation_steps,mixup, threshold, lambda_value,mixup_epoch, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
     """Write Train loop for model with certain train dataset"""
     #set_seed(seed)
     #if model_name[0] == "b":
@@ -74,8 +74,10 @@ def train(model, name, seed,gradient_accumulation_steps,mixup, threshold, lambda
             #print("length of outputs; ", len(outputs))
             #for i in range(len(outputs)):
             print("Loss: ", loss)
-            if mixup == True:
-                loss.backward(retain_graph=True)
+            if mixup == True: #and epoch>=mixup_epoch-1:
+                #loss.backward(retain_graph=True)
+                loss.backward()
+                print("epoch: {0}, retained".format(epoch))
             else:
                 loss.backward()
             #if (index+1)%gradient_accumulation_steps==0:
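
Note on the change above: the old path needed loss.backward(retain_graph=True) because a second backward pass (the mixup loss in the next hunk) reused the same encoder graph; with span_output.detach() in that hunk, the second loss only reaches the classifier head, so the graph no longer needs to be retained. A minimal standalone sketch of the general PyTorch pattern (encoder/head are illustrative names, not from this repo):

import torch
from torch import nn

encoder = nn.Linear(8, 8)
head = nn.Linear(8, 2)

x = torch.randn(4, 8)
hidden = encoder(x)                  # autograd graph through the encoder

loss_1 = hidden.sum()
loss_1.backward(retain_graph=True)   # retain only needed if a later backward reuses this graph

# With detach(), the second loss reaches only the head's parameters, so the
# encoder graph is not reused and retain_graph=True above becomes unnecessary.
loss_2 = head(hidden.detach()).sum()
loss_2.backward()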
@@ -84,29 +86,29 @@ def train(model, name, seed,gradient_accumulation_steps,mixup, threshold, lambda
             optimizer.zero_grad()
             model.zero_grad()
             # # print("outputs {0}: {1}".format(i, outputs[i].size()))
-            if mixup == True:
-                #print("length of outputs: ", len(outputs))
-                new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
-                #for matrix in new_matrix_batch
-                new_matrix_batch.to("cuda")
-                new_labels_batch.to("cuda")
-                span_output=torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
-                for i in range(new_matrix_batch.shape[0]):
-                    span_output[i]=new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
-                print("span output size: ", span_output.size())
-                print("span output: ", span_output)
-                logits=model.classifier(span_output)
-                print("logits: ", logits.size())
-                print("labels size: ", new_labels_batch.size())
-                loss_fct = CrossEntropyLoss()
-                loss=loss_fct(logits.view(-1, 2).to("cuda"), new_labels_batch.view(-1).to("cuda"))
-                print("MixUp: ", loss)
-                #update entire model
-                loss.backward()
-                optimizer.step()
-                lr_scheduler.step()
-                optimizer.zero_grad()
-                model.zero_grad()
+            if epoch==mixup_epoch:
+                if mixup == True:
+                    #print("length of outputs: ", len(outputs))
+                    new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
+                    #for matrix in new_matrix_batch
+                    new_matrix_batch.to("cuda")
+                    new_labels_batch.to("cuda")
+                    span_output=torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
+                    for i in range(new_matrix_batch.shape[0]):
+                        span_output[i]=new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
+                    #print("span output size: ", span_output.size())
+                    #print("span output: ", span_output)
+                    logits=model.classifier(span_output.detach()) #print("logits: ", logits.size())
+                    #print("labels size: ", new_labels_batch.size())
+                    loss_fct = CrossEntropyLoss()
+                    loss_2=loss_fct(logits.view(-1, 2).to("cuda"), new_labels_batch.view(-1).to("cuda"))
+                    print("MixUp: ", loss_2)
+                    #update entire model
+                    loss_2.backward()
+                    optimizer.step()
+                    lr_scheduler.step()
+                    optimizer.zero_grad()
+                    model.zero_grad()
             #print(outputs[2].size())
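
The loop in the block above mean-pools the token vectors between each example's start and end positions before the classifier head. A self-contained sketch of that pooling step with illustrative shapes; that outputs[2] holds per-token hidden states is an assumption from context:

import torch

batch, seq_len, hidden = 10, 512, 768
token_states = torch.randn(batch, seq_len, hidden)       # stands in for outputs[2]
start_positions = torch.randint(0, seq_len - 2, (batch,))
end_positions = start_positions + 2                       # dummy spans for the sketch

span_output = torch.empty(batch, hidden)
for i in range(batch):
    # mean over the tokens inside example i's target span
    span_output[i] = token_states[i, start_positions[i]:end_positions[i]].mean(dim=0)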
@@ -144,7 +146,7 @@ def mixup_function(batch_of_matrices, batch_of_labels, l, t):
             counter+=2
     results=torch.stack(results)
     result_labels=torch.LongTensor(result_labels)
-    print("mixup done")
+    #print("mixup done")
     return results, result_labels

def interpolate(matrix1, label1, matrix2, label2, l, threshold):
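The body of interpolate is outside this hunk. For reference, standard mixup (Zhang et al., 2018) combines pairs as x_mix = l*x1 + (1-l)*x2; the sketch below matches the signature above but is not this repo's implementation: the threshold gate is a placeholder assumption, and since the caller casts labels to LongTensor (hard labels), the label handling here picks the dominant side rather than mixing softly.

import torch

def interpolate_sketch(matrix1, label1, matrix2, label2, l, threshold):
    # Standard mixup combination; NOT necessarily this repo's implementation.
    mixed = l * matrix1 + (1 - l) * matrix2
    # Placeholder assumption: skip pairs whose mixing weight falls below the threshold.
    if min(l, 1 - l) < threshold:
        return None, None
    # The caller builds a LongTensor of labels, which implies hard labels;
    # taking the dominant side's label is an assumption.
    mixed_label = label1 if l >= 0.5 else label2
    return mixed, mixed_label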