Commit 926e4750 authored by friebolin (merge of parents 75b239bb and d7a0226f)
......@@ -45,7 +45,7 @@ def evaluate_model(model, name,test_dataset, batch_size, imdb=False):
with torch.no_grad():
if name[0] == "b":
if imdb==False:
print("Evaluating Bert model")
#print("Evaluating Bert model")
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'token_type_ids': batch[2],
......@@ -53,14 +53,14 @@ def evaluate_model(model, name,test_dataset, batch_size, imdb=False):
'end_position': batch[4],
'labels': batch[5]}
elif imdb==True:
print("Evaluating Bert model on imdb")
#print("Evaluating Bert model on imdb")
inputs={'input_ids':batch[0],
'attention_mask':batch[1],
'token_type_ids':batch[2],
'labels':batch[3]}
if name[0] == "r":
print("Evaluating roberta model")
#print("Evaluating roberta model")
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'start_position': batch[2],
......
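
For context on this hunk: evaluation builds a different keyword dictionary per architecture, because BERT consumes token_type_ids while RoBERTa does not use segment embeddings, and the imdb variant carries no start/end positions. A minimal, illustrative sketch of that dispatch inside a no-grad evaluation loop (function and variable names here are assumptions, not the repository's exact code):

import torch

def evaluate_batches(model, dataloader, name="bert-base-uncased", imdb=False):
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in dataloader:
            if name[0] == "b" and not imdb:
                inputs = {'input_ids': batch[0], 'attention_mask': batch[1],
                          'token_type_ids': batch[2], 'start_position': batch[3],
                          'end_position': batch[4], 'labels': batch[5]}
            elif name[0] == "b" and imdb:
                inputs = {'input_ids': batch[0], 'attention_mask': batch[1],
                          'token_type_ids': batch[2], 'labels': batch[3]}
            else:  # roberta: no token_type_ids in this setup
                inputs = {'input_ids': batch[0], 'attention_mask': batch[1],
                          'start_position': batch[2],
                          'labels': batch[3]}  # index layout beyond start_position is assumed
            outputs = model(**inputs)
            logits = outputs[1]  # assumes (loss, logits, ...) output ordering
            predictions.append(torch.argmax(logits, dim=-1))
    return torch.cat(predictions)
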
......@@ -6,7 +6,7 @@ import evaluate
import json
import random
import math
import train
import Code.train
import copy
from tqdm.auto import tqdm
from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig, BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer, AutoConfig
......@@ -117,7 +117,7 @@ class WordClassificationModel(torch.nn.Module):
if self.tmix==True and mixepoch == True:
outputs = (logits,) + outputs[2:]
loss = train.cross_entropy(logits[:math.floor((logits.size()[0]/2))], outputs[1][:math.floor((outputs[1].size()[0]/2))], lambda_value) #special CEL for soft labels
loss = Code.train.cross_entropy(logits[:math.floor((logits.size()[0]/2))], outputs[1][:math.floor((outputs[1].size()[0]/2))], lambda_value) #special CEL for soft labels
outputs = (loss,) + outputs
else:
......
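
The import changes in this file (presumably models.py, given WordClassificationModel) and in train.py below switch from flat imports (import train, import evaluation) to package-qualified ones (import Code.train, import Code.evaluation). A minimal sketch of the assumed layout that makes these resolve when the entry point is run from the repository root (the exact structure is not shown in this diff):

# Assumed layout (hypothetical, for illustration only):
#   <repo root>/
#       Code/
#           __init__.py
#           models.py
#           train.py
#           evaluation.py
#
# With the repository root on sys.path, the package-qualified names used in
# the hunks above and below resolve consistently:
import Code.train
import Code.evaluation

soft_label_loss = Code.train.cross_entropy            # used in WordClassificationModel
trainer_metrics = Code.evaluation.compute_metrics     # passed to the Hugging Face Trainer
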
......@@ -218,7 +218,7 @@ def tokenizer_new(tokenizer, input, max_length, masked=False, old_dataset=False,
else:
all_token_type_ids.append(context_token_type_ids)
if tokenizer.name_or_path[0] =="r":
print("roberta tokenizer")
dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to("cuda") ,
torch.tensor(all_attention_masks, dtype=torch.long).to("cuda") ,
......
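
The tokenizer hunk above wraps the collected lists in a TensorDataset whose tensors are moved to "cuda" at construction time. That works, but it keeps the whole dataset in GPU memory; a common alternative, sketched below under the assumption that the lists are rectangular (equal length per example), is to build the dataset on the CPU and move each batch inside the loop:

import torch
from torch.utils.data import TensorDataset, DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"

# hypothetical stand-ins for the lists built in tokenizer_new
all_input_ids = [[101, 7592, 102], [101, 2088, 102]]
all_attention_masks = [[1, 1, 1], [1, 1, 1]]
all_labels = [0, 1]

dataset = TensorDataset(torch.tensor(all_input_ids, dtype=torch.long),
                        torch.tensor(all_attention_masks, dtype=torch.long),
                        torch.tensor(all_labels, dtype=torch.long))

for input_ids, attention_mask, labels in DataLoader(dataset, batch_size=2):
    input_ids = input_ids.to(device)          # move per batch instead of per dataset
    attention_mask = attention_mask.to(device)
    labels = labels.to(device)
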
import torch
import tqdm
import numpy as np
import evaluation
import Code.evaluation
import evaluate
import json
import random
import math
from tqdm.auto import tqdm
from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig, BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer
from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig, BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer, Trainer, TrainingArguments
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from transformers import AdamW, get_scheduler
from torch import nn
......@@ -27,28 +27,29 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
"""Train loop for models. Iterates over epochs and batches and gives inputs to model. After training, call evaluation.py for evaluation of finetuned model.
Params:
model: model out of models.py
name: str
train_dataset: Dataset
test_dataset: Dataset
seed: int
batch_size:
test_batch_size:
num_epochs: int
imdb: bool
mixup: bool
lambda_value: float
mixepoch:int
tmix: bool
mixlayer: int in {0, 11}
learning_rate: float
mlp_leaning_rate:float
model: model out of models.py -> WordClassificationModel, BertForWordClassification or RobertaForWordClassification
name: str -> specifies the model architecture (either bert-base-uncased or roberta-base)
train_dataset: Dataset -> train dataset as torch Dataset object (created in preprocess.py)
test_dataset: Dataset -> test dataset as torch Dataset object (created in preprocess.py)
seed: int -> random seed
batch_size: int -> batch size for training
test_batch_size: int -> batch size for testing
num_epochs: int -> number of epochs
imdb: bool -> whether the imdb dataset is used
mixup: bool -> whether to use mixup in training
lambda_value: float -> lambda value to use if mixup or tmix is selected
mixepoch: int -> epoch in which to apply mixup
tmix: bool -> whether tmix is used in training (distinguishes mixing in training from not mixing in evaluation)
mixlayer: int in {0, ..., 11} -> layer to mix in when using tmix
learning_rate: float -> learning rate for the Bert/Roberta model, or for WordClassificationModel including the linear classifier
mlp_learning_rate: float -> separate learning rate for the multi-layer perceptron
Returns: Evaluation results for train and test dataset (accuracy, F1, precision and recall)"""
model.train().to(device)
train_sampler = RandomSampler(train_dataset)
train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size, shuffle=True)
train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
num_training_steps=num_epochs*len(train_dataloader)
if mlp_learning_rate==None:
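
The removal of shuffle=True in the DataLoader above is a real fix rather than a cosmetic change: torch.utils.data.DataLoader treats a custom sampler and shuffle=True as mutually exclusive and raises a ValueError at construction time, and RandomSampler already provides the shuffling. A minimal reproduction of the corrected pattern:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

toy_dataset = TensorDataset(torch.arange(10).unsqueeze(1))
sampler = RandomSampler(toy_dataset)

# Old line (would raise "sampler option is mutually exclusive with shuffle"):
# DataLoader(toy_dataset, sampler=sampler, batch_size=4, shuffle=True)

# Fixed line, as in the hunk above:
train_dataloader = DataLoader(toy_dataset, sampler=sampler, batch_size=4)
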
......@@ -76,11 +77,15 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
'start_position': batch[3],
'end_position': batch[4],
'labels': batch[5]}
labels=batch[5]
start_positions=batch[3]
end_positions=batch[4]
if imdb==True:
inputs={'input_ids':batch[0],
'attention_mask': batch[1],
'token_type_ids': batch[2],
'labels': batch[3]}
if tmix==True:
if imdb == False:
print("this is mixup epoch")
......@@ -93,6 +98,9 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
'mixepoch': True,
'mixlayer':mixlayer,
'lambda_value':lambda_value}
labels=batch[5]
start_positions=batch[3]
end_positions=batch[4]
if imdb==True:
print("this is a mixup epoch with imdb")
inputs={'input_ids':batch[0],
......@@ -123,7 +131,7 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
model.zero_grad()
if epoch==mixepoch:
#print("mixepoch")
print("mixepoch")
if mixup == True:
#calculate new last hidden states and predictions(logits)
new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value)
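
mixup_function is called here with the hidden representations (outputs[2]), the labels and the lambda value, which matches standard mixup: pairs of examples and their labels are interpolated with weight lambda. A minimal sketch of that interpolation, assuming consecutive pairing within the batch (the repository's exact pairing strategy is not visible in this hunk):

import torch

def mixup_sketch(batch_of_matrices, batch_of_labels, l):
    """Interpolate consecutive pairs of hidden states and labels with weight l."""
    mixed_matrices, mixed_labels = [], []
    for i in range(0, batch_of_matrices.size(0) - 1, 2):
        mixed_matrices.append(l * batch_of_matrices[i] + (1 - l) * batch_of_matrices[i + 1])
        mixed_labels.append(l * batch_of_labels[i].float() + (1 - l) * batch_of_labels[i + 1].float())
    return torch.stack(mixed_matrices), torch.stack(mixed_labels)
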
......@@ -155,7 +163,7 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
return evaluation_test, evaluation_train
def cross_entropy(logits, target):
def cross_entropy(logits, target, l):
"""
Computes the cross-entropy loss between the predicted logits and the target labels.
......@@ -178,10 +186,10 @@ def cross_entropy(logits, target):
if value == 1 or value == 0: #check if non-mixed label
one_hot = torch.tensor([1-value,value], device='cuda:0') #creating one-hot vector e.g. [0. ,1.]
loss_clear_labels = -((one_hot[0] * logprobs[0][0]) + (one_hot[1] * logprobs[0][1]))
#calculation with indexing (- 1-label * )
#calculation with indexing
results = torch.cat((loss_clear_labels.view(1), results), dim=0)
else:
mixed_vec = torch.tensor([value, 1-value]) #creating on-hot mixed vec.
mixed_vec = torch.tensor([l, 1-l]) #creating mixed (soft) label vector
logprobs = torch.nn.functional.log_softmax(lg, dim=1)#logits in log probabilities
loss_mixed_labels = -((mixed_vec[0] * logprobs[0][0]) + (mixed_vec[1] * logprobs[0][1]))
#calculation for mixed with indexing
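
The new parameter l lets the loss build the soft target [l, 1-l] for mixed examples directly from the mixup weight, instead of reusing the interpolated label value as the line above previously did. A self-contained sketch that follows the fragments visible in this hunk (the per-example loop, tensor shapes and the final reduction are assumptions):

import torch

def cross_entropy_sketch(logits, target, l):
    """Soft-label cross-entropy: one-hot targets for clean labels, [l, 1-l] for mixed ones."""
    losses = []
    for lg, value in zip(logits, target):
        value = float(value)
        logprobs = torch.nn.functional.log_softmax(lg.unsqueeze(0), dim=1)
        if value in (0.0, 1.0):                  # non-mixed label
            target_vec = torch.tensor([1 - value, value])
        else:                                    # mixed label
            target_vec = torch.tensor([l, 1 - l])
        losses.append(-(target_vec[0] * logprobs[0][0] + target_vec[1] * logprobs[0][1]))
    return torch.stack(losses).mean()            # reduction is an assumption
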
......@@ -219,7 +227,7 @@ def mixup_function(batch_of_matrices, batch_of_labels, l):
return results, result_labels
def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
def train_salami(model,name, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
"""Train loop of the salami group"""
results=[]
training_args = TrainingArguments(
......@@ -243,7 +251,7 @@ def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size,
train_dataset=train_set,
eval_dataset=test_set,
args=training_args,
compute_metrics=evaluation.evaluate_model
compute_metrics=Code.evaluation.compute_metrics
)
trainer.train()
......
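
train_salami now hands Code.evaluation.compute_metrics to the Hugging Face Trainer. Trainer calls this hook with an EvalPrediction (predictions and label_ids) and expects a dict of metric names to values; a plausible shape for it, matching the metrics reported elsewhere in the project (accuracy, F1, precision, recall) but not necessarily the repository's actual implementation, is:

import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    """Hook for transformers.Trainer: EvalPrediction -> dict of metric values."""
    logits, labels = eval_pred.predictions, eval_pred.label_ids
    preds = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="binary")
    return {"accuracy": accuracy_score(labels, preds),
            "f1": f1, "precision": precision, "recall": recall}
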
......@@ -67,12 +67,12 @@ def run(raw_args):
if args.train_loop=="swp":
evaluation_test, evaluation_train = Code.train.train(model, args.architecture, train_dataset, test_dataset, args.random_seed,args.batch_size, args.test_batch_size,args.epochs,args.imdb, args.mix_up, args.lambda_value, args.mixepoch, args.tmix, args.mixlayer, args.learning_rate, args.second_learning_rate, args.model_save_path)
elif args.train_loop=="salami":
evaluation_test = Code.train.train_salami(model,args.random_seed, train_dataset, test_dataset, args.batch_size, args.test_batch_size, args.learning_rate, args.epochs)
evaluation_test = Code.train.train_salami(model,args.architecture, args.random_seed, train_dataset, test_dataset, args.batch_size, args.test_batch_size, args.learning_rate, args.epochs)
else:
print("no eligible train loop selected")
#save
if isinstance(args.save_directory, str):
#save
if args.save_directory !=None:
with open(args.save_directory, "x") as f:
f.write(str(args))
f.write(str(evaluation_test))
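
Two details of this guarded write are worth noting: with --save_directory now defaulting to None (see the argparse hunk further down), the simple None check is sufficient, and mode "x" opens the file for exclusive creation, so an already-existing file raises FileExistsError rather than being overwritten. A minimal sketch with hypothetical values:

save_directory = "run1_results.txt"       # None when the flag is omitted
evaluation_test = {"accuracy": 0.87}      # placeholder for the returned metrics

if save_directory is not None:            # idiomatic form of the != None check
    with open(save_directory, "x") as f:  # "x": fail instead of overwriting old results
        f.write(str(evaluation_test))
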
......@@ -208,8 +208,7 @@ if __name__ == "__main__":
"-lambda",
"--lambda_value",
help="speficies the lambda value for mixup",
type=float,
default=0.4)
type=float)
parser.add_argument(
"-mixepoch",
......@@ -231,7 +230,8 @@ if __name__ == "__main__":
parser.add_argument(
"-sd",
"--save_directory",
help="Destination directory for the output results of the run")
help="Destination directory for the output results of the run",
default=None)
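
Both argparse changes in this file move a default out of the parser: --lambda_value loses its implicit 0.4 and --save_directory gains an explicit default=None, so run() (and any mixup code) must now cope with these attributes being None when the flags are omitted. A minimal sketch of the resulting behaviour, using the same flags on a hypothetical standalone parser:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-lambda", "--lambda_value",
                    help="specifies the lambda value for mixup", type=float)
parser.add_argument("-sd", "--save_directory",
                    help="Destination directory for the output results of the run",
                    default=None)

args = parser.parse_args([])                      # no flags given
assert args.lambda_value is None                  # no silent 0.4 default any more
assert args.save_directory is None                # nothing will be written

args = parser.parse_args(["--lambda_value", "0.4", "--save_directory", "run1_results.txt"])
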
parser.add_argument(
"-msp",
......