diff --git a/Code/__pycache__/evaluation.cpython-39.pyc b/Code/__pycache__/evaluation.cpython-39.pyc
index dae64c5ab2f6a7dd905001c25ed0152e6cf0c655..9a7e021150bae6e0bbeaa94079b8785a9dceb07e 100644
Binary files a/Code/__pycache__/evaluation.cpython-39.pyc and b/Code/__pycache__/evaluation.cpython-39.pyc differ
diff --git a/Code/__pycache__/models.cpython-39.pyc b/Code/__pycache__/models.cpython-39.pyc
index 2390a8e8d639f4966c650a0b98e94675d6a98bd8..dbbd8c146ec7697832874ba03aa864c4ce287e4c 100644
Binary files a/Code/__pycache__/models.cpython-39.pyc and b/Code/__pycache__/models.cpython-39.pyc differ
diff --git a/Code/__pycache__/preprocess.cpython-39.pyc b/Code/__pycache__/preprocess.cpython-39.pyc
index 6b6fd34fb43f99c6a0898ce234ea5f46b52a0a4b..453b2918d23d60fe92b4ec6801b68ec13a3e3598 100644
Binary files a/Code/__pycache__/preprocess.cpython-39.pyc and b/Code/__pycache__/preprocess.cpython-39.pyc differ
diff --git a/Code/__pycache__/train.cpython-39.pyc b/Code/__pycache__/train.cpython-39.pyc
index 1a386b93ee4f4191efb8dda7fa30800056b3d3bc..e6950329e68a341eb011364c84304cd25eb49bcf 100644
Binary files a/Code/__pycache__/train.cpython-39.pyc and b/Code/__pycache__/train.cpython-39.pyc differ
diff --git a/Code/evaluation.py b/Code/evaluation.py
index 3ada77aaae3f344f9a4638846b2b97a68b14f693..6212fd1d728b7724133959897570a128347df814 100644
--- a/Code/evaluation.py
+++ b/Code/evaluation.py
@@ -45,7 +45,7 @@ def evaluate_model(model, name,test_dataset, batch_size, imdb=False):
         with torch.no_grad():
             if name[0] == "b":
                 if imdb==False:
-                    print("Evaluating Bert model")
+                    #print("Evaluating Bert model")
                     inputs = {'input_ids': batch[0],
                               'attention_mask': batch[1],
                               'token_type_ids': batch[2],
@@ -53,14 +53,14 @@ def evaluate_model(model, name,test_dataset, batch_size, imdb=False):
                               'end_position': batch[4],
                               'labels': batch[5]}
                 elif imdb==True:
-                    print("Evaluating Bert model on imdb")
+                    #print("Evaluating Bert model on imdb")
                     inputs={'input_ids':batch[0],
                             'attention_mask':batch[1],
                             'token_type_ids':batch[2],
                             'labels':batch[3]}
 
             if name[0] == "r":
-                print("Evaluating roberta model")
+                #print("Evaluating roberta model")
                 inputs = {'input_ids': batch[0],
                           'attention_mask': batch[1],
                           'start_position': batch[2],
diff --git a/Code/models.py b/Code/models.py
index f488662e9b5a9c027e6de596fd58be90c7d3954f..6be2a5072379ea6cbda4b50d7a054d0f24777a73 100644
--- a/Code/models.py
+++ b/Code/models.py
@@ -6,7 +6,7 @@ import evaluate
 import json
 import random
 import math
-import train
+import Code.train
 import copy
 from tqdm.auto import tqdm
 from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig, BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer, AutoConfig
@@ -117,7 +117,7 @@ class WordClassificationModel(torch.nn.Module):
 
             if self.tmix==True and mixepoch == True:
                 outputs = (logits,) + outputs[2:]
-                loss = train.cross_entropy(logits[:math.floor((logits.size()[0]/2))], outputs[1][:math.floor((outputs[1].size()[0]/2))], lambda_value) #special CEL for soft labels
+                loss = Code.train.cross_entropy(logits[:math.floor((logits.size()[0]/2))], outputs[1][:math.floor((outputs[1].size()[0]/2))], lambda_value) #special CEL for soft labels
                 outputs = (loss,) + outputs
 
             else:
diff --git a/Code/preprocess.py b/Code/preprocess.py
index 7c179cae581441849b2b9d185076a81412ff25c4..45c0fe24f7a4f0233664e8d106b83f2c6a42fa95 100644
--- a/Code/preprocess.py
+++ b/Code/preprocess.py
@@ -218,7 +218,7 @@ def tokenizer_new(tokenizer, input, max_length, masked=False, old_dataset=False,
         else:
             all_token_type_ids.append(context_token_type_ids)
-            
+
     if tokenizer.name_or_path[0] =="r":
         print("roberta tokenizer")
         dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to("cuda") , torch.tensor(all_attention_masks, dtype=torch.long).to("cuda") ,
diff --git a/Code/train.py b/Code/train.py
index a215b15503c7748fe1e450158e2b7f26ccea4e22..e45ea60e4eb4779e0c7fbbce3354b58b42d3fc29 100644
--- a/Code/train.py
+++ b/Code/train.py
@@ -1,13 +1,13 @@
 import torch
 import tqdm
 import numpy as np
-import evaluation
+import Code.evaluation
 import evaluate
 import json
 import random
 import math
 from tqdm.auto import tqdm
-from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig, BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer
+from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig, BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer, Trainer, TrainingArguments
 from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
 from transformers import AdamW, get_scheduler
 from torch import nn
@@ -27,28 +27,29 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
     """Train loop for models. Iterates over epochs and batches and gives inputs to model. After training,
     call evaluation.py for evaluation of finetuned model.
     Params:
-        model: model out of models.py
-        name: str
-        train_dataset: Dataset
-        test_dataset: Dataset
-        seed: int
-        batch_size:
-        test_batch_size:
-        num_epochs: int
-        imdb: bool
-        mixup: bool
-        lambda_value: float
-        mixepoch:int
-        tmix: bool
-        mixlayer: int in {0, 11}
-        learning_rate: float
-        mlp_leaning_rate:float
+
+        model: model out of models.py -> WordClassificationModel, BertForWordClassification or RobertaForWordClassification
+        name: str -> specifies architecture of model (either bert-base-uncased or roberta-base)
+        train_dataset: Dataset -> Train dataset as Torch.Dataset Object (created in preprocess.py)
+        test_dataset: Dataset -> Test dataset as Torch.Dataset Object (created in preprocess.py)
+        seed: int -> Random seed
+        batch_size: -> batch size for training
+        test_batch_size: -> batch size for testing
+        num_epochs: int -> number of epochs
+        imdb: bool -> whether or not imdb dataset is used
+        mixup: bool -> whether or not to use mixup in training
+        lambda_value: float -> if mixup or tmix selected, what lambda value to use
+        mixepoch: int -> specifies in what epoch to use mixup
+        tmix: bool -> whether or not tmix is used in training (used to differentiate between mixing in training and not mixing in evaluation)
+        mixlayer: int in {0, 11} -> what layer to mix in tmix
+        learning_rate: float -> learning rate for Bert/Roberta Model, or WordClassificationModel including linear classifier
+        mlp_learning_rate: float -> separate learning rate for multi layer perceptron
 
     Returns:
         Evaluation Results for train and test dataset in Accuracy, F1, Precision and Recall"""
     model.train().to(device)
     train_sampler = RandomSampler(train_dataset)
-    train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size, shuffle=True)
+    train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
     num_training_steps=num_epochs*len(train_dataloader)
 
     if mlp_learning_rate==None:
@@ -76,11 +77,15 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
                           'start_position': batch[3],
                           'end_position': batch[4],
                           'labels': batch[5]}
+                labels=batch[5]
+                start_positions=batch[3]
+                end_positions=batch[4]
             if imdb==True:
                 inputs={'input_ids':batch[0],
                         'attention_mask': batch[1],
                         'token_type_ids': batch[2],
                         'labels': batch[3]}
+
             if tmix==True:
                 if imdb == False:
                     print("this is mixup epoch")
@@ -93,6 +98,9 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
                               'mixepoch': True,
                               'mixlayer':mixlayer,
                               'lambda_value':lambda_value}
+                    labels=batch[5]
+                    start_positions=batch[3]
+                    end_positions=batch[4]
                 if imdb==True:
                     print("this is a mixup epoch with imdb")
                     inputs={'input_ids':batch[0],
@@ -123,7 +131,7 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
         model.zero_grad()
 
         if epoch==mixepoch:
-            #print("mixepoch")
+            print("mixepoch")
            if mixup == True:
                 #calculate new last hidden states and predictions(logits)
                 new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value)
@@ -155,7 +163,7 @@ def train(model, name,train_dataset, test_dataset, seed, batch_size, test_batch_
     return evaluation_test, evaluation_train
 
 
-def cross_entropy(logits, target):
+def cross_entropy(logits, target, l):
     """
     Computes the cross-entropy loss between the predicted logits and the target labels.
 
@@ -178,10 +186,10 @@ def cross_entropy(logits, target):
         if value == 1 or value == 0: #check if non-mixed label
             one_hot = torch.tensor([1-value,value], device='cuda:0') #creating one-hot vector e.g. [0. ,1.]
             loss_clear_labels = -((one_hot[0] * logprobs[0][0]) + (one_hot[1] * logprobs[0][1]))
-            #calculation with indexing (- 1-label * )
+            #calculation with indexing
             results = torch.cat((loss_clear_labels.view(1), results), dim=0)
         else:
-            mixed_vec = torch.tensor([value, 1-value]) #creating on-hot mixed vec.
+            mixed_vec = torch.tensor([l, 1-l]) #creating one-hot mixed vec.
             logprobs = torch.nn.functional.log_softmax(lg, dim=1)#logits in log probabilities
             loss_mixed_labels = -((mixed_vec[0] * logprobs[0][0]) + (mixed_vec[1] * logprobs[0][1]))
             #calculation for mixed with indexing
@@ -219,7 +227,7 @@
     return results, result_labels
 
 
-def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
+def train_salami(model,name, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
     """Train loop of the salami group"""
     results=[]
     training_args = TrainingArguments(
@@ -243,7 +251,7 @@ def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size,
         train_dataset=train_set,
         eval_dataset=test_set,
         args=training_args,
-        compute_metrics=evaluation.evaluate_model
+        compute_metrics=Code.evaluation.compute_metrics
     )
 
     trainer.train()
diff --git a/main.py b/main.py
index 8a33f4ff0965ece565502accf64130a78ee37176..fe8c434ab49eb37416c55ae23e872a1238166cfc 100644
--- a/main.py
+++ b/main.py
@@ -67,12 +67,12 @@ def run(raw_args):
     if args.train_loop=="swp":
         evaluation_test, evaluation_train = Code.train.train(model, args.architecture, train_dataset, test_dataset, args.random_seed,args.batch_size, args.test_batch_size,args.epochs,args.imdb, args.mix_up, args.lambda_value, args.mixepoch, args.tmix, args.mixlayer, args.learning_rate, args.second_learning_rate, args.model_save_path)
     elif args.train_loop=="salami":
-        evaluation_test = Code.train.train_salami(model,args.random_seed, train_dataset, test_dataset, args.batch_size, args.test_batch_size, args.learning_rate, args.epochs)
+        evaluation_test = Code.train.train_salami(model,args.architecture, args.random_seed, train_dataset, test_dataset, args.batch_size, args.test_batch_size, args.learning_rate, args.epochs)
     else:
         print("no eligible train loop selected")
 
-    #save
-    if isinstance(args.save_directory, str):
+    #save
+    if args.save_directory !=None:
         with open(args.save_directory, "x") as f:
             f.write(str(args))
             f.write(str(evaluation_test))
@@ -208,8 +208,7 @@ if __name__ == "__main__":
         "-lambda",
         "--lambda_value",
         help="speficies the lambda value for mixup",
-        type=float,
-        default=0.4)
+        type=float)
 
     parser.add_argument(
         "-mixepoch",
@@ -231,7 +230,8 @@ if __name__ == "__main__":
     parser.add_argument(
         "-sd",
         "--save_directory",
-        help="Destination directory for the output results of the run")
+        help="Destination directory for the output results of the run",
+        default=None)
 
     parser.add_argument(
         "-msp",
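
Note on the soft-label loss touched by this diff: cross_entropy(logits, target) gains a third argument l (the mixup coefficient), and the mixed-label branch now scores an example against the target vector (l, 1-l) rather than a vector built from the label value itself. The snippet below is a minimal, self-contained sketch of that soft-label cross-entropy idea for two-class logits; it is a vectorized illustration under assumed tensor shapes, not the repository's element-wise loop, and the function and variable names are hypothetical.

import torch
import torch.nn.functional as F

def soft_label_cross_entropy(logits, soft_targets):
    # logits:       (batch, 2) raw classifier scores
    # soft_targets: (batch, 2) rows summing to 1, e.g. (1-y, y) for a hard
    #               label y in {0, 1}, or (l, 1-l) for a mixup-mixed example
    log_probs = F.log_softmax(logits, dim=1)               # per-class log-probabilities
    per_example = -(soft_targets * log_probs).sum(dim=1)   # -sum_c t_c * log p_c
    return per_example.mean()                              # average over the batch

# Hypothetical usage: one hard label (class 1) and one example mixed with l = 0.4
logits = torch.tensor([[0.2, 1.1], [0.7, -0.3]])
targets = torch.tensor([[0.0, 1.0], [0.4, 0.6]])
loss = soft_label_cross_entropy(logits, targets)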