Skip to content
Snippets Groups Projects
train.py 7.41 KiB
Newer Older
kulcsar's avatar
kulcsar committed
import torch
import tqdm
import numpy as np
import evaluation
kulcsar's avatar
kulcsar committed
import evaluate
kulcsar's avatar
kulcsar committed
import json
import random
import math
from tqdm.auto import tqdm
from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig,  BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from transformers import AdamW, get_scheduler
from torch import nn
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt
import os
import pandas as pd
import sklearn

metric=evaluate.load("accuracy")
torch.cuda.empty_cache()

umlauf's avatar
umlauf committed
#with torch.autocast("cuda"):
umlauf's avatar
umlauf committed

kulcsar's avatar
kulcsar committed
def train(model, name, imdb, seed,mixup,lambda_value, mixepoch, tmix, mixlayer, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
	"""Train loop for models. Iterates over epochs and batches and gives inputs to model. After training, call evaluation.py for evaluation of finetuned model."""
umlauf's avatar
umlauf committed
	model.train().to("cuda")
	train_sampler = RandomSampler(train_dataset)
	train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
	num_training_steps=num_epochs*len(train_dataloader)
umlauf's avatar
umlauf committed

umlauf's avatar
umlauf committed
	optimizer=AdamW(model.parameters(), lr=learning_rate, eps=1e-8, weight_decay=0.1)
	lr_scheduler=get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=10, num_training_steps=num_training_steps)
umlauf's avatar
umlauf committed

umlauf's avatar
umlauf committed
	model.zero_grad()
	for epoch in range(num_epochs):
		index=0
kulcsar's avatar
kulcsar committed
		
umlauf's avatar
umlauf committed
		for batch in train_dataloader:
kulcsar's avatar
kulcsar committed
			print(len(batch))
kulcsar's avatar
kulcsar committed
			if name[0] == "b":
				if tmix==False:
					inputs = {'input_ids': batch[0],
							'attention_mask': batch[1],
							'token_type_ids': batch[2],
							'start_position': batch[3],
							'end_position': batch[4],
							'labels': batch[5]}
				if tmix==True:
					if imdb == False:
						print("this is mixup epoch")
						inputs={'input_ids': batch[0],
									'attention_mask': batch[1],
									'token_type_ids': batch[2],
									'start_position': batch[3],
									'end_position': batch[4],
									'labels': batch[5],
									'mixepoch': True,
									'mixlayer':mixlayer,
kulcsar's avatar
kulcsar committed
									'lambda_value':lambda_value}
					if imdb==True:
						print("this is a mixup epoch with imdb")
						inputs={'input_ids':batch[0],
								'attention_mask': batch[1],
								'token_type_ids': batch[2],
								'labels': batch[3],
								'mixepoch': True,
								'mixlayer': mixlayer,
								'lambda_value': lambda_value}
kulcsar's avatar
kulcsar committed
			if name[0] == "r":
umlauf's avatar
umlauf committed
				inputs = {'input_ids': batch[0],
						  'attention_mask': batch[1],
						  'start_position': batch[2],
						  'end_position': batch[3],
						  'labels': batch[4]}
				labels = batch[4]
				start_positions=batch[2]
				end_positions=batch[3]
			outputs=model(**inputs)
			loss=outputs[0]
			print("Loss: ", loss)
kulcsar's avatar
kulcsar committed
			loss.backward()
umlauf's avatar
umlauf committed
			optimizer.step()
			lr_scheduler.step()
			optimizer.zero_grad()
			model.zero_grad()
kulcsar's avatar
kulcsar committed

			if epoch==mixepoch:
kulcsar's avatar
kulcsar committed
				print("mixepoch")
umlauf's avatar
umlauf committed
				if mixup == True:
kulcsar's avatar
kulcsar committed
					#calculate new last hidden states and predictions(logits)
umlauf's avatar
umlauf committed
					new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
					new_matrix_batch.to("cuda")
					new_labels_batch.to("cuda")
					span_output=torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
					for i in range(new_matrix_batch.shape[0]):
						span_output[i]=new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
kulcsar's avatar
kulcsar committed
					logits=model.classifier(span_output.detach())
umlauf's avatar
umlauf committed
					logits = logits.view(-1, 2).to("cuda")
umlauf's avatar
umlauf committed
					target = new_labels_batch.view(-1).to("cuda")
umlauf's avatar
umlauf committed
					loss_2 = cross_entropy(logits, target, lambda_value)
umlauf's avatar
umlauf committed
					#update entire model
					loss_2.backward()
					optimizer.step()
					lr_scheduler.step()
					optimizer.zero_grad()
					model.zero_grad()
umlauf's avatar
umlauf committed
	
friebolin's avatar
friebolin committed
	torch.save(model, "saved_models/bert_baseline.pth")
kulcsar's avatar
kulcsar committed
	#evaluate trained model
kulcsar's avatar
kulcsar committed
	evaluation_test = evaluation.evaluate_model(model, name,  test_dataset, test_batch_size, imdb)
	evaluation_train = evaluation.evaluate_model(model, name, train_dataset, test_batch_size, imdb)
umlauf's avatar
umlauf committed
 
kulcsar's avatar
kulcsar committed
	print("TEST: ", evaluation_test)
umlauf's avatar
umlauf committed
	print("TRAIN: ", evaluation_train)
kulcsar's avatar
kulcsar committed

umlauf's avatar
umlauf committed
	return evaluation_test, evaluation_train
kulcsar's avatar
kulcsar committed

umlauf's avatar
umlauf committed
def cross_entropy(logits, target, l):
umlauf's avatar
umlauf committed
	results = torch.tensor([], device='cuda')
umlauf's avatar
umlauf committed
	for i in range (logits.shape[0]):
umlauf's avatar
umlauf committed
		lg = logits[i:i+1,:] #comment to explain the process in this Code Line
umlauf's avatar
umlauf committed
		t = target[i]
umlauf's avatar
umlauf committed
		#makes the logits in log (base e) probabilities
umlauf's avatar
umlauf committed
		logprobs = torch.nn.functional.log_softmax(lg, dim=1)
umlauf's avatar
umlauf committed
		value = t.item() #gets Item (0. or 1.)
		if value == 1 or value == 0:
			one_hot = torch.tensor([1-value,value], device='cuda:0') #creating one-hot vector e.g. [0. ,1.]
umlauf's avatar
umlauf committed
			#class 1 and 2 mixed
umlauf's avatar
umlauf committed
			loss_clear_labels = -((one_hot[0] * logprobs[0][0]) + (one_hot[1] * logprobs[0][1]))
			results = torch.cat((loss_clear_labels.view(1), results), dim=0)
umlauf's avatar
umlauf committed
		else:
umlauf's avatar
umlauf committed
			value_r = round(value, 1) #to make it equal to lambda_value e.g. 0.4
umlauf's avatar
umlauf committed
			#Wert mit Flag
			mixed_vec = torch.tensor([value_r, 1-value_r])
			print("Mixed Vec: ", mixed_vec)
			logprobs = torch.nn.functional.log_softmax(lg, dim=1)
			print("Log:", logprobs)
        	#loss_mixed_labels = -torch.mul(mixed_vec, logprobs).sum()
			loss_mixed_labels = -((mixed_vec[0] * logprobs[0][0]) + (mixed_vec[1] * logprobs[0][1]))
			print("Loss Mixed Lables l: ", loss_mixed_labels)
			results = torch.cat((loss_mixed_labels.view(1), results), dim=0)
			print("Results Mixed 1: ", results)
umlauf's avatar
umlauf committed
	print("ALL BATCH Results: ", results)
umlauf's avatar
umlauf committed
	batch_loss = results.mean() #compute average
umlauf's avatar
umlauf committed
	#print("Batch Loss: ", batch_loss)
umlauf's avatar
umlauf committed
	return batch_loss
umlauf's avatar
umlauf committed

umlauf's avatar
umlauf committed

umlauf's avatar
umlauf committed

kulcsar's avatar
kulcsar committed
def mixup_function(batch_of_matrices, batch_of_labels, l):
	"""Function to perform mixup on a batch of matrices and labels with a given lambda
	"""
kulcsar's avatar
kulcsar committed
	runs = math.floor(batch_of_matrices.size()[0]/2)
	counter=0
kulcsar's avatar
kulcsar committed
	results=[]
	result_labels=[]
kulcsar's avatar
kulcsar committed
	for i in range(runs):
kulcsar's avatar
kulcsar committed
		#get matrices and labels out of batch
kulcsar's avatar
kulcsar committed
		matrix1=batch_of_matrices[counter]
		label1=batch_of_labels[counter]
		matrix2=batch_of_matrices[counter+1]
		label2=batch_of_labels[counter+1]
kulcsar's avatar
kulcsar committed

		#do interpolation
		new_matrix=matrix1*l + (1-l)*matrix2
		new_label=l*label1 + (1-l)*label2
		
kulcsar's avatar
kulcsar committed
		if new_matrix != None:
			results.append(new_matrix)
			result_labels.append(new_label)
kulcsar's avatar
kulcsar committed
		counter+=2
kulcsar's avatar
kulcsar committed
	results=torch.stack(results)
kulcsar's avatar
kulcsar committed
	result_labels= torch.stack(result_labels) #torch.LongTensor(result_labels)
kulcsar's avatar
kulcsar committed
	return results, result_labels
kulcsar's avatar
kulcsar committed

kulcsar's avatar
kulcsar committed
	
kulcsar's avatar
kulcsar committed
def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
kulcsar's avatar
kulcsar committed
	"""Train loop of the salami group"""
kulcsar's avatar
kulcsar committed
	results=[]
	training_args = TrainingArguments(
		output_dir="./results",  # output directory
		num_train_epochs=epochs,  # total # of training epochs
		per_device_train_batch_size=batch_size,  # batch size per device during training
		per_device_eval_batch_size=test_batch_size,  # batch size for evaluation
		warmup_steps=10,  # number of warmup steps for learning rate scheduler
		weight_decay=0.1,  # strength of weight decay
		learning_rate=learning_rate,
		evaluation_strategy="no",  # evaluates never, per epoch, or every eval_steps
		eval_steps=10,
		logging_dir="./logs",  # directory for storing logs
		seed=seed,  # explicitly set seed
		save_strategy="no",  # do not save checkpoints
	)


	trainer=Trainer(
		model=model,
		train_dataset=train_set,
		eval_dataset=test_set,
		args=training_args,
kulcsar's avatar
kulcsar committed
		compute_metrics=evaluation.evaluate_model
kulcsar's avatar
kulcsar committed
		)

	trainer.train()
	test_set_results=trainer.evaluate()
	results.append(test_set_results)
	print(test_set_results)

	return results

kulcsar's avatar
kulcsar committed
import torch
import tqdm
import numpy as np