import torch
import numpy as np
import evaluation
import evaluate
import json
import random
import math
from tqdm.auto import tqdm
from transformers import (
	BertTokenizer,
	RobertaTokenizer,
	BertModel,
	RobertaModel,
	RobertaPreTrainedModel,
	RobertaConfig,
	BertConfig,
	BertPreTrainedModel,
	PreTrainedModel,
	AutoModel,
	AutoTokenizer,
)
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from transformers import AdamW, get_scheduler, Trainer, TrainingArguments
from torch import nn
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt
import os
import pandas as pd
import sklearn

metric = evaluate.load("accuracy")
torch.cuda.empty_cache()

def train(model, name, imdb, seed, gradient_accumulation_steps, mixup, threshold, lambda_value, mixepoch, tmix, mixlayer, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
	"""Train loop for the given model on train_dataset; evaluates on test_dataset at the end."""
	print("batch size: ", batch_size)
	print("test batch size: ", test_batch_size)
	print("mix up: ", mixup)
	model.train().to("cuda")
	train_sampler = RandomSampler(train_dataset)
	train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
	num_training_steps = num_epochs * len(train_dataloader)

	optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-8, weight_decay=0.1)
	lr_scheduler = get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=10, num_training_steps=num_training_steps)

	model.zero_grad()

	for epoch in range(num_epochs):
		print("Epoch: ", epoch)
		for batch in train_dataloader:
			if name[0] == "b":
				if tmix==False:
					inputs = {'input_ids': batch[0],
							'attention_mask': batch[1],
							'token_type_ids': batch[2],
							'start_position': batch[3],
							'end_position': batch[4],
							'labels': batch[5]}
					labels=batch[5]
					start_positions=batch[3]
					end_positions=batch[4]
				if tmix==True:
kulcsar's avatar
kulcsar committed
					#print("Hello, tmix is set as true")
kulcsar's avatar
kulcsar committed
					if epoch == mixepoch:
kulcsar's avatar
kulcsar committed
						if imdb == False:
							print("this is miuxup epoch")
							#print(batch[5])
							#print("mixlayer: ", mixlayer)
							#print("lambda: ", lambda_value)
kulcsar's avatar
kulcsar committed
                        
kulcsar's avatar
kulcsar committed
							inputs={'input_ids': batch[0],
										'attention_mask': batch[1],
										'token_type_ids': batch[2],
										'start_position': batch[3],
										'end_position': batch[4],
										'labels': batch[5],
										'mixepoch': True,
										'mixlayer':mixlayer,
										'lambda_value':lambda_value}
						if imdb==True:
kulcsar's avatar
kulcsar committed
							print("this is a mixup epoch with imdb")
							inputs={'input_ids':batch[0],
									'attention_mask': batch[1],
									'token_type_ids': batch[2],
									'labels': batch[3],
									'mixepoch': True,
									'mixlayer': mixlayer,
									'lambda_value': lambda_value}
							
kulcsar's avatar
kulcsar committed
					else:
						if imdb == False:
							print("this is a non mixup epoch")
							#print(batch[5])
							inputs={'input_ids': batch[0],
										'attention_mask': batch[1],
										'token_type_ids': batch[2],
										'start_position': batch[3],
										'end_position': batch[4],
										'labels': batch[5],
										'mixepoch': False,
										'mixlayer':mixlayer,
										'lambda_value':lambda_value}
						elif imdb == True:
							inputs={'input_ids': batch[0],
kulcsar's avatar
kulcsar committed
									'attention_mask': batch[1],
									'token_type_ids': batch[2],
									'labels': batch[3],
kulcsar's avatar
kulcsar committed
									'mixepoch': False,
									'mixlayer': mixlayer,
kulcsar's avatar
kulcsar committed
									'lambda_value':lambda_value}
kulcsar's avatar
kulcsar committed
			if name[0] == "r":
umlauf's avatar
umlauf committed
				inputs = {'input_ids': batch[0],
						  'attention_mask': batch[1],
						  'start_position': batch[2],
						  'end_position': batch[3],
						  'labels': batch[4]}
				labels = batch[4]
				start_positions=batch[2]
				end_positions=batch[3]
			outputs=model(**inputs)
			#print("outputs: ", outputs)
			#print("outputs 0: ", outputs[0])
			loss=outputs[0]
			#print("length of outputs; ", len(outputs))
			#for i in range(len(outputs)):
			print("Loss: ", loss)
			if mixup == True: #and epoch>=mixup_epoch-1:
				#loss.backward(retain_graph=True)
				loss.backward()
				print("epoch: {0}, retained".format(epoch))
			else:
				loss.backward()
			#if (index+1)%gradient_accumulation_steps==0:
			optimizer.step()
			lr_scheduler.step()
			optimizer.zero_grad()
			model.zero_grad()
			#	#	print("outputs {0}: {1}".format(i, outputs[i].size()))
kulcsar's avatar
kulcsar committed
			if epoch == mixepoch:  # TODO: also make the epoch choice available for tmix
				if mixup:
					# interpolate pairs of hidden states (outputs[2]) and their labels
					new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
					new_matrix_batch = new_matrix_batch.to("cuda")
					new_labels_batch = new_labels_batch.to("cuda")

					# average the hidden states over each annotated span
					span_output = torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
					for i in range(new_matrix_batch.shape[0]):
						span_output[i] = new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
					logits = model.classifier(span_output.detach())

					logits = logits.view(-1, 2).to("cuda")
					target = new_labels_batch.view(-1).to("cuda")
					loss_2 = cross_entropy(logits, target, lambda_value)
					print("MixUp Loss: ", loss_2)
					# update the entire model on the mixup loss
					loss_2.backward()
					optimizer.step()
					lr_scheduler.step()
					optimizer.zero_grad()
					model.zero_grad()

	evaluation_test = evaluation.evaluate_model(model, name, test_dataset, learning_rate, test_batch_size)
	evaluation_train = evaluation.evaluate_model(model, name, train_dataset, learning_rate, test_batch_size)

	print("DEV: ", evaluation_test)
	print("TRAIN: ", evaluation_train)

	return evaluation_test, evaluation_train
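
# Hypothetical invocation of train() (a sketch, not from this repo: the model
# class, dataset objects, and hyperparameter values below are assumptions):
#
#	model = AutoModel.from_pretrained("bert-base-uncased")
#	dev_scores, train_scores = train(model, "bert-base-uncased", imdb=False,
#		seed=42, gradient_accumulation_steps=1, mixup=True, threshold=0.1,
#		lambda_value=0.4, mixepoch=1, tmix=False, mixlayer=None,
#		train_dataset=train_ds, test_dataset=dev_ds, num_epochs=3,
#		learning_rate=2e-5, batch_size=16, test_batch_size=32)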

#BINARY: soft cross-entropy over two classes, supporting mixed (soft) labels
def cross_entropy(logits, target, l):
	results = torch.tensor([], device='cuda')
	for i in range(logits.shape[0]):
		lg = logits[i:i+1, :]  # logits of a single instance, shape (1, 2)
		t = target[i]
		# turn the logits into log (base e) probabilities
		logprobs = torch.nn.functional.log_softmax(lg, dim=1)
		value = t.item()  # the label value: 0., 1., or a mixed value in between
		if value == 1 or value == 0:
			# clear label: build a one-hot vector, e.g. [0., 1.]
			one_hot = torch.tensor([1 - value, value], device='cuda:0')
			loss_clear_labels = -((one_hot[0] * logprobs[0][0]) + (one_hot[1] * logprobs[0][1]))
			results = torch.cat((loss_clear_labels.view(1), results), dim=0)
		else:
			# mixed label: round to match lambda_value, e.g. 0.4
			value_r = round(value, 1)
			# note: the order [value_r, 1 - value_r] flips the one-hot
			# convention above
			mixed_vec = torch.tensor([value_r, 1 - value_r])
			loss_mixed_labels = -((mixed_vec[0] * logprobs[0][0]) + (mixed_vec[1] * logprobs[0][1]))
			results = torch.cat((loss_mixed_labels.view(1), results), dim=0)
	batch_loss = results.mean()  # average over the batch
	return batch_loss
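
# Minimal sanity check for cross_entropy (a sketch; like the function above it
# assumes a CUDA device, and the logits/labels are illustrative only):
def _cross_entropy_sanity_check():
	lg = torch.tensor([[2.0, -1.0], [0.5, 0.5]], device="cuda")
	tg = torch.tensor([0.0, 0.4], device="cuda")  # one clear label, one mixed label
	print(cross_entropy(lg, tg, 0.4))  # mean of a one-hot loss and a soft-label loss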

# TODO: fold both mixed-label branches into a single if-branch
# TODO: avoid calling torch.cat on every iteration

# matrix -> 2 dimensions
# per row: if the two labels are equal -> standard instance
# if they differ -> mixed instance

def mixup_function(batch_of_matrices, batch_of_labels, l, t):
	"""Pair up consecutive instances in the batch and interpolate each pair."""
	runs = math.floor(batch_of_matrices.size()[0] / 2)
	counter = 0
	results = []
	result_labels = []
	for i in range(runs):
		print("doing interpolation with lambda: {0} and threshold: {1}...".format(l, t))
		matrix1 = batch_of_matrices[counter]
		label1 = batch_of_labels[counter]
		matrix2 = batch_of_matrices[counter + 1]
		label2 = batch_of_labels[counter + 1]
		new_matrix, new_label = interpolate(matrix1, label1, matrix2, label2, l, t)
		if new_matrix is not None:
			results.append(new_matrix)
			result_labels.append(new_label)
		counter += 2
	results = torch.stack(results)
	result_labels = torch.stack(result_labels)
	return results, result_labels

def interpolate(matrix1, label1, matrix2, label2, l, threshold):
	"""Convex combination of two instances and their labels (mixup)."""
	new_matrix = (matrix1 * l) + (matrix2 * (1 - l))
	new_label = (label1 * l) + (label2 * (1 - l))

	# optional thresholding (disabled): discard pairs whose mixed label falls
	# into the undefined zone around 0.5
	#if new_label > 0.5+threshold:
	#	new_label=1
	#elif new_label < 0.5-threshold:
	#	new_label=0
	#else:
	#	print("in undefined zone")
	#	return None, None
	return new_matrix, new_label
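
# Worked example for mixup_function/interpolate (a sketch; the matrices and
# labels are illustrative only, small enough to run on CPU):
def _mixup_example():
	matrices = torch.arange(12, dtype=torch.float32).view(2, 2, 3)  # two fake hidden-state matrices
	labels = torch.tensor([1.0, 0.0])
	mixed, mixed_labels = mixup_function(matrices, labels, 0.4, 0.1)
	print(mixed.shape)   # torch.Size([1, 2, 3]): one interpolated matrix per pair
	print(mixed_labels)  # tensor([0.4000]): 0.4 * 1 + 0.6 * 0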
def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
	"""Train and evaluate with the Hugging Face Trainer."""
	results = []
	training_args = TrainingArguments(
		output_dir="./results",  # output directory
		num_train_epochs=epochs,  # total number of training epochs
		per_device_train_batch_size=batch_size,  # batch size per device during training
		per_device_eval_batch_size=test_batch_size,  # batch size for evaluation
		warmup_steps=10,  # number of warmup steps for the learning rate scheduler
		weight_decay=0.1,  # strength of weight decay
		learning_rate=learning_rate,
		evaluation_strategy="no",  # evaluate never, per epoch, or every eval_steps
		eval_steps=10,
		logging_dir="./logs",  # directory for storing logs
		seed=seed,  # explicitly set seed
		save_strategy="no",  # do not save checkpoints
	)

	trainer = Trainer(
		model=model,
		train_dataset=train_set,
		eval_dataset=test_set,
		args=training_args,
		compute_metrics=evaluation.evaluate_model,
	)

	trainer.train()
	test_set_results = trainer.evaluate()
	results.append(test_set_results)
	print(test_set_results)

	return results
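
# Hypothetical call of train_salami (a sketch; the dataset objects and
# hyperparameter values are assumptions):
#
#	results = train_salami(model, seed=42, train_set=train_ds, test_set=dev_ds,
#		batch_size=16, test_batch_size=32, learning_rate=2e-5, epochs=3)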
