import torch
import tqdm
import numpy as np
import evaluation
import evaluate
import json
import random
import math
from tqdm.auto import tqdm
from transformers import BertTokenizer, RobertaTokenizer, BertModel, RobertaModel, RobertaPreTrainedModel, RobertaConfig,  BertConfig, BertPreTrainedModel, PreTrainedModel, AutoModel, AutoTokenizer
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from transformers import AdamW, get_scheduler, Trainer, TrainingArguments
from torch import nn
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt
import os
import pandas as pd
import sklearn

metric=evaluate.load("accuracy")
torch.cuda.empty_cache()

#with torch.autocast("cuda"):

def train(model, name, seed, gradient_accumulation_steps, mixup, threshold, lambda_value, mixup_epoch, tmix, mixlayer, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
	"""Write Train loop for model with certain train dataset"""
	#set_seed(seed)
	#if model_name[0] == "b":
	#	model=BertForWordClassification.from_pretrained(model_name).to("cuda")
	#elif model_name[0] == "r":
	#	model=RobertaForWordClassification.from_pretrained(model_name),to("cuda")
	print("batch size: ", batch_size)
	print("test batch size: ", test_batch_size)
	print("mix up: ", mixup)
	model.train().to("cuda")
	train_sampler = RandomSampler(train_dataset)
	train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
	num_training_steps=num_epochs*len(train_dataloader)

	optimizer=AdamW(model.parameters(), lr=learning_rate, eps=1e-8, weight_decay=0.1)
	lr_scheduler=get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=10, num_training_steps=num_training_steps)

	model.zero_grad()
	#progress_bar=tqdm(range(num_training_steps))

	for epoch in range(num_epochs):
		#for param_tensor in model.state_dict():
		#	print(param_tensor, "\t", model.state_dict()[param_tensor])
		print("Epoche: ", epoch)
		index=0
		for batch in train_dataloader:
			if name[0] == "b":
				inputs = {'input_ids': batch[0],
						  'attention_mask': batch[1],
						  'token_type_ids': batch[2],
						  'start_position': batch[3],
						  'end_position': batch[4],
						  'labels': batch[5]}
				labels=batch[5]
				start_positions=batch[3]
				end_positions=batch[4]
			elif name[0] == "r":
				inputs = {'input_ids': batch[0],
						  'attention_mask': batch[1],
						  'start_position': batch[2],
						  'end_position': batch[3],
						  'labels': batch[4]}
				labels = batch[4]
				start_positions=batch[2]
				end_positions=batch[3]
			outputs=model(**inputs)
			#print("outputs: ", outputs)
			#print("outputs 0: ", outputs[0])
			loss=outputs[0]
			#print("length of outputs; ", len(outputs))
			#for i in range(len(outputs)):
			print("Loss: ", loss)
			if mixup == True: #and epoch>=mixup_epoch-1:
				#loss.backward(retain_graph=True)
				loss.backward()
				print("epoch: {0}, backward pass done".format(epoch))
			else:
				loss.backward()
			#if (index+1)%gradient_accumulation_steps==0:
			optimizer.step()
			lr_scheduler.step()
			optimizer.zero_grad()
			model.zero_grad()
			#	#	print("outputs {0}: {1}".format(i, outputs[i].size()))
			# from the mixup epoch on, interpolate hidden states of example pairs
			# and train on the mixed representations with a second loss
			if epoch==mixup_epoch:
				if mixup == True:
					#print("length of outputs: ", len(outputs))
					new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
					#for matrix in new_matrix_batch

					# .to() is not in-place, so the results must be reassigned
					new_matrix_batch = new_matrix_batch.to("cuda")
					new_labels_batch = new_labels_batch.to("cuda")

					# buffer holding one pooled vector per example; filled below with the
					# mean of the token vectors inside each (start, end) target span
					span_output=torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
					for i in range(new_matrix_batch.shape[0]):
						span_output[i]=new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
					#print("span output size: ", span_output.size())
					#print("span output: ", span_output)
					logits=model.classifier(span_output.detach()) #target_value?
						
					#print("logits: ", logits)
					print("logits shape: ", list(logits.shape))
					# print("Newlabels: ", new_labels_batch)
					print("labels shape: ", list(new_labels_batch.shape))

					logits = logits.view(-1, 2).to("cuda")
					print("logits: ", logits)
					target = new_labels_batch.view(-1).to("cuda")
					print("Newlabels: ", new_labels_batch)
					loss_2 = cross_entropy(logits, target)
					#loss_2 = SoftCrossEntropyLoss(logits.view(-1, 2).to("cuda"), new_labels_batch.view(-1).to("cuda"))
					#loss_2 = torch.nn.functional.cross_entropy(preds, target.long())
					print("MixUp Loss: ", loss_2)
					#update; note span_output is detached, so only the classifier receives gradients here
					loss_2.backward()
					optimizer.step()
					lr_scheduler.step()
					optimizer.zero_grad()
					model.zero_grad()
			

				#print(outputs[2].size())

			#print(outputs[0].size())
			#progress_bar.update(1)
		#print("one epoch done")
	
	#print(model_name)
	evaluation_test = evaluation.evaluate_model(model, name,  test_dataset, learning_rate, test_batch_size)
	evaluation_train = evaluation.evaluate_model(model, name, train_dataset, learning_rate, test_batch_size)
 
	print("DEV: ", evaluation_test)
	print("TRAIN: ", evaluation_train)

	return evaluation_test, evaluation_train

#log base e
#function from the meeting
def cross_entropy(logits, target):
	"""Soft-label cross entropy (natural log): target holds P(class 1) and may
	be fractional after mixup. Returns a scalar so loss_2.backward() works."""
	logprobs = torch.nn.functional.log_softmax(logits, dim=1)
	target = target.float().to(logprobs.device)
	# per-example soft targets [P(class 0), P(class 1)]; covers both the clear
	# (0/1) case and the mixed case produced by interpolate()
	soft_targets = torch.stack([1 - target, target], dim=1)
	# negative log-likelihood under the soft targets, averaged over the batch
	loss_per_example = -(soft_targets * logprobs).sum(dim=1)
	return loss_per_example.mean()

#can be removed
# def cross_entropy(logits, target):
#     log_q = torch.log_softmax(logits, dim=1)
#     return -torch.sum(log_q[range(log_q.shape[0]), target])

# def cross_entropy(logits, target):
#     # Calculate log_q
#     log_q = torch.log_softmax(logits, dim=1)
# 	#define classes/options
#     target_class = (target == 0).float()
#     target_class2 = (target == 1).float()
#     target = target.float()
# 	# calculate sum of losses of batch size
#     return -(target_class * log_q[:, 0] + target_class2 * log_q[:, 1] + (1 - target_class - target_class2) * (target * log_q[:, 1] + (1 - target) * log_q[:, 0]))


def mixup_function(batch_of_matrices, batch_of_labels, l, t):
	"""Mix consecutive pairs of the batch; l is the mixup lambda, t the label threshold."""
	runs = math.floor(batch_of_matrices.size()[0]/2)
	counter=0
	results=[]
	result_labels=[]
	for i in range(runs):
		print("doing interpolation with lambda: {0} and threshold: {1}...".format(l, t))
		matrix1=batch_of_matrices[counter]
		label1=batch_of_labels[counter]
		matrix2=batch_of_matrices[counter+1]
		label2=batch_of_labels[counter+1]
		new_matrix, new_label=interpolate(matrix1, label1, matrix2, label2, l, t)
		if new_matrix is not None:
			results.append(new_matrix)
			result_labels.append(new_label)
		counter+=2
	results=torch.stack(results)
	result_labels= torch.stack(result_labels) #torch.LongTensor(result_labels)
	#print("mixup done")
	return results, result_labels

def interpolate(matrix1, label1, matrix2, label2, l, threshold):
	new_matrix=(matrix1*l)+(matrix2 * (1-l))
	new_label=(label1*l)+(label2*(1-l))

	#if new_label > 0.5+threshold:
	#	new_label=1
	#elif new_label < 0.5-threshold:
	#	new_label=0
	#else:
	#	print("in undefined zone")
	#	return None, None
	return new_matrix, new_label #torch.tensor([new_label])
	
def train_salami(model, seed, train_set, test_set, batch_size, test_batch_size, learning_rate, epochs):
	results=[]
	#for num_run, seed in enumerate(random.sample(range(1, 100), num_runs)):
		#if model_name[0]=="b":
		#	model=BertForWordClassification.from_pretrained(model_name)
		#else:
		#	model=RobertaForWordClassification.from_pretrained(model_name)

		#set_seed(seed)
	training_args = TrainingArguments(
		output_dir="./results",  # output directory
		num_train_epochs=epochs,  # total # of training epochs
		per_device_train_batch_size=batch_size,  # batch size per device during training
		per_device_eval_batch_size=test_batch_size,  # batch size for evaluation
		warmup_steps=10,  # number of warmup steps for learning rate scheduler
		weight_decay=0.1,  # strength of weight decay
		learning_rate=learning_rate,
		evaluation_strategy="no",  # evaluates never, per epoch, or every eval_steps
		eval_steps=10,
		logging_dir="./logs",  # directory for storing logs
		seed=seed,  # explicitly set seed
		save_strategy="no",  # do not save checkpoints
	)


	trainer=Trainer(
		model=model,
		train_dataset=train_set,
		eval_dataset=test_set,
		args=training_args,
		compute_metrics=evaluation.evaluate_model
		)

	trainer.train()
	test_set_results=trainer.evaluate()
	results.append(test_set_results)
	print(test_set_results)

	return results
