Commit bce35e10 authored by umlauf

new try

parent 091e7e4d
@@ -20,121 +20,121 @@ import sklearn
metric = evaluate.load("accuracy")
torch.cuda.empty_cache()
with torch.autocast('cuda'):
    def train(model, name, seed, gradient_accumulation_steps, mixup, threshold, lambda_value, mixup_epoch, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
        """Write Train loop for model with certain train dataset"""
        #set_seed(seed)
        #if model_name[0] == "b":
        #    model=BertForWordClassification.from_pretrained(model_name).to("cuda")
        #elif model_name[0] == "r":
        #    model=RobertaForWordClassification.from_pretrained(model_name),to("cuda")
        print("batch size: ", batch_size)
        print("test batch size: ", test_batch_size)
        print("mix up: ", mixup)
        model.train().to("cuda")
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
        num_training_steps = num_epochs * len(train_dataloader)
        optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-8, weight_decay=0.1)
        lr_scheduler = get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=10, num_training_steps=num_training_steps)
        model.zero_grad()
        #progress_bar=tqdm(range(num_training_steps))
        for epoch in range(num_epochs):
            #for param_tensor in model.state_dict():
            #    print(param_tensor, "\t", model.state_dict()[param_tensor])
            index = 0
            for batch in train_dataloader:
                if name[0] == "b":
                    inputs = {'input_ids': batch[0],
                              'attention_mask': batch[1],
                              'token_type_ids': batch[2],
                              'start_position': batch[3],
                              'end_position': batch[4],
                              'labels': batch[5]}
                    labels = batch[5]
                    start_positions = batch[3]
                    end_positions = batch[4]
                if name[0] == "r":
                    inputs = {'input_ids': batch[0],
                              'attention_mask': batch[1],
                              'start_position': batch[2],
                              'end_position': batch[3],
                              'labels': batch[4]}
                    labels = batch[4]
                    start_positions = batch[2]
                    end_positions = batch[3]
                outputs = model(**inputs)
                #print("outputs: ", outputs)
                #print("outputs 0: ", outputs[0])
                loss = outputs[0]
                #print("length of outputs; ", len(outputs))
                #for i in range(len(outputs)):
                print("Loss: ", loss)
                if mixup == True:  #and epoch>=mixup_epoch-1:
                    #loss.backward(retain_graph=True)
                    loss.backward()
                    print("epoch: {0}, retained".format(epoch))
                else:
                    loss.backward()
                #if (index+1)%gradient_accumulation_steps==0:
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                model.zero_grad()
                # # print("outputs {0}: {1}".format(i, outputs[i].size()))
                if epoch == mixup_epoch:
                    if mixup == True:
                        #print("length of outputs: ", len(outputs))
                        new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
                        #for matrix in new_matrix_batch
                        new_matrix_batch.to("cuda")
                        new_labels_batch.to("cuda")
                        #add mira 1 line
                        #new_labels_batch = new_labels_batch.to(torch.float64)
                        span_output = torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
                        for i in range(new_matrix_batch.shape[0]):
                            span_output[i] = new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
                        #print("span output size: ", span_output.size())
                        #print("span output: ", span_output)
                        logits = model.classifier(span_output.detach())  #target_value?
                        print("logits: ", logits)  #print("logits: ", logits.size())
                        print("labels size: ", new_labels_batch.size())
                        loss_fct = CrossEntropyLoss()
                        #add mira 1 line
                        #new_labels_batch = new_labels_batch.to(torch.float64)
                        loss_2 = loss_fct(logits.view(-1, 2).to("cuda"), new_labels_batch.view(-1).to("cuda"))
                        #.to(torch.float32)
                        print("MixUp Loss: ", loss_2)
                        #update entire model
                        loss_2.backward()
                        optimizer.step()
                        lr_scheduler.step()
                        optimizer.zero_grad()
                        model.zero_grad()
                #print(outputs[2].size())
                #print(outputs[0].size())
                #progress_bar.update(1)
            #print("one epoch done")
        #print(model_name)
        evaluation_test = evaluation.evaluate_model(model, name, test_dataset, learning_rate, test_batch_size)
        evaluation_train = evaluation.evaluate_model(model, name, train_dataset, learning_rate, test_batch_size)
        print("DEV: ", evaluation_test)
        print("TRAIN: ", evaluation_train)
        return evaluation_test, evaluation_train
#with torch.autocast("cuda"):
def train(model, name, seed, gradient_accumulation_steps, mixup, threshold, lambda_value, mixup_epoch, train_dataset, test_dataset, num_epochs, learning_rate, batch_size, test_batch_size):
    """Train loop for the given model on the given train dataset."""
    #set_seed(seed)
    #if model_name[0] == "b":
    #    model = BertForWordClassification.from_pretrained(model_name).to("cuda")
    #elif model_name[0] == "r":
    #    model = RobertaForWordClassification.from_pretrained(model_name).to("cuda")
    print("batch size: ", batch_size)
    print("test batch size: ", test_batch_size)
    print("mix up: ", mixup)
    model.train().to("cuda")
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
    num_training_steps = num_epochs * len(train_dataloader)
    optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-8, weight_decay=0.1)
    lr_scheduler = get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=10, num_training_steps=num_training_steps)
    model.zero_grad()
    #progress_bar = tqdm(range(num_training_steps))
    for epoch in range(num_epochs):
        #for param_tensor in model.state_dict():
        #    print(param_tensor, "\t", model.state_dict()[param_tensor])
        index = 0
        for batch in train_dataloader:
            # Assemble the model inputs; the batch layout differs between BERT ("b...")
            # and RoBERTa ("r...") models.
            if name[0] == "b":
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2],
                          'start_position': batch[3],
                          'end_position': batch[4],
                          'labels': batch[5]}
                labels = batch[5]
                start_positions = batch[3]
                end_positions = batch[4]
            if name[0] == "r":
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'start_position': batch[2],
                          'end_position': batch[3],
                          'labels': batch[4]}
                labels = batch[4]
                start_positions = batch[2]
                end_positions = batch[3]
            # First pass: forward the original batch and backpropagate its loss.
            outputs = model(**inputs)
            #print("outputs: ", outputs)
            #print("outputs 0: ", outputs[0])
            loss = outputs[0]
            #print("length of outputs: ", len(outputs))
            #for i in range(len(outputs)):
            print("Loss: ", loss)
            if mixup == True:  #and epoch>=mixup_epoch-1:
                #loss.backward(retain_graph=True)
                loss.backward()
                print("epoch: {0}, retained".format(epoch))
            else:
                loss.backward()
            #if (index+1)%gradient_accumulation_steps==0:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            model.zero_grad()
            # # print("outputs {0}: {1}".format(i, outputs[i].size()))
            # Second pass (MixUp): interpolate hidden states and labels, pool each mixed
            # sequence over its target span, classify the pooled vector, and backpropagate
            # the resulting loss.
            if epoch == mixup_epoch:
                if mixup == True:
                    #print("length of outputs: ", len(outputs))
                    new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
                    #for matrix in new_matrix_batch
                    #add mira 1 line
                    new_labels_batch.to(torch.float32)
                    new_matrix_batch.to("cuda")
                    new_labels_batch.to("cuda")
                    span_output = torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
                    for i in range(new_matrix_batch.shape[0]):
                        span_output[i] = new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
                    #print("span output size: ", span_output.size())
                    #print("span output: ", span_output)
                    logits = model.classifier(span_output.detach())  #target_value?
                    print("logits: ", logits)  #print("logits: ", logits.size())
                    print("labels size: ", new_labels_batch.size())
                    loss_fct = CrossEntropyLoss()
                    #add mira 1 line
                    #new_labels_batch = new_labels_batch.to(torch.float64)
                    loss_2 = loss_fct(logits.view(-1, 2).to("cuda"), new_labels_batch.view(-1).to("cuda"))
                    #.to(torch.float32)
                    print("MixUp Loss: ", loss_2)
                    #update entire model
                    loss_2.backward()
                    optimizer.step()
                    lr_scheduler.step()
                    optimizer.zero_grad()
                    model.zero_grad()
            #print(outputs[2].size())
            #print(outputs[0].size())
            #progress_bar.update(1)
        #print("one epoch done")
    #print(model_name)
    evaluation_test = evaluation.evaluate_model(model, name, test_dataset, learning_rate, test_batch_size)
    evaluation_train = evaluation.evaluate_model(model, name, train_dataset, learning_rate, test_batch_size)
    print("DEV: ", evaluation_test)
    print("TRAIN: ", evaluation_train)
    return evaluation_test, evaluation_train
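
For orientation: mixup_function itself is not part of this diff, so the sketch below is an assumption, not the repository's implementation. It shows one plausible shape for a hidden-state MixUp helper that matches the call site above, assuming it blends each example's hidden states with a random partner from the same batch using a fixed lambda_value and uses threshold to map the mixed soft label back to a hard class index for CrossEntropyLoss.

import torch

def mixup_function(hidden_states, labels, lambda_value, threshold):
    # Hypothetical sketch only: pair each example with a random partner in the batch.
    perm = torch.randperm(hidden_states.size(0), device=hidden_states.device)
    # Convex combination of the hidden-state matrices (batch, seq_len, hidden).
    mixed_states = lambda_value * hidden_states + (1 - lambda_value) * hidden_states[perm]
    # Mix the labels the same way, then threshold back to a hard 0/1 target
    # so the result can still be used as a class index by CrossEntropyLoss.
    soft_labels = lambda_value * labels.float() + (1 - lambda_value) * labels[perm].float()
    hard_labels = (soft_labels >= threshold).long()
    return mixed_states, hard_labels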