diff --git a/Code/inference.py b/Code/inference.py
index 2deb942e79d771af84ce76dec32c9488dd81b148..7689fdd09eae18d5ffa670b8d0fec3bb5971ff3a 100644
--- a/Code/inference.py
+++ b/Code/inference.py
@@ -9,6 +9,9 @@ import re
 import train
 from torch.utils.data import DataLoader, RandomSampler

+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
 # Get user input
 print("Enter a sentence and enclose the target word(s) between asteriks (e.g. \"I love *New York*\"): ")
 sentence = input()
@@ -56,14 +59,13 @@ label = int(input())

 # Convert to data sample for BERT
 data_sample = [{"sentence": sentence, "pos": pos, "label": label}]
-print(data_sample)
+#print(data_sample)

 tokenizer=AutoTokenizer.from_pretrained("bert-base-uncased")

 input_as_dataset=preprocess.tokenizer_new(tokenizer, data_sample, max_length=512)

 # Load model
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model=models.WordClassificationModel.from_pretrained("bert-base-uncased")
diff --git a/Code/models.py b/Code/models.py
index adac00aa2d6357f572ab176629a31051da5a7802..530b9f7b00dffdbec44ac8bb29ce3640989d8eac 100644
--- a/Code/models.py
+++ b/Code/models.py
@@ -22,6 +22,7 @@ import pandas as pd
 import sklearn
 from typing import List, Optional, Tuple, Union

+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 metric=evaluate.load("accuracy")
 torch.cuda.empty_cache()

@@ -52,9 +53,9 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
         #self.mixlayer=mixlayer
         if tmix:
             print("initializing BertModelTMix")
-            self.embedding_model=BertModelTMix(config=AutoConfig.from_pretrained(config_name)).to("cuda")
+            self.embedding_model=BertModelTMix(config=AutoConfig.from_pretrained(config_name)).to(device)
         else:
-            self.embedding_model=AutoModel.from_pretrained(config_name, config=AutoConfig.from_pretrained(config_name)).to("cuda")
+            self.embedding_model=AutoModel.from_pretrained(config_name, config=AutoConfig.from_pretrained(config_name)).to(device)

         self.dropout=nn.Dropout(0.1)

@@ -84,7 +85,7 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
                 return_dict=False,
                 output_hidden_states=False)

-        output = outputs[0].to("cuda")
+        output = outputs[0].to(device)
         output = self.dropout(output)

         if self.imdb==False:
@@ -207,10 +208,10 @@ class BertModelTMix(BertPreTrainedModel):
         super().__init__(config)
         self.config = config

-        self.embeddings = BertEmbeddings(config).to("cuda")
-        self.encoder = BertTMixEncoder(config).to("cuda")
+        self.embeddings = BertEmbeddings(config).to(device)
+        self.encoder = BertTMixEncoder(config).to(device)

-        self.pooler = BertPooler(config).to("cuda") if add_pooling_layer else None
+        self.pooler = BertPooler(config).to(device) if add_pooling_layer else None

         # Initialize weights and apply final processing

@@ -538,12 +539,12 @@ def forward_new(forward):
             counter+=2
         for i in range(runs, hidden_states.size()[0]): #Pad to batch size
-            new_matrices.append(torch.zeros([hidden_states.size()[1], hidden_states.size()[2]]).to("cuda"))
+            new_matrices.append(torch.zeros([hidden_states.size()[1], hidden_states.size()[2]]).to(device))
             new_labels.append(0)
-            new_attention_masks.append(torch.zeros([1, 1, hidden_states.size()[1]]).to("cuda"))
-        new_matrices=torch.stack(new_matrices).to("cuda")
-        new_attention_masks=torch.stack(new_attention_masks).to("cuda")
-        new_labels=torch.Tensor(new_labels).to("cuda")
+            new_attention_masks.append(torch.zeros([1, 1, hidden_states.size()[1]]).to(device))
+        new_matrices=torch.stack(new_matrices).to(device)
+        new_attention_masks=torch.stack(new_attention_masks).to(device)
+        new_labels=torch.Tensor(new_labels).to(device)
         #when performing interpolation, pass back th new hidden states and labels
         outputs=forward(self, hidden_states=new_matrices, head_mask=head_mask,
                 attention_mask=new_attention_masks,
                 encoder_hidden_states=encoder_hidden_states,
diff --git a/Code/preprocess.py b/Code/preprocess.py
index 1e5b5381e48c9a3615c070d6ec54f9f1b41bd2d7..a3586e19d63e8e8207172ecc8ae0bfff442173c2 100644
--- a/Code/preprocess.py
+++ b/Code/preprocess.py
@@ -16,6 +16,7 @@ import os
 import pandas as pd
 import sklearn

+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 #metric=evaluate.load("accuracy")
 torch.cuda.empty_cache()

@@ -359,20 +360,20 @@ def tokenizer_new(tokenizer, input, max_length, masked=False, old_dataset=False,
     #print("len toke type ids: ", len(all_token_type_ids[0]))
     if tokenizer.name_or_path[0] == "r": #if tokenizer is roberta we dont have token_type ids
         print("roberta tokenizer")
-        dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to("cuda") ,
-                torch.tensor(all_attention_masks, dtype=torch.long).to("cuda") ,
-                torch.tensor(all_start_positions,dtype=torch.long).to("cuda"),
-                torch.tensor(all_end_positions, dtype=torch.long).to("cuda"),
-                torch.tensor(all_labels,dtype=torch.long).to("cuda"))
+        dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to(device) ,
+                torch.tensor(all_attention_masks, dtype=torch.long).to(device) ,
+                torch.tensor(all_start_positions,dtype=torch.long).to(device),
+                torch.tensor(all_end_positions, dtype=torch.long).to(device),
+                torch.tensor(all_labels,dtype=torch.long).to(device))
     if tokenizer.name_or_path[0] =="b":
         print("bert tokenizer")
-        dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to("cuda"),
-                torch.tensor(all_attention_masks, dtype=torch.long).to("cuda"),
-                torch.tensor(all_token_type_ids, dtype=torch.long).to("cuda"),
-                torch.tensor(all_start_positions,dtype=torch.long).to("cuda"),
-                torch.tensor(all_end_positions, dtype=torch.long).to("cuda"),
-                torch.tensor(all_labels,dtype=torch.long).to("cuda"))
+        dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to(device),
+                torch.tensor(all_attention_masks, dtype=torch.long).to(device),
+                torch.tensor(all_token_type_ids, dtype=torch.long).to(device),
+                torch.tensor(all_start_positions,dtype=torch.long).to(device),
+                torch.tensor(all_end_positions, dtype=torch.long).to(device),
+                torch.tensor(all_labels,dtype=torch.long).to(device))
     print("created dataset")
     #print(mapping_counter)
@@ -396,7 +397,7 @@ def tokenizer_imdb(tokenizer, dataset, max_length):
     print("input_ids: ", len(all_input_ids))
     print("token_type_ids: ", len(all_token_type_ids))
     print("attention_masks: ", len(all_attention_masks))
-    dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to("cuda"), torch.tensor(all_attention_masks, dtype=torch.long).to("cuda"), torch.tensor(all_token_type_ids, dtype=torch.long).to("cuda"), torch.tensor(all_labels, dtype=torch.long).to("cuda"))
+    dataset=TensorDataset(torch.tensor(all_input_ids, dtype=torch.long).to(device), torch.tensor(all_attention_masks, dtype=torch.long).to(device), torch.tensor(all_token_type_ids, dtype=torch.long).to(device), torch.tensor(all_labels, dtype=torch.long).to(device))
     print("created imdb dataset")
     return dataset
diff --git a/Code/saved_models/test.md b/Code/saved_models/test.md
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/Code/train.py b/Code/train.py
index 6ff8f6d833c21fb61ac8d2921da86456eeaa91a5..a0abfdbd77f4628074f5d0cd9a99695a35e5960e 100644
--- a/Code/train.py
+++ b/Code/train.py
@@ -43,7 +43,7 @@ def train(model, name, imdb, seed,mixup,lambda_value, mixepoch, tmix, mixlayer,
         test_batch_size:
     Returns:"""

-    model.train().to("cuda")
+    model.train().to(device)
     train_sampler = RandomSampler(train_dataset)
     train_dataloader=DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
     num_training_steps=num_epochs*len(train_dataloader)
@@ -110,15 +110,15 @@ def train(model, name, imdb, seed,mixup,lambda_value, mixepoch, tmix, mixlayer,
                 #print("mixepoch")
                 if mixup == True:
                     #calculate new last hidden states and predictions(logits)
-                    new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value, threshold)
-                    new_matrix_batch.to("cuda")
-                    new_labels_batch.to("cuda")
-                    span_output=torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to("cuda")
+                    new_matrix_batch, new_labels_batch = mixup_function(outputs[2], labels, lambda_value)
+                    new_matrix_batch.to(device)
+                    new_labels_batch.to(device)
+                    span_output=torch.randn(new_matrix_batch.shape[0], new_matrix_batch.shape[-1]).to(device)
                     for i in range(new_matrix_batch.shape[0]):
                         span_output[i]=new_matrix_batch[i][start_positions[i]:end_positions[i]].mean(dim=0)
                     logits=model.classifier(span_output.detach())
-                    logits = logits.view(-1, 2).to("cuda")
-                    target = new_labels_batch.view(-1).to("cuda")
+                    logits = logits.view(-1, 2).to(device)
+                    target = new_labels_batch.view(-1).to(device)
                     loss_2 = cross_entropy(logits, target, lambda_value)
                     #update entire model
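Most of this patch swaps hard-coded `"cuda"` targets for a module-level `device` that falls back to CPU when no GPU is present (it also drops the unused `threshold` argument from the `mixup_function` call in `train.py`). A minimal, self-contained sketch of that device-fallback pattern is below; the `Linear` module and random batch are placeholders, not part of the repository:

```python
import torch

# Pick the GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hypothetical module and input tensor, moved once to the chosen device.
model = torch.nn.Linear(4, 2).to(device)
batch = torch.randn(8, 4).to(device)

logits = model(batch)
print(logits.device)  # cuda:0 on a GPU machine, cpu otherwise
```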