Commit ab1fa384 authored by kulcsar

changes for argument passing

parent 80c93578
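
In short, this commit moves the TMix mixing parameters mixlayer and lambda_value out of the model and encoder constructors and into the forward pass, and drops the encoder's batch_size constructor argument, so the mixing settings are supplied with each call rather than fixed when the model is built. A minimal sketch of the resulting calling pattern, assuming the script's args namespace and a batch laid out as in train() below (an illustration, not code from the commit):

    # Construction no longer takes mixlayer / lambda_value.
    model = models.WordClassificationModel(args.architecture, args.tmix).to("cuda")

    # The mixing parameters are passed as forward-pass keyword arguments instead.
    outputs = model(batch[0],                         # input_ids
                    attention_mask=batch[1],
                    start_position=batch[3],
                    end_position=batch[4],
                    labels=batch[5],
                    mixepoch=True,                    # whether to mix on this pass
                    mixlayer=args.mixlayer,           # encoder layer at which to mix
                    lambda_value=args.lambda_value)   # interpolation weight
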
@@ -38,7 +38,7 @@ def run(raw_args):
else:
print("non eligible model type selected")
elif args.model_type == "one":
-model=models.WordClassificationModel(args.architecture, args.tmix, args.mixlayer, args.lambda_value).to("cuda")
+model=models.WordClassificationModel(args.architecture, args.tmix).to("cuda")
else:
print("non eligible model type selected")
......
@@ -49,7 +49,7 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
#self.num_labels=config.num_labels
if tmix:
print("initializing BertModelTMix")
-self.embedding_model=BertModelTMix(config=AutoConfig.from_pretrained(config_name), mixlayer=mixlayer, lambda_value=lambda_value)
+self.embedding_model=BertModelTMix(config=AutoConfig.from_pretrained(config_name))
else:
self.embedding_model=AutoModel.from_pretrained(config_name, config=AutoConfig.from_pretrained(config_name))
@@ -58,7 +58,7 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
self.classifier = nn.Linear(768, 2) #first element: Hidden size, defaults to 768, should we change it to 512 like in email? Alo 768 in the other two classes as per default
#self.embedding_model.init_weights() #do we need to reimplement this?
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
-start_position=None, end_position=None, labels=None, mixepoch=False):
+start_position=None, end_position=None, labels=None, mixepoch=False, mixlayer=None, lambda_value=None):
if self.embedding_model.name_or_path == "":
outputs = self.embedding_model(input_ids,
attention_mask=attention_mask,
@@ -68,7 +68,9 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
return_dict=False,
output_hidden_states=False,
labels=labels,
-mixepoch=mixepoch)
+mixepoch=mixepoch,
+mixlayer=mixlayer,
+lambda_value=lambda_value)
else:
outputs = self.embedding_model(input_ids,
@@ -208,16 +210,15 @@ class BertModelTMix(BertPreTrainedModel):
`add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
"""
-def __init__(self, config, add_pooling_layer=True, mixlayer=None, batch_size=None):
+def __init__(self, config, add_pooling_layer=True):
super().__init__(config)
self.config = config
self.embeddings = BertEmbeddings(config)
-self.encoder = BertTMixEncoder(config, batch_size=batch_size)
+self.encoder = BertTMixEncoder(config)
self.pooler = BertPooler(config) if add_pooling_layer else None
-self.mixlayer=mixlayer
-self.batch_size=batch_size
# Initialize weights and apply final processing
self.post_init()
@@ -260,7 +261,9 @@ class BertModelTMix(BertPreTrainedModel):
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
labels=None,
-mixepoch=False
+mixepoch=False,
+mixlayer=None,
+lambda_value=None
) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
@@ -358,10 +361,10 @@ class BertModelTMix(BertPreTrainedModel):
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
-mixlayer=self.mixlayer,
labels=labels,
-batch_size=self.batch_size,
-mixepoch=mixepoch
+mixepoch=mixepoch,
+mixlayer=mixlayer,
+lambda_value=lambda_value
)
sequence_output = encoder_outputs[0]
#labels=encoder_outputs[1]
@@ -407,7 +410,6 @@ class BertTMixEncoder(torch.nn.Module):
return_dict: Optional[bool] = True,
mixlayer: int = None,
lambda_value: float=0.0,
-batch_size: int=0,
mixepoch: bool = False) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
all_hidden_states = () if output_hidden_states else None
all_self_attentions = () if output_attentions else None
......
@@ -58,7 +58,9 @@ def train(model, name, seed,gradient_accumulation_steps,mixup, threshold, lambda
'start_position': batch[3],
'end_position': batch[4],
'labels': batch[5],
-'mixepoch': True}
+'mixepoch': True,
+'mixlayer':mixepoch,
+'lambda_value':lambda_value}
else:
inputs={'input_ids': batch[0],
'attention_mask': batch[1],
@@ -66,7 +68,9 @@
'start_position': batch[3],
'end_position': batch[4],
'labels': batch[5],
-'mixepoch': False}
+'mixepoch': False,
+'mixlayer':mixepoch,
+'lambda_value':lambda_value}
if model.name_or_path[0] == "b":
inputs = {'input_ids': batch[0],
......
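
With these changes train() forwards the mixing parameters on every batch through its inputs dictionary. A hedged sketch of how such a dictionary is consumed (the **inputs call and the variables feeding it are assumptions about the surrounding loop, not shown in this diff):

    inputs = {'input_ids': batch[0],
              'attention_mask': batch[1],
              'start_position': batch[3],
              'end_position': batch[4],
              'labels': batch[5],
              'mixepoch': True,
              'mixlayer': mixlayer,            # layer index to interpolate at
              'lambda_value': lambda_value}    # mixing weight
    outputs = model(**inputs)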