Commit 347d389a authored by kulcsar
add changes tmix

parent c841c22a
@@ -96,13 +96,13 @@ if __name__ == "__main__":
     parser.add_argument(
         "--tmix",
-        help="whether or not to use tmix. if yes, please specify layer and lambda"
-        action="store-true"
+        help="whether or not to use tmix. if yes, please specify layer and lambda",
+        action="store_true"
     )
     parser.add_argument(
         "--mixlayer",
-        help="specify the layer to mix. Only select one layer at a time"
+        help="specify the layer to mix. Only select one layer at a time",
         type=list
     )
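A note on the `--mixlayer` option in the hunk above: argparse's `type=list` applies `list()` to the raw string, so `--mixlayer 10` parses as `['1', '0']` rather than `[10]`. A minimal sketch of what the option appears to intend, using `nargs` with `type=int` instead (hypothetical, not part of this commit):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--tmix",
        help="whether or not to use tmix. if yes, please specify layer and lambda",
        action="store_true"
    )
    parser.add_argument(
        "--mixlayer",
        nargs=1,   # collects one value into a list: [10]
        type=int,  # type=list would split the string "10" into ['1', '0']
        help="specify the layer to mix. Only select one layer at a time"
    )
    args = parser.parse_args(["--tmix", "--mixlayer", "10"])
    print(args.tmix, args.mixlayer)  # True [10]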
@@ -474,7 +474,7 @@ def forward_new(forward):
             #print("encoder hidden states: ", encoder_hidden_states.size())
-            if layer_now == mixlayer
+            if layer_now == mixlayer:
                 runs = math.floor(hidden_states.size()[0]/2)
                 print("runs: ", runs)
                 print("lambda_value: ", lambda_value)
@@ -498,7 +498,7 @@ def forward_new(forward):
                 outputs=forward(self, hidden_states=new_matrices, head_mask=head_mask, attention_mask=attention_mask, encoder_hidden_states=encoder_hidden_states,
                     encoder_attention_mask=encoder_attention_mask, past_key_value=past_key_values, output_attentions=output_attentions) #I"m a bit confused here... do we have to add self or rather not?
             else:
-                outputs = forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value=past_key_values, output_attentions)
+                outputs = forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value=past_key_values, output_attentions=output_attentions)
             print(outputs)
             return outputs
         return forward_mix
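For context, the two hunks above patch `forward_new`, which wraps an encoder layer's forward so that, at the chosen `mixlayer`, hidden states of paired examples in the batch are linearly interpolated with weight `lambda_value` (TMix-style mixup). The following is a minimal sketch of such a wrapper under the assumptions visible in the diff; `make_mix_forward`, the pairing of the first half of the batch with the second half, and the reuse of the first example's attention mask are hypothetical reconstructions, not the repository's exact code:

    import math
    import torch

    def make_mix_forward(forward, mixlayer, lambda_value):
        # Hypothetical reconstruction: at layer `mixlayer`, mix the first
        # half of the batch with the second half; elsewhere pass through.
        def forward_mix(self, hidden_states, attention_mask=None, head_mask=None,
                        encoder_hidden_states=None, encoder_attention_mask=None,
                        past_key_values=None, output_attentions=False, layer_now=0):
            if layer_now == mixlayer:
                runs = math.floor(hidden_states.size()[0] / 2)
                first = hidden_states[:runs]
                second = hidden_states[runs:2 * runs]
                # mixup: convex combination of the paired hidden states
                new_matrices = lambda_value * first + (1.0 - lambda_value) * second
                # assumption: keep the first example's mask for each mixed pair
                mixed_mask = attention_mask[:runs] if attention_mask is not None else None
                outputs = forward(self, hidden_states=new_matrices,
                                  attention_mask=mixed_mask, head_mask=head_mask,
                                  encoder_hidden_states=encoder_hidden_states,
                                  encoder_attention_mask=encoder_attention_mask,
                                  past_key_value=past_key_values,
                                  output_attentions=output_attentions)
            else:
                outputs = forward(self, hidden_states, attention_mask, head_mask,
                                  encoder_hidden_states, encoder_attention_mask,
                                  past_key_value=past_key_values,
                                  output_attentions=output_attentions)
            return outputs
        return forward_mix

With `lambda_value` close to 1 the mixed representation stays near the first example of each pair; the batch is assumed to be arranged so that pair members sit `runs` positions apart.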