Commit 427c3c7d authored by friebolin

Add docstrings

parent 1bf3cb79
@@ -41,11 +41,23 @@ def set_seed(seed: int = 42) -> None:
class WordClassificationModel(torch.nn.Module): # use AutoModel from the library
"""This class is needed to enable BERT to work with our input. We apply a dropout layer
and the linear classifier layer (/2 layer MLP) to make it a binary decision problem. In the forward step
we specify the classification over the span given by end and start position and compute the
loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
class WordClassificationModel(torch.nn.Module):
"""
A PyTorch module that uses BERT for word classification. It applies a dropout layer and
a linear classifier layer to turn the task into a binary decision problem. In the forward step, it
performs the classification over the span given by the start and end positions and computes the
loss with cross entropy. The predictions (logits) are produced by the classifier layer.
Params:
config_name (str): The configuration name of the pre-trained BERT model.
tmix (bool): Whether to use the TMix layer. Defaults to False.
imdb (bool): Whether to use the IMDB dataset. Defaults to False.
Returns:
outputs: The predicted logits, together with the hidden states and attention masks from the
BERT model, and the computed loss value.
"""
def __init__(self, config_name, tmix=False, imdb=False): #mixlayer=-1, lambda_value=0.0):
super(WordClassificationModel, self).__init__()
self.tmix=tmix
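The docstring above describes the classification head these classes share: BERT hidden states for the span between the start and end positions are pooled, passed through dropout and a linear layer, and trained with cross entropy. A minimal sketch of that idea follows; the tensor names, the 768 hidden size, and mean-pooling over the span are illustrative assumptions, not the repository's exact implementation.

import torch
from torch import nn

def classify_span(hidden_states, start_positions, end_positions, dropout, classifier):
    # hidden_states: (batch_size, seq_len, hidden_size) from the BERT encoder.
    # Mean-pool the hidden states of the target word span for each example
    # (the actual pooling used in the repository may differ).
    pooled = torch.stack([
        hidden_states[i, start_positions[i]:end_positions[i]].mean(dim=0)
        for i in range(hidden_states.size(0))
    ])
    return classifier(dropout(pooled))  # logits of shape (batch_size, 2)

dropout = nn.Dropout(0.1)
classifier = nn.Linear(768, 2)           # binary decision problem
loss_fct = nn.CrossEntropyLoss()

hidden_states = torch.randn(4, 16, 768)  # dummy encoder output
starts, ends = torch.tensor([1, 2, 0, 5]), torch.tensor([3, 4, 2, 7])
labels = torch.tensor([0, 1, 1, 0])
logits = classify_span(hidden_states, starts, ends, dropout, classifier)
loss = loss_fct(logits, labels)          # cross-entropy loss as in the docstring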
@@ -116,12 +128,24 @@ class WordClassificationModel(torch.nn.Module): # use AutoModel from the library
class BertForWordClassification(BertPreTrainedModel): # use AutoModel from the library
"""This class is needed to enable BERT to work with our input. We apply a dropout layer
and the linear classifier layer to make it a binary decision problem. In the forward step
we specify the classification over the span given by end and start position and compute the
loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
class BertForWordClassification(BertPreTrainedModel):
"""
BERT model for word classification. Applies a dropout layer and a linear classifier layer to turn the task into a
binary decision problem. In the forward step, the classification is performed over the span given by the start and
end positions, and the loss is computed with cross entropy. The predictions (logits) are produced by the classifier
layer.
Params:
config (:class:`~transformers.BertConfig`):
Configuration class for BERT.
Outputs:
if `labels` is not `None`:
- `loss`: `torch.FloatTensor` of shape `(1,)`. Classification loss.
- `logits`: `torch.FloatTensor` of shape `(batch_size, num_labels)`. Logits (output) produced by the linear classifier layer.
if `labels` is `None`:
- `logits`: `torch.FloatTensor` of shape `(batch_size, num_labels)`. Logits (output) produced by the linear classifier layer.
"""
def __init__(self, config):
super(BertForWordClassification, self).__init__(config)
self.num_labels=config.num_labels
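As a hedged illustration of the Outputs contract above, a call might look like the following; the keyword names (start_position, end_position, labels) follow the docstring, but the exact forward signature is not visible in this diff.

import torch
from transformers import AutoTokenizer

# Hypothetical usage sketch; argument names are assumptions based on the docstring,
# and BertForWordClassification is the class defined in this module.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = BertForWordClassification.from_pretrained("bert-base-uncased", num_labels=2)

batch = tokenizer("The bank raised interest rates.", return_tensors="pt")
span = dict(start_position=torch.tensor([2]), end_position=torch.tensor([3]))

# With labels: outputs = (loss, logits, ...); without labels: outputs = (logits, ...).
loss, logits = model(**batch, **span, labels=torch.tensor([1]))[:2]
logits_only = model(**batch, **span)[0]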
@@ -158,11 +182,26 @@ class BertForWordClassification(BertPreTrainedModel): # use AutoModel from the library
return outputs
class RobertaForWordClassification(RobertaPreTrainedModel): # use AutoModel from the library
"""This class is needed to enable BERT to work with our input. We apply a dropout layer
and the linear classifier layer to make it a binary decision problem. In the forward step
we specify the classification over the span given by end and start position and compute the
loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
class RobertaForWordClassification(RobertaPreTrainedModel):
"""
Fine-tunes a pre-trained RoBERTa model for word classification tasks. Applies a dropout layer
and a linear classifier layer to turn the task into a binary decision problem. In the forward step,
the model performs the classification over the span given by the start and end positions and computes the
loss with cross entropy. The predictions (logits) are produced by the classifier layer.
Args:
config (:class:`~transformers.RobertaConfig`):
The configuration object that configures the model architecture.
Outputs:
if `labels` is not None:
Returns the cross-entropy loss (:obj:`torch.FloatTensor`).
if `labels` is None:
Returns a tuple of the predicted logits for each class (:obj:`torch.FloatTensor`)
and a tuple of outputs from the RoBERTa model, including the last hidden state and
the attention mask.
"""
def __init__(self, config):
super(RobertaForWordClassification, self).__init__(config)
self.num_labels=config.num_labels
@@ -199,11 +238,27 @@ class RobertaForWordClassification(RobertaPreTrainedModel): # use AutoModel from the library
class BertModelTMix(BertPreTrainedModel):
"""
Model to override forward function in Encoder (copied and slightly modified from
"""
Initializes a BertModelTMix model. Overrides the forward function in the encoder (copied and slightly modified from
transformers)
Params:
- config: `BertConfig` instance with the model configuration.
- add_pooling_layer: `bool`. Whether to include a pooling layer in the model architecture. Default: True.
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
Returns:
- If return_dict is True, returns a dictionary containing the following keys:
'last_hidden_state': a tensor of shape (batch_size, sequence_length, hidden_size) containing the final hidden states of the model.
'pooler_output': a tensor of shape (batch_size, hidden_size) containing the output of the model's pooling layer (if add_pooling_layer is True).
'past_key_values': a tuple of tensors containing the precomputed key and value hidden states of the attention blocks (if use_cache is True).
'hidden_states': a tuple of tensors containing the hidden states of all layers of the model (if output_hidden_states is True).
'attentions': a tuple of tensors containing the attention weights of all layers of the model (if output_attentions is True).
- Otherwise, returns a tuple containing:
a tensor of shape (batch_size, sequence_length, hidden_size) containing the final hidden states of the model.
a tensor of shape (batch_size, hidden_size) containing the output of the model's pooling layer (if add_pooling_layer is True).
"""
def __init__(self, config, add_pooling_layer=True):
super().__init__(config)
self.config = config
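Since the tmix flag and the commented-out mixlayer/lambda_value parameters earlier in this diff suggest TMix-style mixup of hidden states inside the encoder, here is a minimal sketch of that interpolation, purely as an assumption about what the overridden encoder forward does at the chosen layer.

import torch

def tmix_interpolate(hidden_states, lambda_value):
    # Pair every example with another one in the batch (here simply the reversed
    # batch) and blend their hidden states: h = lambda * h_i + (1 - lambda) * h_j.
    partner = hidden_states.flip(0)
    return lambda_value * hidden_states + (1.0 - lambda_value) * partner

# Inside the overridden encoder loop this would apply only at layer index `mixlayer`, e.g.:
# if layer_idx == mixlayer: hidden_states = tmix_interpolate(hidden_states, lambda_value)
hidden_states = torch.randn(4, 16, 768)
mixed = tmix_interpolate(hidden_states, lambda_value=0.4)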