From 427c3c7d3748bbcbb2e4579abfe4351648241cd4 Mon Sep 17 00:00:00 2001
From: friebolin <friebolin@cl.uni-heidelberg.de>
Date: Fri, 24 Feb 2023 13:30:03 +0100
Subject: [PATCH] Add docstrings

---
 Code/models.py | 90 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 72 insertions(+), 18 deletions(-)

diff --git a/Code/models.py b/Code/models.py
index 530b9f7..7c6197e 100644
--- a/Code/models.py
+++ b/Code/models.py
@@ -41,11 +41,23 @@ def set_seed(seed: int = 42) -> None:
 
 
 
-class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bibliothek
-    """This class is needed to enable BERT to work with our input. We apply a dropout layer
-    and the linear classifier layer (/2 layer MLP) to make it a binary decision problem. In the forward step
-    we specify the classification over the span given by end and start position and compute the
-    loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
+class WordClassificationModel(torch.nn.Module):
+    """
+    A PyTorch Module that utilizes BERT for word classification. It applies a dropout layer and
+    a linear classifier layer to make it a binary decision problem. In the forward step, it specifies
+    the classification over the span given by end and start position and computes the loss function
+    with cross entropy. The predictions (logits) are made by our classifier layer.
+
+    Params:
+        config_name (str): The configuration name of the pre-trained BERT model.
+        tmix (bool): Whether to use the TMix layer or not. Default is False.
+        imdb (bool): Whether to use the IMDB dataset or not. Default is False.
+
+    Returns:
+        outputs: The predicted logits along with the hidden states and attention masks from the BERT
+        model and the computed loss value.
+
+    """
     def __init__(self, config_name, tmix=False, imdb=False): #mixlayer=-1, lambda_value=0.0):
         super(WordClassificationModel, self).__init__()
         self.tmix=tmix
@@ -116,12 +128,24 @@ class WordClassificationModel(torch.nn.Module): #AutoModel verwenden aus der Bib
 
 
 
-class BertForWordClassification(BertPreTrainedModel): #AutoModel verwenden aus der Bibliothek
-    """This class is needed to enable BERT to work with our input. We apply a dropout layer
-    and the linear classifier layer to make it a binary decision problem. In the forward step
-    we specify the classification over the span given by end and start position and compute the
-    loss function with cross entropy. The predictions (logits) are made by our classifier layer.
+class BertForWordClassification(BertPreTrainedModel):
     """
+    BERT model for word classification. Applies a dropout layer and a linear classifier layer to make it a binary
+    decision problem. In the forward step, the classification is specified over the span given by end and start
+    position, and the loss function is computed with cross entropy. The predictions (logits) are made by the classifier
+    layer.
+
+    Params:
+        config (:class:`~transformers.BertConfig`):
+            Configuration class for BERT.
+
+    Outputs:
+        if `labels` is not `None`:
+            - `loss`: `torch.FloatTensor` of shape `(1,)`. Classification loss.
+            - `logits`: `torch.FloatTensor` of shape `(batch_size, num_labels)`. Logits (output) produced by the linear classifier layer.
+        if `labels` is `None`:
+            - `logits`: `torch.FloatTensor` of shape `(batch_size, num_labels)`. Logits (output) produced by the linear classifier layer.
+    """
     def __init__(self, config):
         super(BertForWordClassification, self).__init__(config)
         self.num_labels=config.num_labels
@@ -158,11 +182,25 @@ class BertForWordClassification(BertPreTrainedModel): #AutoModel verwenden aus d
         return outputs
 
 
-class RobertaForWordClassification(RobertaPreTrainedModel): #AutoModel verwenden aus der Bibliothek
-    """This class is needed to enable BERT to work with our input. We apply a dropout layer
-    and the linear classifier layer to make it a binary decision problem. In the forward step
-    we specify the classification over the span given by end and start position and compute the
-    loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
+class RobertaForWordClassification(RobertaPreTrainedModel):
+    """
+    Fine-tunes a pre-trained RoBERTa model for word classification tasks. Applies a dropout layer
+    and a linear classifier layer to make it a binary decision problem. In the forward step,
+    the model specifies the classification over the span given by end and start position and computes the
+    loss function with cross entropy. The predictions (logits) are made by the classifier layer.
+
+    Args:
+        config (:class:`~transformers.RobertaConfig`):
+            The configuration object that configures the model architecture.
+
+    Outputs:
+        if `labels` is not None:
+            Returns the cross-entropy loss (:obj:`torch.FloatTensor`).
+        if `labels` is None:
+            Returns a tuple of the predicted logits for each class (:obj:`torch.FloatTensor`)
+            and a tuple of outputs from the RoBERTa model, including the last hidden state and
+            the attention mask.
+    """
     def __init__(self, config):
         super(RobertaForWordClassification, self).__init__(config)
         self.num_labels=config.num_labels
@@ -199,11 +237,27 @@ class RobertaForWordClassification(RobertaPreTrainedModel): #AutoModel verwenden
 
 
 class BertModelTMix(BertPreTrainedModel):
-    """
-    Model to override forward function in Encoder (copied and slightly modified from
+    """
+    Initializes a BertModelTMix model. Overrides the forward function in the Encoder (copied and slightly modified from
     transformers)
+
+    Params:
+    - config: `BertConfig` instance with the model configuration.
+    - add_pooling_layer: `bool`. Whether to include a pooling layer in the model architecture. Default: True.
+    - *args: Variable length argument list.
+    - **kwargs: Arbitrary keyword arguments.
+
+    Returns:
+    - If return_dict is True, returns a dictionary containing the following keys:
+        'last_hidden_state': a tensor of shape (batch_size, sequence_length, hidden_size) containing the final hidden states of the model.
+        'pooler_output': a tensor of shape (batch_size, hidden_size) containing the output of the model's pooling layer (if add_pooling_layer is True).
+        'past_key_values': a tuple of tensors containing the precomputed key and value hidden states of the attention blocks (if use_cache is True).
+        'hidden_states': a tuple of tensors containing the hidden states of all layers of the model (if output_hidden_states is True).
+        'attentions': a tuple of tensors containing the attention weights of all layers of the model (if output_attentions is True).
+    - Otherwise, returns a tuple containing:
+        a tensor of shape (batch_size, sequence_length, hidden_size) containing the final hidden states of the model.
+        a tensor of shape (batch_size, hidden_size) containing the output of the model's pooling layer (if add_pooling_layer is True).
     """
-
     def __init__(self, config, add_pooling_layer=True):
         super().__init__(config)
         self.config = config
--
GitLab