Commit 427c3c7d authored by friebolin

Add docstrings

parent 1bf3cb79
@@ -41,11 +41,23 @@ def set_seed(seed: int = 42) -> None:
class WordClassificationModel(torch.nn.Module): # use AutoModel from the library
"""This class is needed to enable BERT to work with our input. We apply a dropout layer
and the linear classifier layer (/2 layer MLP) to make it a binary decision problem. In the forward step
we specify the classification over the span given by end and start position and compute the
loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
class WordClassificationModel(torch.nn.Module):
"""
A PyTorch module that uses BERT for word classification. It applies a dropout layer and
a linear classifier layer to turn the task into a binary decision problem. In the forward step, it
performs the classification over the span given by the start and end positions and computes the
loss with cross entropy. The predictions (logits) are produced by the classifier layer.
Params:
config_name (str): The configuration name of the pre-trained BERT model.
tmix (bool): Whether to use the TMix layer. Defaults to False.
imdb (bool): Whether to use the IMDB dataset. Defaults to False.
Returns:
outputs: The predicted logits, together with the hidden states and attention masks from the
BERT model, and the computed loss value.
"""
def __init__(self, config_name, tmix=False, imdb=False): #mixlayer=-1, lambda_value=0.0):
super(WordClassificationModel, self).__init__()
self.tmix=tmix
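The docstring above describes the classification head these classes share: BERT hidden states for the span between the start and end positions are pooled, passed through dropout and a linear layer, and trained with cross entropy. A minimal sketch of that idea follows; the tensor names, the 768 hidden size, and mean-pooling over the span are illustrative assumptions, not the repository's exact implementation.

import torch
from torch import nn

def classify_span(hidden_states, start_positions, end_positions, dropout, classifier):
    # hidden_states: (batch_size, seq_len, hidden_size) from the BERT encoder.
    # Mean-pool the hidden states of the target word span for each example
    # (the actual pooling used in the repository may differ).
    pooled = torch.stack([
        hidden_states[i, start_positions[i]:end_positions[i]].mean(dim=0)
        for i in range(hidden_states.size(0))
    ])
    return classifier(dropout(pooled))  # logits of shape (batch_size, 2)

dropout = nn.Dropout(0.1)
classifier = nn.Linear(768, 2)           # binary decision problem
loss_fct = nn.CrossEntropyLoss()

hidden_states = torch.randn(4, 16, 768)  # dummy encoder output
starts, ends = torch.tensor([1, 2, 0, 5]), torch.tensor([3, 4, 2, 7])
labels = torch.tensor([0, 1, 1, 0])
logits = classify_span(hidden_states, starts, ends, dropout, classifier)
loss = loss_fct(logits, labels)          # cross-entropy loss as in the docstring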
@@ -116,12 +128,24 @@ class WordClassificationModel(torch.nn.Module): # use AutoModel from the library
class BertForWordClassification(BertPreTrainedModel): # use AutoModel from the library
"""This class is needed to enable BERT to work with our input. We apply a dropout layer
and the linear classifier layer to make it a binary decision problem. In the forward step
we specify the classification over the span given by end and start position and compute the
loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
class BertForWordClassification(BertPreTrainedModel):
"""
BERT model for word classification. Applies a dropout layer and a linear classifier layer to turn the task into a
binary decision problem. In the forward step, the classification is performed over the span given by the start and
end positions, and the loss is computed with cross entropy. The predictions (logits) are produced by the classifier
layer.
Params:
config (:class:`~transformers.BertConfig`):
Configuration class for BERT.
Outputs:
if `labels` is not `None`:
- `loss`: `torch.FloatTensor` of shape `(1,)`. Classification loss.
- `logits`: `torch.FloatTensor` of shape `(batch_size, num_labels)`. Logits (output) produced by the linear classifier layer.
if `labels` is `None`:
- `logits`: `torch.FloatTensor` of shape `(batch_size, num_labels)`. Logits (output) produced by the linear classifier layer.
"""
def __init__(self, config):
super(BertForWordClassification, self).__init__(config)
self.num_labels=config.num_labels
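As a hedged illustration of the Outputs contract above, a call might look like the following; the keyword names (start_position, end_position, labels) follow the docstring, but the exact forward signature is not visible in this diff.

import torch
from transformers import AutoTokenizer

# Hypothetical usage sketch; argument names are assumptions based on the docstring,
# and BertForWordClassification is the class defined in this module.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = BertForWordClassification.from_pretrained("bert-base-uncased", num_labels=2)

batch = tokenizer("The bank raised interest rates.", return_tensors="pt")
span = dict(start_position=torch.tensor([2]), end_position=torch.tensor([3]))

# With labels: outputs = (loss, logits, ...); without labels: outputs = (logits, ...).
loss, logits = model(**batch, **span, labels=torch.tensor([1]))[:2]
logits_only = model(**batch, **span)[0]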
@@ -158,11 +182,26 @@ class BertForWordClassification(BertPreTrainedModel): # use AutoModel from the library
return outputs
class RobertaForWordClassification(RobertaPreTrainedModel): # use AutoModel from the library
"""This class is needed to enable BERT to work with our input. We apply a dropout layer
and the linear classifier layer to make it a binary decision problem. In the forward step
we specify the classification over the span given by end and start position and compute the
loss function with cross entropy. The predictions (logits) are made by our classifier layer."""
class RobertaForWordClassification(RobertaPreTrainedModel):
"""
Fine-tunes a pre-trained RoBERTa model for word classification tasks. Applies a dropout layer
and a linear classifier layer to turn the task into a binary decision problem. In the forward step,
the model performs the classification over the span given by the start and end positions and computes the
loss with cross entropy. The predictions (logits) are produced by the classifier layer.
Args:
config (:class:`~transformers.RobertaConfig`):
The configuration object that configures the model architecture.
Outputs:
if `labels` is not None:
Returns the cross-entropy loss (:obj:`torch.FloatTensor`).
if `labels` is None:
Returns a tuple of the predicted logits for each class (:obj:`torch.FloatTensor`)
and a tuple of outputs from the RoBERTa model, including the last hidden state and
the attention mask.
"""
def __init__(self, config):
super(RobertaForWordClassification, self).__init__(config)
self.num_labels=config.num_labels
@@ -199,11 +238,27 @@ class RobertaForWordClassification(RobertaPreTrainedModel): # use AutoModel from the library
class BertModelTMix(BertPreTrainedModel):
"""
Model to override forward function in Encoder (copied and slightly modified from
"""
Initializes a BertModelTMix model. Overrides the forward function in the encoder (copied and slightly modified from
transformers)
Params:
- config: `BertConfig` instance with the model configuration.
- add_pooling_layer: `bool`. Whether to include a pooling layer in the model architecture. Default: True.
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
Returns:
- If return_dict is True, returns a dictionary containing the following keys:
'last_hidden_state': a tensor of shape (batch_size, sequence_length, hidden_size) containing the final hidden states of the model.
'pooler_output': a tensor of shape (batch_size, hidden_size) containing the output of the model's pooling layer (if add_pooling_layer is True).
'past_key_values': a tuple of tensors containing the precomputed key and value hidden states of the attention blocks (if use_cache is True).
'hidden_states': a tuple of tensors containing the hidden states of all layers of the model (if output_hidden_states is True).
'attentions': a tuple of tensors containing the attention weights of all layers of the model (if output_attentions is True).
- Otherwise, returns a tuple containing:
a tensor of shape (batch_size, sequence_length, hidden_size) containing the final hidden states of the model.
a tensor of shape (batch_size, hidden_size) containing the output of the model's pooling layer (if add_pooling_layer is True).
"""
def __init__(self, config, add_pooling_layer=True):
super().__init__(config)
self.config = config
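Since the tmix flag and the commented-out mixlayer/lambda_value parameters earlier in this diff suggest TMix-style mixup of hidden states inside the encoder, here is a minimal sketch of that interpolation, purely as an assumption about what the overridden encoder forward does at the chosen layer.

import torch

def tmix_interpolate(hidden_states, lambda_value):
    # Pair every example with another one in the batch (here simply the reversed
    # batch) and blend their hidden states: h = lambda * h_i + (1 - lambda) * h_j.
    partner = hidden_states.flip(0)
    return lambda_value * hidden_states + (1.0 - lambda_value) * partner

# Inside the overridden encoder loop this would apply only at layer index `mixlayer`, e.g.:
# if layer_idx == mixlayer: hidden_states = tmix_interpolate(hidden_states, lambda_value)
hidden_states = torch.randn(4, 16, 768)
mixed = tmix_interpolate(hidden_states, lambda_value=0.4)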