Commit 2455a6cd authored by kupper

Add missing module documentation

parent a9cdbec8
"""
This module evaluates the importance of specific context words for the NLI-based named entity classification task. Individual words are selectively replaced with a placeholder and the resulting mispredictions are recorded.
"""
import data.data_manager as data_manager
from src.common_interface import classify_entity
@@ -29,4 +33,4 @@ def run_context_analysis(model_name, dataset, num_sentences):
print(f"Predicted: {predicted}, True: {entity[1]}")
run_context_analysis("Llama-3.1-8B", "FIGER-coarse", 50)
run_context_analysis("T5-NLI", "FIGER-coarse", 50)
"""
This file evaluates all NEC approaches on all datasets.
"""
import os
import csv
import datetime
@@ -108,5 +109,5 @@ def read_NEC_metrics(directory):
print(f"Model: {model}, Dataset: {dataset}, Accuracy: {avg_accuracy:.2f}%")
run_NEC_tests_all()
read_NEC_metrics("results")
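As a rough illustration of the aggregation step, the following assumes one CSV file per run with model, dataset, and accuracy columns; the repository's actual result-file layout is not shown in this excerpt.
import os
import csv
from collections import defaultdict

def average_accuracies(directory):
    # Collect per-run accuracies keyed by (model, dataset), then print the average.
    runs = defaultdict(list)
    for filename in os.listdir(directory):
        if not filename.endswith(".csv"):
            continue
        with open(os.path.join(directory, filename), newline="") as f:
            for row in csv.DictReader(f):
                runs[(row["model"], row["dataset"])].append(float(row["accuracy"]))
    for (model, dataset), accuracies in sorted(runs.items()):
        avg_accuracy = sum(accuracies) / len(accuracies)
        print(f"Model: {model}, Dataset: {dataset}, Accuracy: {avg_accuracy:.2f}%")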
"""
This module implements the finetuning procedure for the MLM entity task formulation for the named entity classification task.
By default, the model is finetuned on the FIGER-coarse dataset.
"""
import data.data_manager as data_manager
from src.models.T5_MLM_entity import finetune_model, set_label_dict
......
"""
This module implements the finetuning procedure for the MLM label task formulation for the named entity classification task.
By default, the model is finetuned on the FIGER-coarse dataset with 1000 example sentences.
"""
import data.data_manager as data_manager
from src.models.T5_MLM_label import finetune_model
......
"""
This module implements the finetuning procedure for the NLI task formulation for the named entity classification task.
By default, the model is finetuned on the FIGER-coarse dataset with 1000 example sentences.
"""
import data.data_manager as data_manager
from src.models.T5_NLI import finetune_model
......
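A hedged sketch of how training pairs for the NLI formulation could be built: each sentence yields an entailed hypothesis for the gold label and a contradicted hypothesis for a sampled wrong label. The hypothesis template and the single-negative sampling are assumptions, not the module's documented behaviour.
import random

def build_nli_examples(annotated_sentences, labels):
    """annotated_sentences: iterable of (sentence, entity, gold_label) triples."""
    examples = []
    for sentence, entity, gold_label in annotated_sentences:
        # Positive pair: the gold label should be entailed by the sentence.
        examples.append({"premise": sentence,
                         "hypothesis": f"{entity} is a {gold_label}.",
                         "target": "entailment"})
        # Negative pair: a randomly sampled wrong label should not be entailed.
        negative = random.choice([label for label in labels if label != gold_label])
        examples.append({"premise": sentence,
                         "hypothesis": f"{entity} is a {negative}.",
                         "target": "contradiction"})
    return examples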
"""
This module is used to plot the loss curve of the T5 finetuning tasks. The loss values are extracted from the Slurm output file.
To use this module, the correct filenames must be substituted in the call to `plot_loss_curve`.
"""
import os
import re
import pandas as pd
......
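A sketch of the extraction step, assuming the Slurm output contains the Hugging Face Trainer's logged dictionaries (e.g. {'loss': 1.23, ...}); the file name below is hypothetical and the log format should be checked against the actual output.
import re
import matplotlib.pyplot as plt

def extract_losses(slurm_file):
    # Pull every "'loss': <number>" occurrence out of the Slurm log, in order.
    pattern = re.compile(r"'loss':\s*([0-9]+\.?[0-9]*)")
    with open(slurm_file) as f:
        return [float(m.group(1)) for m in pattern.finditer(f.read())]

losses = extract_losses("slurm-job.out")  # hypothetical file name
plt.plot(range(1, len(losses) + 1), losses)
plt.xlabel("logging step")
plt.ylabel("training loss")
plt.savefig("loss_curve.png")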
"""
This module implements the entity masking approach for the named entity classification task. It uses the T5 model and supports finetuning.
"""
import random
import numpy as np
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq
......
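One possible reading of an entity-masking formulation, sketched below: the entity is replaced by a T5 sentinel, the input is conditioned on a candidate label, and the label under which the model most readily restores the original entity is predicted. The prompt wording is an assumption and need not match the module's actual templates.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

def entity_masking_score(sentence, entity, label):
    # Mask the entity and ask how plausible it is as the filler, given the candidate label.
    masked = sentence.replace(entity, "<extra_id_0>")
    source = f"The masked phrase is a {label}. {masked}"
    target = f"<extra_id_0> {entity} <extra_id_1>"
    inputs = tokenizer(source, return_tensors="pt")
    target_ids = tokenizer(target, return_tensors="pt").input_ids
    with torch.no_grad():
        loss = model(**inputs, labels=target_ids).loss  # mean cross-entropy of the target
    return -loss.item()  # higher means the label explains the entity better

def classify(sentence, entity, candidate_labels):
    return max(candidate_labels, key=lambda label: entity_masking_score(sentence, entity, label))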
"""
This module implements the label masking approach for the named entity classification task. It uses the T5 model and supports finetuning.
"""
import numpy as np
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import Dataset, DatasetDict
......
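The complementary label-masking sketch below masks the label position instead and scores each candidate label as the sentinel's filler; again, the prompt template is an assumption rather than the module's actual wording.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

def label_masking_scores(sentence, entity, candidate_labels):
    # The label slot is masked; lower cross-entropy on a candidate label means a better fit.
    source = f"{sentence} In this sentence, {entity} is a <extra_id_0>."
    inputs = tokenizer(source, return_tensors="pt")
    scores = {}
    for label in candidate_labels:
        target_ids = tokenizer(f"<extra_id_0> {label} <extra_id_1>", return_tensors="pt").input_ids
        with torch.no_grad():
            scores[label] = -model(**inputs, labels=target_ids).loss.item()
    return scores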
"""
This module implements the natural language inference approach for the named entity classification task. It uses the T5 model and supports finetuning.
"""
import torch
from torch.nn.functional import softmax
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq
......
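A hedged sketch of the NLI-style prediction: each candidate label becomes a hypothesis, and the label whose hypothesis scores best as "entailment" is returned. The prompt is modelled on T5's text-to-text MNLI format, but the module's actual template and scoring (the softmax import above suggests probabilities over class tokens) may differ.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

def nli_classify(sentence, entity, candidate_labels):
    best_label, best_score = None, float("-inf")
    target_ids = tokenizer("entailment", return_tensors="pt").input_ids
    for label in candidate_labels:
        prompt = f"mnli hypothesis: {entity} is a {label}. premise: {sentence}"
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            score = -model(**inputs, labels=target_ids).loss.item()  # likelihood of "entailment"
        if score > best_score:
            best_label, best_score = label, score
    return best_label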
"""
This module implements the Word2Vec-based approach for the named entity classification task. It relies on the label dictionary functionality of the data manager for the required representative entities.
"""
from gensim.models import Word2Vec
import gensim.downloader as api
import string
@@ -81,4 +85,4 @@ def classify_entity(entity, labels):
return best_label if best_label else labels[0]
load_pretrained()
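An illustrative sketch of the Word2Vec approach, assuming the label dictionary maps each label to a list of representative entity words; the pretrained vector model and the fallback behaviour are assumptions made for this example.
import gensim.downloader as api

vectors = api.load("word2vec-google-news-300")  # any pretrained KeyedVectors model would do

def classify_entity(entity, label_dict):
    """label_dict: {label: [representative entity words]}; returns the closest label."""
    entity_tokens = [t for t in entity.split() if t in vectors]
    if not entity_tokens:
        return next(iter(label_dict))  # fall back to an arbitrary label for out-of-vocabulary entities
    best_label, best_sim = None, float("-inf")
    for label, representatives in label_dict.items():
        reps = [r for r in representatives if r in vectors]
        if not reps:
            continue
        sim = vectors.n_similarity(entity_tokens, reps)  # cosine similarity of averaged vectors
        if sim > best_sim:
            best_label, best_sim = label, sim
    return best_label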