diff --git a/src/experiments/NEC_evaluation/context_sensitivity.py b/src/experiments/NEC_evaluation/context_sensitivity.py
index b72bd445bd3819d628cecb35007594288017de0e..f507f92713d8fc2f00b6b9316ad72121dad6378d 100644
--- a/src/experiments/NEC_evaluation/context_sensitivity.py
+++ b/src/experiments/NEC_evaluation/context_sensitivity.py
@@ -1,3 +1,7 @@
+"""
+This module evaluates the importance of specific context words for the NLI-based named entity classification task. Individual words are selectively replaced with a placeholder and the resulting mispredictions are recorded.
+"""
+
 import data.data_manager as data_manager
 from src.common_interface import classify_entity
 
@@ -29,4 +33,4 @@ def run_context_analysis(model_name, dataset, num_sentences):
             print(f"Predicted: {predicted}, True: {entity[1]}")
 
 
-run_context_analysis("Llama-3.1-8B", "FIGER-coarse", 50)
+run_context_analysis("T5-NLI", "FIGER-coarse", 50)
diff --git a/src/experiments/NEC_evaluation/evaluation.py b/src/experiments/NEC_evaluation/evaluation.py
index 8bb9b8502b24198bf2b3f44734f612d9c14b8baf..cc6f6fd24cfd65cdfbcf4729548d3ba8c7ae002b 100644
--- a/src/experiments/NEC_evaluation/evaluation.py
+++ b/src/experiments/NEC_evaluation/evaluation.py
@@ -1,6 +1,7 @@
 """
-This file evaluates all NEC approaches.
+This file evaluates all NEC approaches on all datasets.
 """
+
 import os
 import csv
 import datetime
@@ -108,5 +109,5 @@ def read_NEC_metrics(directory):
        print(f"Model: {model}, Dataset: {dataset}, Accuracy: {avg_accuracy:.2f}%")
 
 
-# run_NEC_tests_all()
+run_NEC_tests_all()
 read_NEC_metrics("results")
diff --git a/src/experiments/finetune_T5/finetune_T5_MLM_entity.py b/src/experiments/finetune_T5/finetune_T5_MLM_entity.py
index de0481b40022b2eee0bde43e196dddadc30e9362..c65999f198697a72eafe3fa8b009e6a3b77dff28 100644
--- a/src/experiments/finetune_T5/finetune_T5_MLM_entity.py
+++ b/src/experiments/finetune_T5/finetune_T5_MLM_entity.py
@@ -1,3 +1,8 @@
+"""
+This module implements the finetuning procedure for the MLM entity task formulation of the named entity classification task.
+By default, the model is finetuned on the FIGER-coarse dataset.
+"""
+
 import data.data_manager as data_manager
 from src.models.T5_MLM_entity import finetune_model, set_label_dict
 
diff --git a/src/experiments/finetune_T5/finetune_T5_MLM_label.py b/src/experiments/finetune_T5/finetune_T5_MLM_label.py
index c7707b4d912caf62cab2fa373c84f7a15d3836e3..aed2575eacc6b66c204c21f7f576bc0bba0b3e91 100644
--- a/src/experiments/finetune_T5/finetune_T5_MLM_label.py
+++ b/src/experiments/finetune_T5/finetune_T5_MLM_label.py
@@ -1,3 +1,8 @@
+"""
+This module implements the finetuning procedure for the MLM label task formulation of the named entity classification task.
+By default, the model is finetuned on the FIGER-coarse dataset with 1000 example sentences.
+"""
+
 import data.data_manager as data_manager
 from src.models.T5_MLM_label import finetune_model
 
diff --git a/src/experiments/finetune_T5/finetune_T5_NLI.py b/src/experiments/finetune_T5/finetune_T5_NLI.py
index 6db6dfd2d56b01063f765e59454dda4d6c058971..85dd279cd68c9184849b3b229ae0bccfa44911b2 100644
--- a/src/experiments/finetune_T5/finetune_T5_NLI.py
+++ b/src/experiments/finetune_T5/finetune_T5_NLI.py
@@ -1,3 +1,8 @@
+"""
+This module implements the finetuning procedure for the NLI task formulation of the named entity classification task.
+By default, the model is finetuned on the FIGER-coarse dataset with 1000 example sentences.
+""" + import data.data_manager as data_manager from src.models.T5_NLI import finetune_model diff --git a/src/experiments/finetune_T5/plotting/plot_loss.py b/src/experiments/finetune_T5/plotting/plot_loss.py index 75ab038a5b48b62846e1f69b27d8f6944716453b..18ed38e839bee1f392d45d011174bbd3188918c7 100644 --- a/src/experiments/finetune_T5/plotting/plot_loss.py +++ b/src/experiments/finetune_T5/plotting/plot_loss.py @@ -1,3 +1,8 @@ +""" +This module is used to plot the loss curve of the T5 finetuning tasks. The loss values are extracted from the Slurm output file. +To use this module, the correct filenames must be substituted in the call to `plot_loss_curve` +""" + import os import re import pandas as pd diff --git a/src/models/T5_MLM_entity.py b/src/models/T5_MLM_entity.py index 05692edbaa5f484ecc3134fc74a2d4161a93144c..c39c6b046e038f6f427f901f2ec166788ea5c97f 100644 --- a/src/models/T5_MLM_entity.py +++ b/src/models/T5_MLM_entity.py @@ -1,3 +1,7 @@ +""" +This module implements the entity masking approach for the named entity recogition task. It uses the T5 model and allows for finetuning of the model. +""" + import random import numpy as np from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq diff --git a/src/models/T5_MLM_label.py b/src/models/T5_MLM_label.py index 14004ca763dce4f35c31c33efb731e394d69698b..8f1d54dd4277fe207e52fa0d8f08b2d7c88c56ff 100644 --- a/src/models/T5_MLM_label.py +++ b/src/models/T5_MLM_label.py @@ -1,3 +1,7 @@ +""" +This module implements the label masking approach for the named entity recogition task. It uses the T5 model and allows for finetuning of the model. +""" + import numpy as np from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq from datasets import Dataset, DatasetDict diff --git a/src/models/T5_NLI.py b/src/models/T5_NLI.py index 4d57ada6ff55d523ca1168769b9589508f0ec600..78c7458911c496801b0066eb3e31de008b33a7e8 100644 --- a/src/models/T5_NLI.py +++ b/src/models/T5_NLI.py @@ -1,3 +1,7 @@ +""" +This module implements the natural language inference approach for the named entity recogition task. It uses the T5 model and allows for finetuning of the model. +""" + import torch from torch.nn.functional import softmax from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq diff --git a/src/models/Word2Vec.py b/src/models/Word2Vec.py index 741dbf7ecff09ee54f80e0425f2399436edc35da..ef824cf9ea7e84c529032ffae01eb83ca515c8b1 100644 --- a/src/models/Word2Vec.py +++ b/src/models/Word2Vec.py @@ -1,3 +1,7 @@ +""" +This module implements the Word2Vec based approach for the named entity recogition task. It relies on the label dictionary functionality of the data manager for the required representative entities. +""" + from gensim.models import Word2Vec import gensim.downloader as api import string @@ -81,4 +85,4 @@ def classify_entity(entity, labels): return best_label if best_label else labels[0] -# load_pretrained() +load_pretrained()