From 60188fa6c39e1e768e8b952d7450b76d590f9dd7 Mon Sep 17 00:00:00 2001
From: "ih322@uni-heidelberg.de" <hd_ih322@uc2n994.localdomain>
Date: Fri, 28 Mar 2025 13:45:17 +0100
Subject: [PATCH] Revert to t5-base and finetuning fixes

---
 scripts/finetune_T5_MLM_entity_bwuni.sh       | 14 ++++++++++++++
 scripts/finetune_T5_MLM_label_bwuni.sh        | 14 ++++++++++++++
 scripts/finetune_T5_NLI_bwuni.sh              | 14 ++++++++++++++
 .../finetune_T5/finetune_T5_MLM_label.py      |  2 +-
 src/models/T5_MLM_entity.py                   |  4 ++--
 src/models/T5_MLM_label.py                    |  4 ++--
 src/models/T5_NLI.py                          |  4 ++--
 7 files changed, 49 insertions(+), 7 deletions(-)
 create mode 100644 scripts/finetune_T5_MLM_entity_bwuni.sh
 create mode 100644 scripts/finetune_T5_MLM_label_bwuni.sh
 create mode 100644 scripts/finetune_T5_NLI_bwuni.sh

diff --git a/scripts/finetune_T5_MLM_entity_bwuni.sh b/scripts/finetune_T5_MLM_entity_bwuni.sh
new file mode 100644
index 0000000..227390e
--- /dev/null
+++ b/scripts/finetune_T5_MLM_entity_bwuni.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+#SBATCH --ntasks=1
+#SBATCH --time=24:00:00
+#SBATCH --mem=32gb
+#SBATCH --job-name=finetune_T5_MLM_entity
+#SBATCH --gres=gpu:1
+#SBATCH --partition=gpu_4_a100
+#SBATCH --output=logs/finetune_T5_MLM_entity_%j.txt
+
+conda activate fsem
+
+export PYTHONUNBUFFERED=1
+python -m src.experiments.finetune_T5.finetune_T5_MLM_entity
diff --git a/scripts/finetune_T5_MLM_label_bwuni.sh b/scripts/finetune_T5_MLM_label_bwuni.sh
new file mode 100644
index 0000000..0c3486a
--- /dev/null
+++ b/scripts/finetune_T5_MLM_label_bwuni.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+#SBATCH --ntasks=1
+#SBATCH --time=24:00:00
+#SBATCH --mem=32gb
+#SBATCH --job-name=finetune_T5_MLM_label
+#SBATCH --gres=gpu:1
+#SBATCH --partition=gpu_4_a100
+#SBATCH --output=logs/finetune_T5_MLM_label_%j.txt
+
+conda activate fsem
+
+export PYTHONUNBUFFERED=1
+python -m src.experiments.finetune_T5.finetune_T5_MLM_label
diff --git a/scripts/finetune_T5_NLI_bwuni.sh b/scripts/finetune_T5_NLI_bwuni.sh
new file mode 100644
index 0000000..573ed1a
--- /dev/null
+++ b/scripts/finetune_T5_NLI_bwuni.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+#SBATCH --ntasks=1
+#SBATCH --time=24:00:00
+#SBATCH --mem=32gb
+#SBATCH --job-name=finetune_T5_NLI
+#SBATCH --gres=gpu:1
+#SBATCH --partition=gpu_4_a100
+#SBATCH --output=logs/finetune_T5_NLI_%j.txt
+
+conda activate fsem
+
+export PYTHONUNBUFFERED=1
+python -m src.experiments.finetune_T5.finetune_T5_NLI
diff --git a/src/experiments/finetune_T5/finetune_T5_MLM_label.py b/src/experiments/finetune_T5/finetune_T5_MLM_label.py
index f244ed0..5a80a7b 100644
--- a/src/experiments/finetune_T5/finetune_T5_MLM_label.py
+++ b/src/experiments/finetune_T5/finetune_T5_MLM_label.py
@@ -25,6 +25,6 @@ def finetune_t5(dataset):
 
     epochs = 150
 
-    finetune_model(sentences, entities, labels, output_dir=f"./src/models/t5_mlm_finetuned_model/pretrained_{dataset}_epoch{epochs}", epochs=epochs)
+    finetune_model(sentences, entities, labels, output_dir=f"./src/models/t5_mlm_label_finetuned_model/pretrained_{dataset}_epoch{epochs}", epochs=epochs)
 
 finetune_t5("FIGER-coarse")
diff --git a/src/models/T5_MLM_entity.py b/src/models/T5_MLM_entity.py
index f07add7..7c269a8 100644
--- a/src/models/T5_MLM_entity.py
+++ b/src/models/T5_MLM_entity.py
@@ -5,7 +5,7 @@ from torch.nn.functional import softmax
 from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq
 from datasets import Dataset, DatasetDict
 
-model_name = "google-t5/t5-large"
+model_name = "google-t5/t5-base"
 
 print("Loading model: T5 MLM entity")
 tokenizer = T5Tokenizer.from_pretrained(model_name)
@@ -86,7 +86,7 @@ def finetune_model(sentences, entities, labels, output_dir, epochs=10):
     dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
 
     training_args = TrainingArguments(
-        output_dir="./src/models/t5_nli_finetuned_model/checkpoints/",
+        output_dir="./src/models/t5_mlm_entity_finetuned_model/checkpoints/",
         eval_strategy="epoch",
         learning_rate=5e-5,
         per_device_train_batch_size=8,
diff --git a/src/models/T5_MLM_label.py b/src/models/T5_MLM_label.py
index d5fa033..983a59b 100644
--- a/src/models/T5_MLM_label.py
+++ b/src/models/T5_MLM_label.py
@@ -4,7 +4,7 @@ from torch.nn.functional import softmax
 from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq
 from datasets import Dataset, DatasetDict
 
-model_name = "google-t5/t5-large"
+model_name = "google-t5/t5-base"
 
 print("Loading model: T5 MLM label")
 tokenizer = T5Tokenizer.from_pretrained(model_name)
@@ -62,7 +62,7 @@ def finetune_model(sentences, entities, labels, output_dir, epochs=10):
     dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
 
     training_args = TrainingArguments(
-        output_dir="./src/models/t5_nli_finetuned_model/checkpoints/",
+        output_dir="./src/models/t5_mlm_label_finetuned_model/checkpoints/",
         eval_strategy="epoch",
         learning_rate=5e-5,
         per_device_train_batch_size=8,
diff --git a/src/models/T5_NLI.py b/src/models/T5_NLI.py
index d062c7b..3429509 100644
--- a/src/models/T5_NLI.py
+++ b/src/models/T5_NLI.py
@@ -5,8 +5,8 @@ from datasets import Dataset, DatasetDict
 
 label_map = {True: "entailment", False: "contradiction"}
 
-# Use t5-large for testing because it is smaller
-model_name = "google-t5/t5-large"
+# Use t5-base for testing because it is smaller
+model_name = "google-t5/t5-base"
 # model_name = "google/t5_xxl_true_nli_mixture"
 
 
-- 
GitLab