From d641f769ed392343d3029f6b5a2b43b1556fc6e4 Mon Sep 17 00:00:00 2001
From: JulianFP <julian@partanengroup.de>
Date: Thu, 13 Mar 2025 19:30:27 +0100
Subject: [PATCH] T5 MLM: Add next extra_id to end of label ids, as was done
 in the training of T5 (see paper)

---
 src/models/T5_MLM_label.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/models/T5_MLM_label.py b/src/models/T5_MLM_label.py
index 39bd94d..d893ca2 100644
--- a/src/models/T5_MLM_label.py
+++ b/src/models/T5_MLM_label.py
@@ -15,7 +15,7 @@ def classify_entity(sentence, entity, labels):
 
     results = {}
     for label in labels:
-        label_ids = tokenizer(f"<extra_id_0> {label}", return_tensors="pt").input_ids
+        label_ids = tokenizer(f"<extra_id_0> {label} <extra_id_1>", return_tensors="pt").input_ids
         loss = model(input_ids=input_ids, labels=label_ids).loss.item()
         results[loss] = label
 
-- 
GitLab