Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
NER-project
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
fsem25-project-nerds
NER-project
Commits
f558c9e4
Unverified
Commit
f558c9e4
authored
1 month ago
by
JulianFP
Browse files
Options
Downloads
Patches
Plain Diff
Add T5 MLM approach where the label is being masked
parent
602041ce
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/common_interface.py
+3
-0
3 additions, 0 deletions
src/common_interface.py
src/models/T5_MLM_label.py
+23
-0
23 additions, 0 deletions
src/models/T5_MLM_label.py
tests/test_NEC.py
+1
-1
1 addition, 1 deletion
tests/test_NEC.py
with
27 additions
and
1 deletion
src/common_interface.py
+
3
−
0
View file @
f558c9e4
...
@@ -6,6 +6,7 @@ from src.models.llms_interface import available_models as llms
...
@@ -6,6 +6,7 @@ from src.models.llms_interface import available_models as llms
from
src.models.GLiNER
import
find_entities
as
find_entities_gliner
from
src.models.GLiNER
import
find_entities
as
find_entities_gliner
from
src.models.GLiNER
import
classify_entity
as
classify_entity_gliner
from
src.models.GLiNER
import
classify_entity
as
classify_entity_gliner
from
src.models.T5
import
classify_entity
as
classify_entity_t5
from
src.models.T5
import
classify_entity
as
classify_entity_t5
# The label-masking MLM classifier lives in its own module (T5_MLM_label),
# not in the plain T5 module.
from src.models.T5_MLM_label import classify_entity as classify_entity_t5_mlm_label
from
src.experiments.NER_with_LLMs.NER_with_LLMs
import
find_entities
as
find_entities_llm
from
src.experiments.NER_with_LLMs.NER_with_LLMs
import
find_entities
as
find_entities_llm
...
@@ -15,6 +16,8 @@ def classify_entity(model_name, sentence, entity, labels):
...
@@ -15,6 +16,8 @@ def classify_entity(model_name, sentence, entity, labels):
"""
"""
if
model_name
==
"
T5
"
:
if
model_name
==
"
T5
"
:
return
classify_entity_t5
(
sentence
,
entity
,
labels
)
return
classify_entity_t5
(
sentence
,
entity
,
labels
)
elif
model_name
==
"
T5-MLM-label
"
:
return
classify_entity_t5_mlm_label
(
sentence
,
entity
,
labels
)
elif
model_name
==
"
GLiNER
"
:
elif
model_name
==
"
GLiNER
"
:
return
classify_entity_gliner
(
sentence
,
entity
,
labels
)
return
classify_entity_gliner
(
sentence
,
entity
,
labels
)
...
...
This diff is collapsed.
Click to expand it.
src/models/T5_MLM_label.py
0 → 100644
+
23
−
0
View file @
f558c9e4
import
torch
from
torch.nn.functional
import
softmax
from
transformers
import
T5ForConditionalGeneration
,
T5Tokenizer
,
Trainer
,
TrainingArguments
,
DataCollatorForSeq2Seq
# Checkpoint identifier passed to both from_pretrained calls below.
model_name = "google-t5/t5-base"

# The tokenizer and the model are created at module top level, i.e. once when
# this module is first imported; classify_entity below reads both globals.
print("Loading model: T5 MLM")
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
print("Finished loading model: T5 MLM")
def classify_entity(sentence, entity, labels):
    """Pick the label that T5 finds most plausible for *entity* in *sentence*.

    A hypothesis "<entity> is a <extra_id_0>" is appended to the sentence, and
    each candidate label is scored by the loss the model assigns to it as the
    fill-in for the masked span. The label with the lowest loss wins.

    Args:
        sentence: The sentence in which the entity occurs.
        entity: The entity mention to classify.
        labels: Iterable of candidate label strings.

    Returns:
        The candidate label with the lowest loss. On ties, the label that
        appears first in ``labels`` is returned.

    Raises:
        ValueError: If ``labels`` is empty.
    """
    candidates = list(labels)
    if not candidates:
        raise ValueError("labels must contain at least one candidate label")

    sentence_with_masked_hypothesis = f"{sentence} {entity} is a <extra_id_0>"
    # The tokenizer output exposes the token ids as `input_ids`.
    input_ids = tokenizer(
        sentence_with_masked_hypothesis, return_tensors="pt"
    ).input_ids

    best_label = None
    best_loss = float("inf")
    # Pure inference: no gradients are needed to compare losses.
    with torch.no_grad():
        for label in candidates:
            label_ids = tokenizer(
                f"<extra_id_0> {label}", return_tensors="pt"
            ).input_ids
            # The forward pass returns an output object; the scalar loss is
            # its `.loss` field. Convert to a float so comparisons are plain
            # numbers rather than tensors.
            loss = model(input_ids=input_ids, labels=label_ids).loss.item()
            if best_label is None or loss < best_loss:
                best_label, best_loss = label, loss

    return best_label
This diff is collapsed.
Click to expand it.
tests/test_NEC.py
+
1
−
1
View file @
f558c9e4
from
src.common_interface
import
classify_entity
from
src.common_interface
import
classify_entity
tested_models
=
[
"
GLiNER
"
,
"
T5
"
]
tested_models
=
[
"
GLiNER
"
,
"
T5
"
,
"
T5-MLM-label
"
]
test_sentence
=
"
Barack Obama was the president of the United States.
"
test_sentence
=
"
Barack Obama was the president of the United States.
"
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment