Commit 51e06d32 authored by Thomas Wolf
- Merged the NEC test cases into one

- Minor improvements in the LLM pipeline; removed an irrelevant todo
parent 53ba4829
@@ -28,8 +28,8 @@ Mark your result like this for easy extraction: <answer>predicted_class</answer>
 Example:
 Labels == ['person', 'organization', 'location', 'miscellaneous']
-Sentence: 'Europe rejects German call to boycott British lamb.'
-Target Entity: Europe
+Sentence: 'NASA sent astronauts to the moon.'
+Target Entity: NASA
 Desired Result: <answer>organization</answer>
 Your Task:
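The prompt above asks the model to wrap its prediction in <answer></answer> tags for easy extraction. The parsing code is not part of this diff; below is a minimal sketch of how such a tag could be pulled out of raw model output. The helper name extract_answer is hypothetical, not the repository's actual parser.

import re

def extract_answer(output: str) -> str | None:
    # Hypothetical helper: grab the text between the first <answer>...</answer>
    # pair, tolerating any surrounding reasoning prose the model emits.
    match = re.search(r"<answer>(.*?)</answer>", output, re.DOTALL)
    return match.group(1).strip() if match else None

# extract_answer("Some reasoning... <answer>organization</answer>") -> "organization"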
@@ -3,7 +3,6 @@ This file deals with interacting with the LLMs, handles input and output.
 There is a common interface factory class LLM, and one child class for each used model.
 """
-# todo deal with external server issues by sending the request again until there is a valid response
 import os
 import ollama
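The module docstring describes a common interface factory class LLM with one child class per model, served through ollama. A minimal sketch of that shape, assuming wiring not visible in this diff (the class names, model tag, and prompt text here are illustrative, not the repository's actual code):

import ollama

class LLM:
    # Common parent: each supported model implements classify() (sketch only).
    def classify(self, sentence: str, entity: str, labels: list) -> str:
        raise NotImplementedError

class Llama31(LLM):
    # Illustrative child class; the real prompt template lives in the repository.
    def classify(self, sentence, entity, labels):
        prompt = (f"Labels == {labels}\nSentence: '{sentence}'\nTarget Entity: {entity}\n"
                  "Mark your result like this for easy extraction: <answer>predicted_class</answer>")
        response = ollama.generate(model="llama3.1:8b", prompt=prompt)
        return response["response"]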
 from src.common_interface import classify_entity
-tested_models = ["GLiNER", "T5-NLI", "T5-MLM-label"]
-test_sentence = "Barack Obama was the president of the United States."
-test_entities = ["Barack Obama", "United States"]
-labels = ["person", "organization", "time", "location", "miscellaneous"]
+tested_models = ["GLiNER", "T5-NLI", "T5-MLM-label", "Llama-3.1-8B", "DeepSeek-R1-Distill-Qwen-32B"]
+test_labels = ["person", "organization", "time", "location", "miscellaneous"]
+test_sentence = "Apollo 11 was a spaceflight conducted in July 1969 by the United States and launched " \
+                "by NASA, sending the astronauts Neil Armstrong and Buzz Aldrin to become the first humans to walk on the moon."
+true_labels = [('Apollo 11', 'miscellaneous'), ('July 1969', 'time'),
+               ('United States', 'organization'), ('NASA', 'organization'), ('Neil Armstrong', 'person'),
+               ('Buzz Aldrin', 'person'), ('moon', 'location')]
-print("Test NEC")
+print("Test sentence:\n" + test_sentence)
 for model in tested_models:
-    print("\n")
-    for test_entity in test_entities:
-        print(f"{model} prediction for {test_entity}:")
-        print(classify_entity(model, test_sentence, test_entity, labels))
+    print(f"\nTesting model {model}...")
+    correct = 0
+    for pair in true_labels:
+        entity = pair[0]
+        predicted_label = classify_entity(model, test_sentence, entity, test_labels)
+        print(f"Prediction: {entity} is a {predicted_label}.")
+        if predicted_label == pair[1]:
+            correct += 1
+    accuracy = correct / len(true_labels)
+    print(f"Accuracy {model}: {accuracy}")
-from src.common_interface import classify_entity
-from src.metrics import precision, recall, f1_score
-tested_models = ["Llama-3.1-8B", "DeepSeek-R1-Distill-Qwen-32B"]
-test_labels = ["person", "organization", "time", "location", "miscellaneous"]
-test_sentence = "Apollo 11 was a spaceflight conducted in July 1969 by the United States and launched " \
-                "by NASA, sending the astronauts Neil Armstrong and Buzz Aldrin to become the first humans to walk on the moon."
-true_labels = [('Apollo 11', 'miscellaneous'), ('July 1969', 'time'),
-               ('United States', 'organization'), ('NASA', 'organization'), ('Neil Armstrong', 'person'),
-               ('Buzz Aldrin', 'person'), ('moon', 'location')]
-print("Test sentence:\n" + test_sentence)
-for model in tested_models:
-    print(f"Testing model {model}...")
-    predicted_entities = []
-    for pair in true_labels:
-        entity = pair[0]
-        predicted_label = classify_entity(model, test_sentence, entity, test_labels)
-        predicted_entities.append((entity, predicted_label))
-    print(f"{model} found entities: \n{predicted_entities}")
-    print(f"Precision: {precision(true_labels, predicted_entities)}")
-    print(f"Recall: {recall(true_labels, predicted_entities)}")
-    print(f"F1-score: {f1_score(true_labels, predicted_entities)}\n")