Commit 51e06d32 authored by Thomas Wolf
- Merged the NEC test cases into one

- Minor improvements in the LLM pipeline; removed an irrelevant todo
parent 53ba4829
@@ -28,8 +28,8 @@ Mark your result like this for easy extraction: <answer>predicted_class</answer>
 Example:
 Labels == ['person', 'organization', 'location', 'miscellaneous']
-Sentence: 'Europe rejects German call to boycott British lamb.'
-Target Entity: Europe
+Sentence: 'NASA sent astronauts to the moon.'
+Target Entity: NASA
 Desired Result: <answer>organization</answer>
 Your Task:
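The prompt above asks the model to wrap its prediction in <answer></answer> tags for easy extraction. The parsing code is not part of this diff; below is a minimal sketch of how such a tag could be pulled out of raw model output. The helper name extract_answer is hypothetical, not the repository's actual parser.

import re

def extract_answer(output: str) -> str | None:
    # Hypothetical helper: grab the text between the first <answer>...</answer>
    # pair, tolerating any surrounding reasoning prose the model emits.
    match = re.search(r"<answer>(.*?)</answer>", output, re.DOTALL)
    return match.group(1).strip() if match else None

# extract_answer("Some reasoning... <answer>organization</answer>") -> "organization"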
@@ -3,7 +3,6 @@ This file deals with interacting with the LLMs, handles input and output.
 There is a common interface factory class LLM, and one child class for each used model.
 """
-# todo deal with external server issues by sending the request again until there is a valid response
 import os
 import ollama
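The module docstring describes a common interface factory class LLM with one child class per model, served through ollama. A minimal sketch of that shape, assuming wiring not visible in this diff (the class names, model tag, and prompt text here are illustrative, not the repository's actual code):

import ollama

class LLM:
    # Common parent: each supported model implements classify() (sketch only).
    def classify(self, sentence: str, entity: str, labels: list) -> str:
        raise NotImplementedError

class Llama31(LLM):
    # Illustrative child class; the real prompt template lives in the repository.
    def classify(self, sentence, entity, labels):
        prompt = (f"Labels == {labels}\nSentence: '{sentence}'\nTarget Entity: {entity}\n"
                  "Mark your result like this for easy extraction: <answer>predicted_class</answer>")
        response = ollama.generate(model="llama3.1:8b", prompt=prompt)
        return response["response"]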
 from src.common_interface import classify_entity
-tested_models = ["GLiNER", "T5-NLI", "T5-MLM-label"]
-test_sentence = "Barack Obama was the president of the United States."
-test_entities = ["Barack Obama", "United States"]
-labels = ["person", "organization", "time", "location", "miscellaneous"]
+tested_models = ["GLiNER", "T5-NLI", "T5-MLM-label", "Llama-3.1-8B", "DeepSeek-R1-Distill-Qwen-32B"]
+test_labels = ["person", "organization", "time", "location", "miscellaneous"]
+test_sentence = "Apollo 11 was a spaceflight conducted in July 1969 by the United States and launched " \
+                "by NASA, sending the astronauts Neil Armstrong and Buzz Aldrin to become the first humans to walk on the moon."
+true_labels = [('Apollo 11', 'miscellaneous'), ('July 1969', 'time'),
+               ('United States', 'organization'), ('NASA', 'organization'), ('Neil Armstrong', 'person'),
+               ('Buzz Aldrin', 'person'), ('moon', 'location')]
-print("Test NEC")
+print("Test sentence:\n" + test_sentence)
 for model in tested_models:
-    print("\n")
-    for test_entity in test_entities:
-        print(f"{model} prediction for {test_entity}:")
-        print(classify_entity(model, test_sentence, test_entity, labels))
+    print(f"\nTesting model {model}...")
+    correct = 0
+    for pair in true_labels:
+        entity = pair[0]
+        predicted_label = classify_entity(model, test_sentence, entity, test_labels)
+        print(f"Prediction: {entity} is a {predicted_label}.")
+        if predicted_label == pair[1]:
+            correct += 1
+    accuracy = correct / len(true_labels)
+    print(f"Accuracy {model}: {accuracy}")
-from src.common_interface import classify_entity
-from src.metrics import precision, recall, f1_score
-tested_models = ["Llama-3.1-8B", "DeepSeek-R1-Distill-Qwen-32B"]
-test_labels = ["person", "organization", "time", "location", "miscellaneous"]
-test_sentence = "Apollo 11 was a spaceflight conducted in July 1969 by the United States and launched " \
-                "by NASA, sending the astronauts Neil Armstrong and Buzz Aldrin to become the first humans to walk on the moon."
-true_labels = [('Apollo 11', 'miscellaneous'), ('July 1969', 'time'),
-               ('United States', 'organization'), ('NASA', 'organization'), ('Neil Armstrong', 'person'),
-               ('Buzz Aldrin', 'person'), ('moon', 'location')]
-print("Test sentence:\n" + test_sentence)
-for model in tested_models:
-    print(f"Testing model {model}...")
-    predicted_entities = []
-    for pair in true_labels:
-        entity = pair[0]
-        predicted_label = classify_entity(model, test_sentence, entity, test_labels)
-        predicted_entities.append((entity, predicted_label))
-    print(f"{model} found entities: \n{predicted_entities}")
-    print(f"Precision: {precision(true_labels, predicted_entities)}")
-    print(f"Recall: {recall(true_labels, predicted_entities)}")
-    print(f"F1-score: {f1_score(true_labels, predicted_entities)}\n")