Skip to content
Snippets Groups Projects
Commit f8b0b084 authored by pracht's avatar pracht
Browse files

Annotation script

parent 3e11de63
No related branches found
No related tags found
No related merge requests found
from enum import Enum
from outlines import models, prompt
from outlines.fsm import json_schema
from pydantic import BaseModel
from typing import Union, Optional
from typing_extensions import List, Literal
import json
import requests
import re
from .schema import MultiWOZ
### Prompt
# Prompt template for the annotation request.
# NOTE: the docstring below is not documentation. The `@prompt` decorator
# imported from `outlines` uses it as the template text, and `{{dialog}}` is
# the placeholder filled with the `dialog` argument. Every character inside
# the docstring is therefore part of what is sent to the model.
@prompt
def llama_prompt_no_schema(dialog):
    """
    <s>[INST] <<SYS>>
    You are a helpful annotator. You read the text carefully and annotate all valid fields in the schema.
    Make sure to only annotate attractions like museums, clubs or other tourist attractions as such.
    If you are not sure with an annotation you should annotate None instead.
    <</SYS>>
    {{dialog}} [/INST]
    """
### Requests
def _extract_annotation(reply):
    """Return the generated part of *reply* with whitespace normalised.

    The text after the first ``[/INST]`` marker is taken as the model output.
    If the marker is missing, the whole reply is used instead of raising.
    """
    _, marker, generated = reply.partition("[/INST]")
    if not marker:
        generated = reply
    # Clean up the whitespace produced by some erroneous generations:
    # newlines are dropped first (without inserting a space), then every
    # remaining whitespace run is collapsed to a single space.
    generated = generated.replace("\n", "")
    return re.sub(r"\s+", " ", generated)


def _annotation_line(annotation):
    """Return the text to record for *annotation* (without trailing newline).

    The annotation is parsed as JSON and re-serialised. Most of the time a
    failure is caused by a missing closing bracket, so parsing is retried once
    with ``}`` appended. If that fails as well, a ``PARSING ERROR:`` line
    containing the attempted text is returned.
    """
    try:
        return json.dumps(json.loads(annotation))
    except json.JSONDecodeError:
        pass
    repaired = annotation + "}"
    try:
        return json.dumps(json.loads(repaired))
    except json.JSONDecodeError:
        return f"PARSING ERROR: {repaired}"


def main():
    """Request an annotation for every dialogue and append it to a file.

    Reads ``elise/output_dialogues_50.json``, takes the first element of each
    entry's ``"dialogue"`` list, renders the prompt for it and posts it,
    together with the ``MultiWOZ`` JSON schema, to the generation endpoint at
    ``http://localhost:8000/generate``. Every reply in the response's
    ``"text"`` list is written as one line to ``output_annotations.txt``
    (append mode): either the re-serialised JSON annotation or a
    ``PARSING ERROR:`` line.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Read dialogue data
    with open("elise/output_dialogues_50.json", "r", encoding="utf-8") as file:
        data = json.load(file)
    dialogues = [d["dialogue"][0] for d in data]

    # The schema is the same for every request, so build it only once.
    schema = MultiWOZ.model_json_schema()

    with open("output_annotations.txt", "a", encoding="utf-8") as out_file:
        # Request annotations
        for dia in dialogues:
            # Named `prompt_text` so the imported `prompt` decorator is not
            # shadowed inside this function.
            prompt_text = llama_prompt_no_schema(dia)
            # Send request to vLLM server
            response = requests.post(
                "http://localhost:8000/generate",
                json={
                    "prompt": prompt_text,
                    "schema": schema,
                    "max_tokens": 1024,  # TODO: find a reasonable limit
                    "n": 1,
                },
            )
            # Fail with a clear HTTP error instead of a confusing KeyError or
            # JSON decoding error further down.
            response.raise_for_status()
            for reply in response.json()["text"]:
                annotation = _extract_annotation(reply)
                out_file.write(_annotation_line(annotation))
                out_file.write("\n")
            # Keep the file current on disk while a long run is in progress.
            out_file.flush()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment