Skip to content
Snippets Groups Projects
Commit 1dac63ad authored by finn's avatar finn
Browse files

Delete unused files

parent 9823e29d
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 4400 deletions
# ELiSe - Extracted Literal Search
Extracted Literal Search is a simple method for Retrieval Augmented Generation (RAG).
For small datasets where most of the data fields are known literals, this method is easier to implement and might provide better results than Similarity Search of embeddings.
## Installation
Make sure you have `poetry` installed.
> pip install --user poetry
Install the package.
> poetry install
Export your Huggingface token.
> export HUGGING_FACE_HUB_TOKEN=<TOKEN>
Run the vLLM server. For example with a quantized Llama-2 model.
> poetry run python -m outlines.serve.serve --model="TheBloke/Llama-2-7b-Chat-GPTQ" -q gptq
## Usage
Define your domain specific prompts.
File deleted
File deleted
File deleted
#!/usr/bin/env python
import numpy as np
import polars as pl
import requests
from elise.prompts import (
prompts,
minimal_prompt,
minimal_prompt_yn,
minimal_bang_prompt,
minimal_bang_prompt_yn,
basic_prompt,
basic_prompt_yn,
basic_bang_prompt,
basic_bang_prompt_yn,
elaborate_prompt,
elaborate_prompt_yn,
elaborate_bang_prompt,
elaborate_bang_prompt_yn,
)
# Labelled benchmark turns: (utterance, gold YES/NO label, display tag).
# The tag encodes the gold label (+/-) plus the example number.
example_turns = [
    ("I need a hotel in the south with free wifi.", "YES", "+ 1"),
    ("Can you tell me if the cheap one has a star rating?", "YES", "+ 2"),
    ("Yes, I need a place to stay that is expensive, and is a hotel please.", "YES", "+ 3"),
    ("I'd like to book for eight people lasting five nights, starting on Saturday.", "YES", "+ 4"),
    ("Is there any hotel in the area with 4 stars and I can get for cheap price.", "YES", "+ 5"),
    ("Great. That's all I need, thank you.", "NO", "- 6"),
    ("What a beautiful hotel, I like the architecture", "NO", "- 7"),
    ("No, you've taken care of everything. Thanks so much. Have a great day.", "NO", "- 8"),
    ("Thank you, please let me know if you need anything else.", "NO", "- 9"),
    ("Could you tell me the travel time and price of that train please?", "NO", "-10"),
    ("You're welcome. Enjoy your time in Cambridge!", "NO", "- 11"),
]
# Get the turn labels + for yes, - for no, enumerated
# NOTE(review): turn_labels is not used inside this file — presumably
# consumed by a caller or a notebook; confirm before removing.
turn_labels = [t[2] for t in example_turns]
# Filled by main() with the name of each benchmarked prompt, in order.
prompt_labels = []
# Regex to ask the model a yes or no question
# (passed to the vLLM server so generation is constrained to exactly YES/NO).
y_or_n = "YES|NO"
def main():
    """Benchmark every prompt variant against the labelled example turns.

    For each ``(name, prompt)`` pair in ``prompts``, every turn in
    ``example_turns`` is sent to a local vLLM server ``n`` times with a
    YES|NO-constrained regex, and the fraction of correct answers is
    recorded.  The per-prompt average is appended as the last row, the
    resulting table is printed, and the results are written to a
    newline-delimited JSON file.
    """
    results = {}
    # 100 samples per turn so the hit count is directly the accuracy in %.
    n = 100
    for prompt_name, prompt in prompts:
        prompt_labels.append(prompt_name)
        accuracy = []
        for turn in example_turns:
            p = prompt(turn[0])
            prompt_length = len(p)
            # Send prompt to the vLLM server.  A timeout and an explicit
            # status check replace the original silent failure mode, where
            # an error response would have been scored as 0% accuracy.
            response = requests.post(
                "http://localhost:8000/generate",
                json={
                    "prompt": p,
                    "regex": y_or_n,
                    "n": n,
                },
                timeout=600,  # generating 100 samples can be slow
            )
            response.raise_for_status()
            # The server echoes the prompt, so strip it before comparing
            # the generated answer against the gold label.
            hits = sum(
                1
                for reply in response.json()["text"]
                if reply[prompt_length:] == turn[1]
            )
            accuracy.append(hits / n)
        # Append the per-prompt average as the final entry of the column.
        accuracy.append(np.average(accuracy))
        results[prompt_name] = accuracy
    df = pl.from_dict(results)
    print(df)
    print(df.max_horizontal())
    # TODO: Get model name with environment variable
    with open("llama2-13.benchmark.results.json", "w") as file:
        df.write_ndjson(file)


if __name__ == "__main__":
    main()
\ No newline at end of file
from enum import Enum
from outlines import models, prompt
from outlines.fsm import json_schema
from pydantic import BaseModel
from typing import Union, Optional
from typing_extensions import List, Literal
import json
import requests
import re
from .schema import MultiWOZ
### Prompt
# Llama-2 chat-format template for schema-constrained annotation.
# Fix: "annotate all valid feels" was a typo for "annotate all valid fields",
# which garbled the system instruction sent to the model.
@prompt
def llama_prompt_no_schema(dialog):
    """
    <s>[INST] <<SYS>>
    You are a helpful annotator. You read the text carefully and annotate all valid fields in the schema.
    Make sure to only annotate attractions like museums, clubs or other tourist attractions as such.
    If you are not sure with an annotation you should annotate None instead.
    <</SYS>>
    {{dialog}} [/INST]
    """
### Requests
def main():
    """Annotate stored dialogues via a local vLLM server.

    Reads dialogues from ``elise/output_dialogues_50.json``, requests one
    schema-constrained annotation per dialogue, and appends one JSON object
    per line to ``output_annotations.txt`` (or a ``PARSING ERROR`` line when
    the generation cannot be repaired into valid JSON).
    """
    # Read dialogue data
    with open("elise/output_dialogues_50.json", "r") as file:
        data = json.load(file)
    dialogues = [d["dialogue"][0] for d in data]
    # Open the output file once instead of re-opening it per reply.
    with open("output_annotations.txt", "a") as out:
        # Request annotations
        for dia in dialogues:
            # Renamed from `prompt`, which shadowed the imported decorator.
            p = llama_prompt_no_schema(dia)
            # Send request to vLLM server; fail loudly on transport errors
            # rather than crashing later on a malformed response body.
            response = requests.post(
                "http://localhost:8000/generate",
                json={
                    "prompt": p,
                    "schema": MultiWOZ.model_json_schema(),
                    "max_tokens": 1024,  # TODO: find a reasonable limit
                    "n": 1,
                },
                timeout=600,
            )
            response.raise_for_status()
            for reply in response.json()["text"]:
                annotation = reply.split("[/INST]")[1]
                # Cleanup the whitespace left by some erroneous generations.
                annotation = annotation.replace("\n", "")
                annotation = re.sub(r"\s+", " ", annotation)
                # Catch annotation errors like invalid JSON; most of the
                # time only a closing bracket is missing, so retry once
                # with "}" appended.  The original bare `except:` clauses
                # are narrowed to the actual parsing failure.
                try:
                    out.write(json.dumps(json.loads(annotation)))
                    out.write("\n")
                except json.JSONDecodeError:
                    annotation = annotation + "}"
                    try:
                        out.write(json.dumps(json.loads(annotation)))
                        out.write("\n")
                    except json.JSONDecodeError:
                        out.write(f"PARSING ERROR: {annotation}\n")
\ No newline at end of file
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
from outlines import prompt
# Minimal variant: bare question plus the turn, no answer-format instruction.
@prompt
def minimal_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    """
# Minimal variant with a markdown-style "#" section header ("bang").
@prompt
def minimal_bang_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    """
# Minimal variant plus an explicit YES/NO answer instruction ("_yn").
@prompt
def minimal_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Minimal variant with "#" header and explicit YES/NO answer instruction.
@prompt
def minimal_bang_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Basic variant: adds a flat list of actionable slot names to the question.
@prompt
def basic_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    TURN:
    {{dialog_turn}}
    """
# Basic variant with slot list and explicit YES/NO answer instruction.
@prompt
def basic_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Basic variant with "#" section headers around the slot list and turn.
@prompt
def basic_bang_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    # ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # TURN:
    {{dialog_turn}}
    """
# Basic variant with "#" headers and explicit YES/NO answer instruction.
@prompt
def basic_bang_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    # ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Elaborate variant: slot list plus four few-shot YES/NO examples.
@prompt
def elaborate_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    TURN:
    {{dialog_turn}}
    """
# Elaborate variant with few-shot examples and explicit YES/NO instruction.
@prompt
def elaborate_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Elaborate "#"-header variant; rephrases the task as "information of
# interest" and places the question in its own TASK section.
@prompt
def elaborate_bang_prompt(dialog_turn):
    """# INTERESTING INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the following dialog turn provide information of interest?
    # TURN:
    {{dialog_turn}}
    """
# Variant of elaborate_bang_prompt using domain-prefixed slot names
# (e.g. "hotel-price") instead of a nested-looking flat list.
@prompt
def elaborate_bang_prompt2(dialog_turn):
    """# INTERESTING INFORMATION:
    - hotel-price
    - hotel-location
    - hotel-name
    - hotel-stars
    - hotel-guests
    - length of the stay
    - restaurant-price
    - restaurant-location
    - restaurant-food
    - restaurant-name
    - restaurant-seating
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the following dialog turn provide information of interest?
    # TURN:
    {{dialog_turn}}
    """
# Elaborate "#"-header variant with answer instruction.
# NOTE(review): this template mixes lowercase and uppercase labels — one
# example answers "yes" and the instruction says "yes"/"no", while the
# benchmark constrains generation to the uppercase regex YES|NO and
# compares against uppercase gold labels.  Possibly a deliberate
# experiment; confirm before "fixing".
@prompt
def elaborate_bang_prompt_yn(dialog_turn):
    """# ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """
# Persona-style variant: prefixes the elaborate prompt with a
# "world-class language-analyzer" system persona.
@prompt
def omega_prompt(dialog_turn):
    """You are a world-class language-analyzer.
    You analyze the sentence from the dialog turn and evaluate if there is interesting data to extract.
    You reply reliably with yes or no.
    # INTERESTING INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the following dialog turn provide interesting information?
    # TURN:
    {{dialog_turn}}
    """
# ChatGPT-authored variant: structured TASK/INSTRUCTIONS/EXAMPLES sections
# with parenthesized explanations after each example answer.
@prompt
def chatgpt_prompt(dialog_turn):
    """# TASK:
    Identify if the provided dialogue turn contains specific information related to hotels or restaurants. Relevant information includes:
    - Hotel: price, location, name, star rating, guest information, length of stay
    - Restaurant: price, location, food type, name, seating arrangement
    # INSTRUCTIONS:
    Respond with "YES" if the dialogue turn includes any of the above information.
    Respond with "NO" if the dialogue turn does not include any of the above information.
    # EXAMPLES:
    1. "I'm looking for a hotel near the airport."
    > YES (mentions hotel location)
    2. "What's the specialty dish at this restaurant?"
    > YES (mentions restaurant food)
    3. "I just booked my vacation."
    > NO (no specific information)
    4. "Can you recommend a good place to eat?"
    > NO (no specific information)
    # DIALOGUE TURN:
    {{dialog_turn}}
    """
# Registry of (name, template) pairs consumed by the benchmark script.
# The order here determines the column order of the results table.
# NOTE(review): the elaborate entries are not in the same name order as the
# others (yn before plain, prompt2 before prompt) — presumably intentional;
# confirm before reordering.
prompts = [
    ("minimal_prompt", minimal_prompt),
    ("minimal_prompt_yn", minimal_prompt_yn),
    ("minimal_bang_prompt", minimal_bang_prompt),
    ("minimal_bang_prompt_yn", minimal_bang_prompt_yn),
    ("basic_prompt", basic_prompt),
    ("basic_prompt_yn", basic_prompt_yn),
    ("basic_bang_prompt", basic_bang_prompt),
    ("basic_bang_prompt_yn", basic_bang_prompt_yn),
    ("elaborate_prompt_yn", elaborate_prompt_yn),
    ("elaborate_prompt", elaborate_prompt),
    ("elaborate_bang_prompt2", elaborate_bang_prompt2),
    ("elaborate_bang_prompt", elaborate_bang_prompt),
    ("elaborate_bang_prompt_yn", elaborate_bang_prompt_yn),
    ("omega_prompt", omega_prompt),
    ("chatgpt_prompt", chatgpt_prompt),
]
\ No newline at end of file
from typing import List, Literal, Optional
from datetime import datetime
from pydantic import BaseModel
class MultiWOZ(BaseModel):
    """Slot schema for MultiWOZ-style dialogue-state annotation.

    Every field is optional; ``None`` means the slot was not mentioned
    in the dialogue.
    """

    attractionArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    attractionName: Optional[str]
    # Fixes: "college" was listed twice, and "concerthall" was missing a
    # trailing comma, so implicit string concatenation produced the bogus
    # enum value "concerthallentertainment" and dropped "entertainment".
    attractionType: Optional[Literal[
        "architecture",
        "boat",
        "cinema",
        "college",
        "concerthall",
        "entertainment",
        "multiple sports",
        "museum",
        "nightclub",
        "park",
        "swimmingpool",
        "theatre",
    ]]
    hotelArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    hotelBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]]
    hotelBookpeople: Optional[int]
    hotelBookstay: Optional[int]
    hotelInternet: Optional[Literal["yes", "no"]]
    hotelName: Optional[str]
    hotelParking: Optional[Literal["limited", "yes", "no"]]
    hotelPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
    hotelStars: Optional[int]
    hotelType: Optional[Literal["guesthouse", "hotel"]]
    restaurantArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    restaurantBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]]
    restaurantBookpeople: Optional[int]
    restaurantBooktime: Optional[datetime]
    restaurantFood: Optional[str]
    restaurantName: Optional[str]
    restaurantPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
    taxiArriveby: Optional[datetime]
    # NOTE(review): taxiDeparture/taxiDestination are places (str) but
    # trainDeparture is typed datetime — possibly it should be str; confirm
    # against the annotation data before changing.
    taxiDeparture: Optional[str]
    taxiDestination: Optional[str]
    taxiLeaveat: Optional[datetime]
    trainArriveby: Optional[datetime]
    trainBookpeople: Optional[int]
    trainDay: Optional[str]
    trainDeparture: Optional[datetime]
    trainDestination: Optional[str]
    trainLeaveat: Optional[datetime]
class HeidelKBerg(BaseModel):
    """Slot schema for the Heidelberg domain (attractions, hotels,
    restaurants).

    Every field is optional; ``None`` means the slot was not mentioned.
    Unlike MultiWOZ, attraction types and restaurant cuisines are lists,
    so one entity can carry several tags.
    """

    attractionArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    attractionName: Optional[str]
    attractionType: Optional[List[Literal[
        "active",
        "architecture",
        "child_friendly",
        "educational",
        "historic",
        "indoor",
        "nature",
        "outdoor",
        "passive",
        "view",
        "zoo",
    ]]]
    # Adds "free", which MultiWOZ price ranges do not have.
    attractionPricerange: Optional[Literal["cheap", "moderate", "expensive", "free"]]
    hotelArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    hotelBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday"
    ]]
    hotelBookpeople: Optional[int]
    hotelBookstay: Optional[int]
    hotelInternet: Optional[Literal["yes", "no"]]
    hotelName: Optional[str]
    hotelParking: Optional[Literal["limited", "yes", "no"]]
    hotelPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
    # Stars as string literals here, vs. plain int in MultiWOZ — this
    # constrains generation to the 0-5 range.
    hotelStars: Optional[Literal["0", "1", "2", "3", "4", "5"]]
    restaurantArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    restaurantBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday"
    ]]
    restaurantBookpeople: Optional[int]
    restaurantFood: Optional[List[Literal[
        "african",
        "american",
        "asian",
        "chinese",
        "german",
        "greek",
        "indian",
        "international",
        "italian",
        "japanese",
        "korean",
        "oriental",
        "sushi",
        "thai"
    ]]]
    restaurantName: Optional[str]
    restaurantPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
\ No newline at end of file
This diff is collapsed.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
[tool.poetry]
name = "elise"
version = "0.1.0"
description = ""
authors = ["Christoph Pracht <pracht@cl.uni-heidelberg.de>"]
readme = "README.md"
[tool.poetry.scripts]
benchmark = "elise.benchmark:main"
annotate = "elise.mwoz_annotation:main"
user_scripts = "elise.user_scripts:main"
diagen = "elise.multi_agent_generation:run_dialogue"
[tool.poetry.dependencies]
python = "^3.9"
torch = "^2.1.2"
vllm = {git = "https://github.com/vllm-project/vllm.git", rev = "main"}
outlines = "^0.0.24"
ray = "^2.9.1"
huggingface = "^0.0.1" # NOTE(review): likely meant huggingface-hub — the "huggingface" PyPI package is an empty stub; confirm.
polars = {extras = ["numpy", "plot"], version = "^0.20.5"}
accelerate = "^0.26.1"
auto-gptq = "^0.6.0"
optimum = "^1.16.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment