Skip to content
Snippets Groups Projects
Commit 1dac63ad authored by finn's avatar finn
Browse files

Delete unused files

parent 9823e29d
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 4400 deletions
# ELiSe - Extracted Literal Search
Extracted Literal Search is a simple method for Retrieval Augmented Generation (RAG).
For small datasets where most of the data fields are known literals, this method is easier to implement and might provide better results than Similarity Search of embeddings.
## Installation
Make sure you have `poetry` installed.
> pip install --user poetry
Install the package.
> poetry install
Export your Huggingface token.
> export HUGGING_FACE_HUB_TOKEN=<TOKEN>
Run the vLLM server. For example with a quantized Llama-2 model.
> poetry run python -m outlines.serve.serve --model="TheBloke/Llama-2-7b-Chat-GPTQ" -q gptq
## Usage
Define your domain specific prompts.
File deleted
File deleted
File deleted
#!/usr/bin/env python
import numpy as np
import polars as pl
import requests
from elise.prompts import (
prompts,
minimal_prompt,
minimal_prompt_yn,
minimal_bang_prompt,
minimal_bang_prompt_yn,
basic_prompt,
basic_prompt_yn,
basic_bang_prompt,
basic_bang_prompt_yn,
elaborate_prompt,
elaborate_prompt_yn,
elaborate_bang_prompt,
elaborate_bang_prompt_yn,
)
# Labelled benchmark turns: (utterance, gold YES/NO label, display tag).
# The tag encodes the gold label (+/-) plus the example number.
example_turns = [
    ("I need a hotel in the south with free wifi.", "YES", "+ 1"),
    ("Can you tell me if the cheap one has a star rating?", "YES", "+ 2"),
    ("Yes, I need a place to stay that is expensive, and is a hotel please.", "YES", "+ 3"),
    ("I'd like to book for eight people lasting five nights, starting on Saturday.", "YES", "+ 4"),
    ("Is there any hotel in the area with 4 stars and I can get for cheap price.", "YES", "+ 5"),
    ("Great. That's all I need, thank you.", "NO", "- 6"),
    ("What a beautiful hotel, I like the architecture", "NO", "- 7"),
    ("No, you've taken care of everything. Thanks so much. Have a great day.", "NO", "- 8"),
    ("Thank you, please let me know if you need anything else.", "NO", "- 9"),
    ("Could you tell me the travel time and price of that train please?", "NO", "-10"),
    ("You're welcome. Enjoy your time in Cambridge!", "NO", "- 11"),
]
# Get the turn labels + for yes, - for no, enumerated
# NOTE(review): turn_labels is not used inside this file — presumably
# consumed by a caller or a notebook; confirm before removing.
turn_labels = [t[2] for t in example_turns]
# Filled by main() with the name of each benchmarked prompt, in order.
prompt_labels = []
# Regex to ask the model a yes or no question
# (passed to the vLLM server so generation is constrained to exactly YES/NO).
y_or_n = "YES|NO"
def main():
    """Benchmark every prompt variant against the labelled example turns.

    For each ``(name, prompt)`` pair in ``prompts``, every turn in
    ``example_turns`` is sent to a local vLLM server ``n`` times with a
    YES|NO-constrained regex, and the fraction of correct answers is
    recorded.  The per-prompt average is appended as the last row, the
    resulting table is printed, and the results are written to a
    newline-delimited JSON file.
    """
    results = {}
    # 100 samples per turn so the hit count is directly the accuracy in %.
    n = 100
    for prompt_name, prompt in prompts:
        prompt_labels.append(prompt_name)
        accuracy = []
        for turn in example_turns:
            p = prompt(turn[0])
            prompt_length = len(p)
            # Send prompt to the vLLM server.  A timeout and an explicit
            # status check replace the original silent failure mode, where
            # an error response would have been scored as 0% accuracy.
            response = requests.post(
                "http://localhost:8000/generate",
                json={
                    "prompt": p,
                    "regex": y_or_n,
                    "n": n,
                },
                timeout=600,  # generating 100 samples can be slow
            )
            response.raise_for_status()
            # The server echoes the prompt, so strip it before comparing
            # the generated answer against the gold label.
            hits = sum(
                1
                for reply in response.json()["text"]
                if reply[prompt_length:] == turn[1]
            )
            accuracy.append(hits / n)
        # Append the per-prompt average as the final entry of the column.
        accuracy.append(np.average(accuracy))
        results[prompt_name] = accuracy
    df = pl.from_dict(results)
    print(df)
    print(df.max_horizontal())
    # TODO: Get model name with environment variable
    with open("llama2-13.benchmark.results.json", "w") as file:
        df.write_ndjson(file)


if __name__ == "__main__":
    main()
\ No newline at end of file
from enum import Enum
from outlines import models, prompt
from outlines.fsm import json_schema
from pydantic import BaseModel
from typing import Union, Optional
from typing_extensions import List, Literal
import json
import requests
import re
from .schema import MultiWOZ
### Prompt
# Llama-2 chat-format template for schema-constrained annotation.
# Fix: "annotate all valid feels" was a typo for "annotate all valid fields",
# which garbled the system instruction sent to the model.
@prompt
def llama_prompt_no_schema(dialog):
    """
    <s>[INST] <<SYS>>
    You are a helpful annotator. You read the text carefully and annotate all valid fields in the schema.
    Make sure to only annotate attractions like museums, clubs or other tourist attractions as such.
    If you are not sure with an annotation you should annotate None instead.
    <</SYS>>
    {{dialog}} [/INST]
    """
### Requests
def main():
    """Annotate stored dialogues via a local vLLM server.

    Reads dialogues from ``elise/output_dialogues_50.json``, requests one
    schema-constrained annotation per dialogue, and appends one JSON object
    per line to ``output_annotations.txt`` (or a ``PARSING ERROR`` line when
    the generation cannot be repaired into valid JSON).
    """
    # Read dialogue data
    with open("elise/output_dialogues_50.json", "r") as file:
        data = json.load(file)
    dialogues = [d["dialogue"][0] for d in data]
    # Open the output file once instead of re-opening it per reply.
    with open("output_annotations.txt", "a") as out:
        # Request annotations
        for dia in dialogues:
            # Renamed from `prompt`, which shadowed the imported decorator.
            p = llama_prompt_no_schema(dia)
            # Send request to vLLM server; fail loudly on transport errors
            # rather than crashing later on a malformed response body.
            response = requests.post(
                "http://localhost:8000/generate",
                json={
                    "prompt": p,
                    "schema": MultiWOZ.model_json_schema(),
                    "max_tokens": 1024,  # TODO: find a reasonable limit
                    "n": 1,
                },
                timeout=600,
            )
            response.raise_for_status()
            for reply in response.json()["text"]:
                annotation = reply.split("[/INST]")[1]
                # Cleanup the whitespace left by some erroneous generations.
                annotation = annotation.replace("\n", "")
                annotation = re.sub(r"\s+", " ", annotation)
                # Catch annotation errors like invalid JSON; most of the
                # time only a closing bracket is missing, so retry once
                # with "}" appended.  The original bare `except:` clauses
                # are narrowed to the actual parsing failure.
                try:
                    out.write(json.dumps(json.loads(annotation)))
                    out.write("\n")
                except json.JSONDecodeError:
                    annotation = annotation + "}"
                    try:
                        out.write(json.dumps(json.loads(annotation)))
                        out.write("\n")
                    except json.JSONDecodeError:
                        out.write(f"PARSING ERROR: {annotation}\n")
\ No newline at end of file
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
from outlines import prompt
# Minimal variant: bare question plus the turn, no answer-format instruction.
@prompt
def minimal_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    """
# Minimal variant with a markdown-style "#" section header ("bang").
@prompt
def minimal_bang_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    """
# Minimal variant plus an explicit YES/NO answer instruction ("_yn").
@prompt
def minimal_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Minimal variant with "#" header and explicit YES/NO answer instruction.
@prompt
def minimal_bang_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Basic variant: adds a flat list of actionable slot names to the question.
@prompt
def basic_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    TURN:
    {{dialog_turn}}
    """
# Basic variant with slot list and explicit YES/NO answer instruction.
@prompt
def basic_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Basic variant with "#" section headers around the slot list and turn.
@prompt
def basic_bang_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    # ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # TURN:
    {{dialog_turn}}
    """
# Basic variant with "#" headers and explicit YES/NO answer instruction.
@prompt
def basic_bang_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    # ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Elaborate variant: slot list plus four few-shot YES/NO examples.
@prompt
def elaborate_prompt(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    TURN:
    {{dialog_turn}}
    """
# Elaborate variant with few-shot examples and explicit YES/NO instruction.
@prompt
def elaborate_prompt_yn(dialog_turn):
    """Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    TURN:
    {{dialog_turn}}
    Answer with "YES" or "NO".
    """
# Elaborate "#"-header variant; rephrases the task as "information of
# interest" and places the question in its own TASK section.
@prompt
def elaborate_bang_prompt(dialog_turn):
    """# INTERESTING INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the following dialog turn provide information of interest?
    # TURN:
    {{dialog_turn}}
    """
# Variant of elaborate_bang_prompt using domain-prefixed slot names
# (e.g. "hotel-price") instead of a nested-looking flat list.
@prompt
def elaborate_bang_prompt2(dialog_turn):
    """# INTERESTING INFORMATION:
    - hotel-price
    - hotel-location
    - hotel-name
    - hotel-stars
    - hotel-guests
    - length of the stay
    - restaurant-price
    - restaurant-location
    - restaurant-food
    - restaurant-name
    - restaurant-seating
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the following dialog turn provide information of interest?
    # TURN:
    {{dialog_turn}}
    """
# Elaborate "#"-header variant with answer instruction.
# NOTE(review): this template mixes lowercase and uppercase labels — one
# example answers "yes" and the instruction says "yes"/"no", while the
# benchmark constrains generation to the uppercase regex YES|NO and
# compares against uppercase gold labels.  Possibly a deliberate
# experiment; confirm before "fixing".
@prompt
def elaborate_bang_prompt_yn(dialog_turn):
    """# ACTIONABLE_INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """
# Persona-style variant: prefixes the elaborate prompt with a
# "world-class language-analyzer" system persona.
@prompt
def omega_prompt(dialog_turn):
    """You are a world-class language-analyzer.
    You analyze the sentence from the dialog turn and evaluate if there is interesting data to extract.
    You reply reliably with yes or no.
    # INTERESTING INFORMATION:
    - hotel
    - price
    - location
    - name
    - stars
    - number of guests
    - length of the stay
    - restaurant
    - price
    - location
    - food
    - name
    - seating
    - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "YES"
    "Thank you so much, have a nice day!"
    > "NO"
    "Does the hotel has wi-fi?"
    > "YES"
    "This seems like a nice hotel"
    > "NO"
    # TASK
    Does the following dialog turn provide interesting information?
    # TURN:
    {{dialog_turn}}
    """
# ChatGPT-authored variant: structured TASK/INSTRUCTIONS/EXAMPLES sections
# with parenthesized explanations after each example answer.
@prompt
def chatgpt_prompt(dialog_turn):
    """# TASK:
    Identify if the provided dialogue turn contains specific information related to hotels or restaurants. Relevant information includes:
    - Hotel: price, location, name, star rating, guest information, length of stay
    - Restaurant: price, location, food type, name, seating arrangement
    # INSTRUCTIONS:
    Respond with "YES" if the dialogue turn includes any of the above information.
    Respond with "NO" if the dialogue turn does not include any of the above information.
    # EXAMPLES:
    1. "I'm looking for a hotel near the airport."
    > YES (mentions hotel location)
    2. "What's the specialty dish at this restaurant?"
    > YES (mentions restaurant food)
    3. "I just booked my vacation."
    > NO (no specific information)
    4. "Can you recommend a good place to eat?"
    > NO (no specific information)
    # DIALOGUE TURN:
    {{dialog_turn}}
    """
# Registry of (name, template) pairs consumed by the benchmark script.
# The order here determines the column order of the results table.
# NOTE(review): the elaborate entries are not in the same name order as the
# others (yn before plain, prompt2 before prompt) — presumably intentional;
# confirm before reordering.
prompts = [
    ("minimal_prompt", minimal_prompt),
    ("minimal_prompt_yn", minimal_prompt_yn),
    ("minimal_bang_prompt", minimal_bang_prompt),
    ("minimal_bang_prompt_yn", minimal_bang_prompt_yn),
    ("basic_prompt", basic_prompt),
    ("basic_prompt_yn", basic_prompt_yn),
    ("basic_bang_prompt", basic_bang_prompt),
    ("basic_bang_prompt_yn", basic_bang_prompt_yn),
    ("elaborate_prompt_yn", elaborate_prompt_yn),
    ("elaborate_prompt", elaborate_prompt),
    ("elaborate_bang_prompt2", elaborate_bang_prompt2),
    ("elaborate_bang_prompt", elaborate_bang_prompt),
    ("elaborate_bang_prompt_yn", elaborate_bang_prompt_yn),
    ("omega_prompt", omega_prompt),
    ("chatgpt_prompt", chatgpt_prompt),
]
\ No newline at end of file
from typing import List, Literal, Optional
from datetime import datetime
from pydantic import BaseModel
class MultiWOZ(BaseModel):
    """Slot schema for MultiWOZ-style dialogue-state annotation.

    Every field is optional; ``None`` means the slot was not mentioned
    in the dialogue.
    """

    attractionArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    attractionName: Optional[str]
    # Fixes: "college" was listed twice, and "concerthall" was missing a
    # trailing comma, so implicit string concatenation produced the bogus
    # enum value "concerthallentertainment" and dropped "entertainment".
    attractionType: Optional[Literal[
        "architecture",
        "boat",
        "cinema",
        "college",
        "concerthall",
        "entertainment",
        "multiple sports",
        "museum",
        "nightclub",
        "park",
        "swimmingpool",
        "theatre",
    ]]
    hotelArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    hotelBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]]
    hotelBookpeople: Optional[int]
    hotelBookstay: Optional[int]
    hotelInternet: Optional[Literal["yes", "no"]]
    hotelName: Optional[str]
    hotelParking: Optional[Literal["limited", "yes", "no"]]
    hotelPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
    hotelStars: Optional[int]
    hotelType: Optional[Literal["guesthouse", "hotel"]]
    restaurantArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    restaurantBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]]
    restaurantBookpeople: Optional[int]
    restaurantBooktime: Optional[datetime]
    restaurantFood: Optional[str]
    restaurantName: Optional[str]
    restaurantPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
    taxiArriveby: Optional[datetime]
    # NOTE(review): taxiDeparture/taxiDestination are places (str) but
    # trainDeparture is typed datetime — possibly it should be str; confirm
    # against the annotation data before changing.
    taxiDeparture: Optional[str]
    taxiDestination: Optional[str]
    taxiLeaveat: Optional[datetime]
    trainArriveby: Optional[datetime]
    trainBookpeople: Optional[int]
    trainDay: Optional[str]
    trainDeparture: Optional[datetime]
    trainDestination: Optional[str]
    trainLeaveat: Optional[datetime]
class HeidelKBerg(BaseModel):
    """Slot schema for the Heidelberg domain (attractions, hotels,
    restaurants).

    Every field is optional; ``None`` means the slot was not mentioned.
    Unlike MultiWOZ, attraction types and restaurant cuisines are lists,
    so one entity can carry several tags.
    """

    attractionArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    attractionName: Optional[str]
    attractionType: Optional[List[Literal[
        "active",
        "architecture",
        "child_friendly",
        "educational",
        "historic",
        "indoor",
        "nature",
        "outdoor",
        "passive",
        "view",
        "zoo",
    ]]]
    # Adds "free", which MultiWOZ price ranges do not have.
    attractionPricerange: Optional[Literal["cheap", "moderate", "expensive", "free"]]
    hotelArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    hotelBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday"
    ]]
    hotelBookpeople: Optional[int]
    hotelBookstay: Optional[int]
    hotelInternet: Optional[Literal["yes", "no"]]
    hotelName: Optional[str]
    hotelParking: Optional[Literal["limited", "yes", "no"]]
    hotelPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
    # Stars as string literals here, vs. plain int in MultiWOZ — this
    # constrains generation to the 0-5 range.
    hotelStars: Optional[Literal["0", "1", "2", "3", "4", "5"]]
    restaurantArea: Optional[Literal[
        "center",
        "east",
        "north",
        "south",
        "west",
    ]]
    restaurantBookday: Optional[Literal[
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday"
    ]]
    restaurantBookpeople: Optional[int]
    restaurantFood: Optional[List[Literal[
        "african",
        "american",
        "asian",
        "chinese",
        "german",
        "greek",
        "indian",
        "international",
        "italian",
        "japanese",
        "korean",
        "oriental",
        "sushi",
        "thai"
    ]]]
    restaurantName: Optional[str]
    restaurantPricerange: Optional[Literal["cheap", "moderate", "expensive"]]
\ No newline at end of file
This diff is collapsed.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
[tool.poetry]
name = "elise"
version = "0.1.0"
description = ""
authors = ["Christoph Pracht <pracht@cl.uni-heidelberg.de>"]
readme = "README.md"
[tool.poetry.scripts]
benchmark = "elise.benchmark:main"
annotate = "elise.mwoz_annotation:main"
user_scripts = "elise.user_scripts:main"
diagen = "elise.multi_agent_generation:run_dialogue"
[tool.poetry.dependencies]
python = "^3.9"
torch = "^2.1.2"
vllm = {git = "https://github.com/vllm-project/vllm.git", rev = "main"}
outlines = "^0.0.24"
ray = "^2.9.1"
huggingface = "^0.0.1" # NOTE(review): likely meant huggingface-hub — the "huggingface" PyPI package is an empty stub; confirm.
polars = {extras = ["numpy", "plot"], version = "^0.20.5"}
accelerate = "^0.26.1"
auto-gptq = "^0.6.0"
optimum = "^1.16.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment