# ELiSe - Extracted Literal Search
Extracted Literal Search is a simple method for Retrieval-Augmented Generation (RAG).
For small datasets where most of the data fields are known literals, this method is easier to implement and may give better results than similarity search over embeddings.
## Installation
Make sure you have `poetry` installed.
> pip install --user poetry
Install the package.
> poetry install
Run the vLLM server, for example with a quantized Llama-2 model.
> poetry run python -m outlines.serve.serve --model="TheBloke/Llama-2-7b-Chat-GPTQ" -q gptq
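To check that the server is running, send a request to its `/generate` endpoint (port 8000, the same endpoint the benchmark script uses); the optional `regex` field constrains the completion. The prompt below is only a smoke test.
> curl http://localhost:8000/generate -H "Content-Type: application/json" -d '{"prompt": "Answer yes or no: is the sky blue? ", "regex": "yes|no"}'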
## Usage
Define your domain-specific prompts.
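For example, a minimal prompt and query (the prompt name and wording below are illustrative; the request format is the one used by `elise/benchmark.py`):

```python
import requests
from outlines import prompt

@prompt
def hotel_prompt(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """

p = hotel_prompt("I need a hotel in the south with free wifi.")
response = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": p, "regex": "yes|no"},
)
# The server returns the prompt followed by the completion; strip the prompt.
print(response.json()["text"][0][len(p):])
```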
#!/usr/bin/env python
import numpy as np
import polars as pl
import requests

from elise.prompts import (
    prompts,
    minimal_prompt,
    minimal_prompt_yn,
    minimal_bang_prompt,
    minimal_bang_prompt_yn,
    basic_prompt,
    basic_prompt_yn,
    basic_bang_prompt,
    basic_bang_prompt_yn,
    elaborate_prompt,
    elaborate_prompt_yn,
    elaborate_bang_prompt,
    elaborate_bang_prompt_yn,
)

example_turns = [
    ("I need a hotel in the south with free wifi.", "yes", "+ 1"),
    ("Can you tell me if the cheap one has a star rating?", "yes", "+ 2"),
    ("Yes, I need a place to stay that is expensive, and is a hotel please.", "yes", "+ 3"),
    ("I'd like to book for eight people lasting five nights, starting on Saturday.", "yes", "+ 4"),
    ("Is there any hotel in the area with 4 stars and I can get for cheap price.", "yes", "+ 5"),
    ("Great. That's all I need, thank you.", "no", "- 6"),
    ("What a beautiful hotel, I like the architecture", "no", "- 7"),
    ("No, you've taken care of everything. Thanks so much. Have a great day.", "no", "- 8"),
    ("Thank you, please let me know if you need anything else.", "no", "- 9"),
    ("Could you tell me the travel time and price of that train please?", "no", "-10"),
    ("You're welcome. Enjoy your time in Cambridge!", "no", "- 11"),
]

# Turn labels: "+" for expected "yes", "-" for expected "no", enumerated
turn_labels = [t[2] for t in example_turns]
prompt_labels = []

# Regex that constrains the model to answer "yes" or "no"
y_or_n = "yes|no"


def main():
    results = {}
    for prompt_name, prompt in prompts:
        prompt_labels.append(prompt_name)
        accuracy = []
        for turn in example_turns:
            p = prompt(turn[0])
            prompt_length = len(p)
            # Generate 100 responses per turn so the accuracy is directly a percentage
            n = 100
            # Count correct responses
            c = 0
            # Send the prompt to the vLLM server
            response = requests.post(
                "http://localhost:8000/generate",
                json={
                    "prompt": p,
                    "regex": y_or_n,
                    "n": n,
                },
            )
            for reply in response.json()["text"]:
                # The server returns the prompt plus the completion; strip the prompt
                if reply[prompt_length:] == turn[1]:
                    c += 1
            accuracy.append(c / n)
        # Append the average accuracy over all turns as the last row
        avg = np.average(accuracy)
        accuracy.append(avg)
        results[prompt_name] = accuracy
    df = pl.from_dict(results)
    print(df)
    print(df.max_horizontal())
    with open("llama2-7b-gptq.benchmark.results.json", "w") as file:
        df.write_ndjson(file)


if __name__ == "__main__":
    main()
from outlines import prompt


@prompt
def minimal_prompt(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    """


@prompt
def minimal_bang_prompt(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    """


@prompt
def minimal_prompt_yn(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """


@prompt
def minimal_bang_prompt_yn(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """


@prompt
def basic_prompt(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    TURN:
    {{dialog_turn}}
    """


@prompt
def basic_prompt_yn(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """


@prompt
def basic_bang_prompt(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    # ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    # TURN:
    {{dialog_turn}}
    """


@prompt
def basic_bang_prompt_yn(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    # ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    # TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """


@prompt
def elaborate_prompt(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "no"
    "Does the hotel has wi-fi?"
    > "yes"
    "This seems like a nice hotel"
    > "no"
    TURN:
    {{dialog_turn}}
    """


@prompt
def elaborate_prompt_yn(dialog_turn):
    """
    Does the dialog turn contain actionable information?
    ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "no"
    "Does the hotel has wi-fi?"
    > "yes"
    "This seems like a nice hotel"
    > "no"
    TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """


@prompt
def elaborate_bang_prompt(dialog_turn):
    """
    # INTERESTING INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "no"
    "Does the hotel has wi-fi?"
    > "yes"
    "This seems like a nice hotel"
    > "no"
    # TASK
    Does the following dialog turn provide information of interest?
    # TURN:
    {{dialog_turn}}
    """


@prompt
def elaborate_bang_prompt2(dialog_turn):
    """
    # INTERESTING INFORMATION:
    - hotel-price
    - hotel-location
    - hotel-name
    - hotel-stars
    - hotel-guests
    - length of the stay
    - restaurant-price
    - restaurant-location
    - restaurant-food
    - restaurant-name
    - restaurant-seating
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "no"
    "Does the hotel has wi-fi?"
    > "yes"
    "This seems like a nice hotel"
    > "no"
    # TASK
    Does the following dialog turn provide information of interest?
    # TURN:
    {{dialog_turn}}
    """


@prompt
def elaborate_bang_prompt_yn(dialog_turn):
    """
    # ACTIONABLE_INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "no"
    "Does the hotel has wi-fi?"
    > "yes"
    "This seems like a nice hotel"
    > "no"
    # TASK
    Does the dialog turn contain actionable information?
    # TURN:
    {{dialog_turn}}
    Answer with "yes" or "no".
    """


@prompt
def omega_prompt(dialog_turn):
    """
    You are a world-class language-analyzer.
    You analyze the sentence from the dialog turn and evaluate if there is interesting data to extract.
    You reply reliably with yes or no.
    # INTERESTING INFORMATION:
    - hotel
      - price
      - location
      - name
      - stars
      - number of guests
      - length of the stay
    - restaurant
      - price
      - location
      - food
      - name
      - seating
      - number of guests
    # EXAMPLES:
    "I would like to go to a restaurant in the south"
    > "yes"
    "Thank you so much, have a nice day!"
    > "no"
    "Does the hotel has wi-fi?"
    > "yes"
    "This seems like a nice hotel"
    > "no"
    # TASK
    Does the following dialog turn provide interesting information?
    # TURN:
    {{dialog_turn}}
    """


# (name, prompt template) pairs iterated over by elise.benchmark
prompts = [
    ("minimal_prompt", minimal_prompt),
    ("minimal_prompt_yn", minimal_prompt_yn),
    ("minimal_bang_prompt", minimal_bang_prompt),
    ("minimal_bang_prompt_yn", minimal_bang_prompt_yn),
    ("basic_prompt", basic_prompt),
    ("basic_prompt_yn", basic_prompt_yn),
    ("basic_bang_prompt", basic_bang_prompt),
    ("basic_bang_prompt_yn", basic_bang_prompt_yn),
    ("elaborate_prompt_yn", elaborate_prompt_yn),
    ("elaborate_prompt", elaborate_prompt),
    ("elaborate_bang_prompt2", elaborate_bang_prompt2),
    ("elaborate_bang_prompt", elaborate_bang_prompt),
    ("elaborate_bang_prompt_yn", elaborate_bang_prompt_yn),
    ("omega_prompt", omega_prompt),
]
[tool.poetry]
name = "elise"
version = "0.1.0"
description = ""
authors = ["Christoph Pracht <pracht@cl.uni-heidelberg.de>"]
readme = "README.md"
[tool.poetry.scripts]
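# Exposes the benchmark as a console script: run it with `poetry run benchmark`.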
benchmark = "elise.benchmark:main"
[tool.poetry.dependencies]
python = "^3.10"
torch = "^2.1.2"
vllm = {git = "https://github.com/vllm-project/vllm.git", rev = "main"}
outlines = "^0.0.24"
ray = "^2.9.1"
huggingface = "^0.0.1"
polars = {extras = ["numpy", "plot"], version = "^0.20.5"}
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"