5858 - Conversion from Completion API endpoint to Chat Completion API… #146

Merged · 3 commits · Jul 31, 2023
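The change replaces calls to the legacy Completions endpoint with the Chat Completions endpoint throughout the chat approach. As a rough sketch of the two call shapes (illustrative only: the deployment name, prompt text, and model id below are assumptions, not values from this diff):

import openai

# Before: one flat prompt string against the Completions endpoint.
completion = openai.Completion.create(
    engine="my-deployment",            # hypothetical Azure deployment name
    prompt="Generate search query for: ...",
    temperature=0.0,
    max_tokens=32,
    n=1,
    stop=["\n"])
text = completion.choices[0].text

# After: structured role/content messages against the Chat Completions endpoint.
chat_completion = openai.ChatCompletion.create(
    deployment_id="my-deployment",     # hypothetical Azure deployment name
    model="gpt-35-turbo",              # assumed model id
    messages=[{"role": "system", "content": "You generate search queries."},
              {"role": "user", "content": "Generate search query for: ..."}],
    temperature=0.0,
    max_tokens=32,
    n=1)
text = chat_completion.choices[0].message.content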
263 changes: 160 additions & 103 deletions app/backend/approaches/chatreadretrieveread.py
@@ -1,5 +1,5 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
# # Copyright (c) Microsoft Corporation.
# # Licensed under the MIT license.

import openai
from azure.search.documents import SearchClient
@@ -13,83 +13,71 @@
from azure.storage.blob import BlobServiceClient, generate_account_sas, ResourceTypes, AccountSasPermissions
import re

from typing import Any, Sequence
import tiktoken
from core.messagebuilder import MessageBuilder
from core.modelhelper import get_token_limit
from core.modelhelper import num_tokens_from_messages

# Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
# top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
# (answer) with that prompt.
class ChatReadRetrieveReadApproach(Approach):
prompt_prefix = """<|im_start|>
You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps answer questions about an agency's data. {response_length_prompt}


Text:
Flight to Denver at 9:00 am tomorrow.

Prompt:
Question: Is my flight on time?

Steps:
1. Look for relevant information in the provided source document to answer the question.
2. If there is specific flight information available in the source document, provide an answer along with the appropriate citation.
3. If there is no information about the specific flight in the source document, respond with "I'm not sure" without providing any citation.
class ChatReadRetrieveReadApproach(Approach):

# Chat roles
SYSTEM = "system"
USER = "user"
ASSISTANT = "assistant"

Response:

1. Look for relevant information in the provided source document to answer the question.
- Search for flight details matching the given flight to determine its current status.

2. If there is specific flight information available in the source document, provide an answer along with the appropriate citation.
- If the source document contains information about the current status of the specified flight, provide a response citing the relevant section of the source documents. Don't exclude the citation if you are using the source document to answer your question.


3. If there is no relevant information about the specific flight in the source document, respond with "I'm not sure" without providing any citation.


Example Response:

Question: Is my flight on time?

<Response>I'm not sure. The provided source document does not include information about the current status of your specific flight.</Response>


User persona: {userPersona}

system_message_chat_conversation = """You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps answer questions about an agency's data. {response_length_prompt}
Emphasize the use of facts listed in the provided source documents. Instruct the model to use the source name for each fact used in the response. Avoid generating speculative or generalized information. Each source has a file name followed by a pipe character and
the actual information. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources; list each source separately, e.g. [info1.txt][info2.pdf].
Treat each search term as an individual keyword. Do not combine terms in quotes or brackets.
Your goal is to provide accurate and relevant answers based on the information available in the provided source documents. Make sure to reference the source documents appropriately and avoid making assumptions or adding personal opinions.


User persona is {userPersona}

Here is how you should answer every question:

- Look for relevant information in the provided source document to answer the question.
- If there is specific information related to the question available in the source document, provide an answer along with the appropriate citation. Do not exclude the citation if you are using the source document to answer your question.
- If there is no specific information related to the question available in the source document, respond with "I'm not sure" without providing any citation. Do not provide personal opinions or assumptions.

{follow_up_questions_prompt}
{injected_prompt}
Sources:
{sources}


<|im_end|>
{chat_history}
"""
follow_up_questions_prompt_content = """
Generate three very brief follow-up questions that the user would likely ask next about their agency's data. Use triple angle brackets to reference the questions, e.g. <<<Are there exclusions for prescriptions?>>>. Try not to repeat questions that have already been asked.
Only generate questions and do not generate any text before or after the questions, such as 'Next Questions'.
"""

query_prompt_template = """
Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in source documents
query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in source documents.
Generate a search query based on the conversation and the new question.
Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
Do not include any text inside [] or <<<>>> in the search query terms.
If the question is not in {query_term_language}, translate the question to {query_term_language} before generating the search query.
Treat each search term as an individual keyword. Do not combine terms in quotes or brackets.

Chat History:
{chat_history}

Question:
{question}

Search query:
If you cannot generate a search query, return just the number 0.
"""


#Few Shot prompting for Keyword Search Query
query_prompt_few_shots = [
{'role' : USER, 'content' : 'What are the future plans for public transportation development?' },
{'role' : ASSISTANT, 'content' : 'Future plans for public transportation' },
{'role' : USER, 'content' : 'how much renewable energy was generated last year?' },
{'role' : ASSISTANT, 'content' : 'Renewable energy generation last year' }
]

#Few Shot prompting for Response. This will feed into Chain of thought system message.
response_prompt_few_shots = [
{"role": USER ,'content': 'I am looking for information in source documents'},
{'role': ASSISTANT, 'content': 'user is looking for information in source documents. Do not provide answers that are not in the source documents'},
{'role': USER, 'content': 'What steps are being taken to promote energy conservation?'},
{'role': ASSISTANT, 'content': 'I am not sure. The provided source document does not include information about the current status of your specific flight'}
]
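As written, these few-shot lists are injected into the live conversation as ordinary chat messages (see get_messages_from_history below), so the model imitates the demonstrated behavior. A hedged sketch of the message list they produce, with contents abridged and exact ordering left to that method's insertion logic:

# Illustrative only: system prompt, the example pairs, then the live question.
messages = [
    {"role": "system", "content": "<system or query prompt>"},
    {"role": "user", "content": "I am looking for information in source documents"},
    {"role": "assistant", "content": "user is looking for information in source documents. ..."},
    {"role": "user", "content": "<latest user question>"},
]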



def __init__(self, search_client: SearchClient, oai_service_name: str, oai_service_key: str, chatgpt_deployment: str, gpt_deployment: str, sourcepage_field: str, content_field: str, blob_client: BlobServiceClient, query_term_language: str):
self.search_client = search_client
@@ -99,6 +87,7 @@ def __init__(self, search_client: SearchClient, oai_servi
self.content_field = content_field
self.blob_client = blob_client
self.query_term_language = query_term_language
self.chatgpt_token_limit = get_token_limit(chatgpt_deployment)

openai.api_base = 'https://' + oai_service_name + '.openai.azure.com/'
openai.api_type = 'azure'
@@ -107,33 +96,50 @@ def __init__(self, search_client: SearchClient, oai_servi



def run(self, history: list[dict], overrides: dict) -> any:
# def run(self, history: list[dict], overrides: dict) -> any:
def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
use_semantic_captions = True if overrides.get("semantic_captions") else False
top = overrides.get("top") or 3
exclude_category = overrides.get("exclude_category") or None
filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
user_persona = overrides.get("user_persona", "")
system_persona = overrides.get("system_persona", "")
#aiPersona = overrides.get("ai_persona","")
response_length = int(overrides.get("response_length") or 1024)


user_q = 'Generate search query for: ' + history[-1]["user"]

query_prompt=self.query_prompt_template.format(query_term_language=self.query_term_language)


# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
prompt = self.query_prompt_template.format(
chat_history=self.get_chat_history_as_text(history, include_last_turn=False),
question=history[-1]["user"],
query_term_language=self.query_term_language
)
messages = self.get_messages_from_history(
query_prompt,
self.gpt_deployment,
history,
user_q,
self.query_prompt_few_shots,
self.chatgpt_token_limit - len(user_q)
)

chat_completion = openai.ChatCompletion.create(

completion = openai.Completion.create(
engine=self.chatgpt_deployment,
prompt=prompt,
temperature=0.0,
max_tokens=32,
n=1,
stop=["\n"])
q = completion.choices[0].text
deployment_id=self.gpt_deployment,
model=self.gpt_deployment,
messages=messages,
temperature=0.0,
max_tokens=32,
n=1)

generated_query = chat_completion.choices[0].message.content

generated_query = q.strip('\"')
#if we fail to generate a query, return the last user question

if generated_query.strip() == "0":
generated_query = history[-1]["user"]

#remove any special characters from the query. Commenting below because with gpt-4 we don't need this.

# generated_query = q.strip('\"')

# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
if overrides.get("semantic_ranker"):
@@ -188,68 +194,110 @@ def run(self, history: list[dict], overrides: dict) -> any:

# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
prompt_override = overrides.get("prompt_template")

if prompt_override is None:
prompt = self.prompt_prefix.format(
system_message = self.system_message_chat_conversation.format(
injected_prompt="",
sources=content,
chat_history=self.get_chat_history_as_text(history),
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_repsonse_lenth_prompt_text(response_length),
userPersona=user_persona,
systemPersona=system_persona
)
elif prompt_override.startswith(">>>"):
prompt = self.prompt_prefix.format(
system_message = self.system_message_chat_conversation.format(
injected_prompt=prompt_override[3:] + "\n ",
sources=content,
chat_history=self.get_chat_history_as_text(history),
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_repsonse_lenth_prompt_text(response_length),
userPersona=user_persona,
systemPersona=system_persona
)
else:
prompt = prompt_override.format(
sources=content,
chat_history=self.get_chat_history_as_text(history),
system_message = self.system_message_chat_conversation.format(
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_repsonse_lenth_prompt_text(response_length),
userPersona=user_persona,
systemPersona=system_persona
)

# STEP 3: Generate a contextual and content-specific answer using the search results and chat history
messages = self.get_messages_from_history(
system_message + "\n\nSources:\n" + content,
self.chatgpt_deployment,
history,
history[-1]["user"],
self.response_prompt_few_shots,
max_tokens=self.chatgpt_token_limit
)

#Aparmar.Token Debugging Code. Uncomment to debug token usage.

# print(messages)
# total_prompt_tokens = sum(len(token.split()) for token in (system_message + "\n\nSources:\n" + content).split())
# print("Total Prompt Tokens:", total_prompt_tokens)

completion = openai.Completion.create(
engine=self.chatgpt_deployment,
prompt=prompt,
temperature=float(overrides.get("response_temp")) or 0.7,

chat_completion = openai.ChatCompletion.create(

deployment_id=self.chatgpt_deployment,
model=self.chatgpt_deployment,
messages=messages,
temperature=float(overrides.get("response_temp")) or 0.6,
max_tokens=response_length,
n=1,
stop=["<|im_end|>", "<|im_start|>"]
)

n=1

)
# generated_response = chat_completion.choices[0].message.content

#Aparmar.Token Debugging Code. Uncomment to debug token usage.

# generated_response_message = chat_completion.choices[0].message
# # Count the tokens in the generated response message
# token_count = num_tokens_from_messages(generated_response_message, 'gpt-4')
# print("Generated Response Tokens:", token_count)


msg_to_display = '\n\n'.join([str(message) for message in messages])



return {
"data_points": data_points,
"answer": f"{urllib.parse.unquote(completion.choices[0].text)}",
"thoughts": f"Searched for:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>'),
"answer": f"{urllib.parse.unquote(chat_completion.choices[0].message.content)}",
"thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"citation_lookup": citation_lookup
}
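For orientation, a sketch of the payload run hands back to the frontend; the field values below are invented placeholders, not output from this code:

# Hypothetical example of the returned structure.
{
    "data_points": ["info1.txt| The permit office is open 8am to 4pm."],
    "answer": "The permit office is open 8am to 4pm [info1.txt]",
    "thoughts": "Searched for:<br>permit office hours<br><br>Conversations:<br>...",
    "citation_lookup": {"info1.txt": "<blob URL for the cited chunk>"}
}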

# Get the chat history as a single string
def get_chat_history_as_text(self, history, include_last_turn=True, approx_max_tokens=1000) -> str:
history_text = ""
for h in reversed(history if include_last_turn else history[:-1]):
history_text = (
"""User:""" + " " + h["user"] + "\n" + """""" + "\n" + """Assistant:""" + " " + (h.get("bot") + """""" if h.get("bot") else "") + "\n" + history_text
)
if len(history_text) > approx_max_tokens * 4:

#Aparmar. Custom method to construct Chat History as opposed to single string of chat History.
def get_messages_from_history(
self,
system_prompt: str,
model_id: str,
history: Sequence[dict[str, str]],
user_conv: str,
few_shots = [],
max_tokens: int = 4096) -> []:

message_builder = MessageBuilder(system_prompt, model_id)

# Few Shot prompting. Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
for shot in few_shots:
message_builder.append_message(shot.get('role'), shot.get('content'))

user_content = user_conv
append_index = len(few_shots) + 1

message_builder.append_message(self.USER, user_content, index=append_index)

for h in reversed(history[:-1]):
if h.get("bot"):
message_builder.append_message(self.ASSISTANT, h.get('bot'), index=append_index)
message_builder.append_message(self.USER, h.get('user'), index=append_index)
if message_builder.token_length > max_tokens:
break
return history_text

messages = message_builder.messages
return messages
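One observation on ordering, worth keeping in mind when reading the method above: append_message inserts at an index rather than appending, so each few shot lands at index 1 (which reverses the shots), while history turns are inserted at a fixed index past the shots, leaving the history reading oldest to newest with the live question last. A hypothetical trace with two shots and one prior turn:

# start:                  [system]
# shot u1 at index 1:     [system, u1]
# shot a1 at index 1:     [system, a1, u1]          <- shots end up reversed
# live question at 3:     [system, a1, u1, question]
# prior bot turn at 3:    [system, a1, u1, bot, question]
# prior user turn at 3:   [system, a1, u1, user, bot, question]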



#Get the prompt text for the response length

@@ -287,7 +335,16 @@ def get_first_page_num_for_chunk(self, content: str) -> str:
except Exception as e:
logging.exception("Unable to parse first page num: " + str(e) + "")
return "0"
25 changes: 25 additions & 0 deletions app/backend/core/messagebuilder.py
@@ -0,0 +1,25 @@
from .modelhelper import num_tokens_from_messages


class MessageBuilder:
"""
A class for building and managing messages in a chat conversation.
Attributes:
messages (list): A list of dictionaries representing chat messages.
model (str): The name of the ChatGPT model.
token_length (int): The total number of tokens in the conversation.
Methods:
__init__(self, system_content: str, chatgpt_model: str): Initializes the MessageBuilder instance.
append_message(self, role: str, content: str, index: int = 1): Appends a new message to the conversation.
"""

def __init__(self, system_content: str, chatgpt_model: str):
self.messages = [{'role': 'system', 'content': system_content}]
self.model = chatgpt_model
self.token_length = num_tokens_from_messages(
self.messages[-1], self.model)

def append_message(self, role: str, content: str, index: int = 1):
self.messages.insert(index, {'role': role, 'content': content})
self.token_length += num_tokens_from_messages(
self.messages[index], self.model)
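A quick usage sketch (contents are hypothetical): because append_message inserts at an index, earlier turns pushed in after the live question end up ahead of it, so the list reads oldest to newest:

builder = MessageBuilder("You answer questions about agency data.", "gpt-35-turbo")
builder.append_message("user", "Is the office open on holidays?")             # live question
builder.append_message("assistant", "The permit office is open 8am to 4pm.")  # prior answer
builder.append_message("user", "What are the permit office hours?")           # prior question
# builder.messages is now:
# [system, prior question, prior answer, live question]
# builder.token_length has accumulated the token cost of each insert.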