5858 - Conversion from Completion API endpoint to Chat Completion API… #146

Merged · 3 commits · Jul 31, 2023
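The change replaces calls to the legacy Completions endpoint with the Chat Completions endpoint throughout the chat approach. As a rough sketch of the two call shapes (illustrative only: the deployment name, prompt text, and model id below are assumptions, not values from this diff):

import openai

# Before: one flat prompt string against the Completions endpoint.
completion = openai.Completion.create(
    engine="my-deployment",            # hypothetical Azure deployment name
    prompt="Generate search query for: ...",
    temperature=0.0,
    max_tokens=32,
    n=1,
    stop=["\n"])
text = completion.choices[0].text

# After: structured role/content messages against the Chat Completions endpoint.
chat_completion = openai.ChatCompletion.create(
    deployment_id="my-deployment",     # hypothetical Azure deployment name
    model="gpt-35-turbo",              # assumed model id
    messages=[{"role": "system", "content": "You generate search queries."},
              {"role": "user", "content": "Generate search query for: ..."}],
    temperature=0.0,
    max_tokens=32,
    n=1)
text = chat_completion.choices[0].message.content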
263 changes: 160 additions & 103 deletions app/backend/approaches/chatreadretrieveread.py
@@ -1,5 +1,5 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
# # Copyright (c) Microsoft Corporation.
# # Licensed under the MIT license.

import openai
from azure.search.documents import SearchClient
@@ -13,83 +13,71 @@
from azure.storage.blob import BlobServiceClient, generate_account_sas, ResourceTypes, AccountSasPermissions
import re

from typing import Any, Sequence
import tiktoken
from core.messagebuilder import MessageBuilder
from core.modelhelper import get_token_limit
from core.modelhelper import num_tokens_from_messages

# Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
# top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
# (answer) with that prompt.
class ChatReadRetrieveReadApproach(Approach):
prompt_prefix = """<|im_start|>
You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps answer questions about an agency's data. {response_length_prompt}


Text:
Flight to Denver at 9:00 am tomorrow.

Prompt:
Question: Is my flight on time?

Steps:
1. Look for relevant information in the provided source document to answer the question.
2. If there is specific flight information available in the source document, provide an answer along with the appropriate citation.
3. If there is no information about the specific flight in the source document, respond with "I'm not sure" without providing any citation.
class ChatReadRetrieveReadApproach(Approach):

# Chat roles
SYSTEM = "system"
USER = "user"
ASSISTANT = "assistant"

Response:

1. Look for relevant information in the provided source document to answer the question.
- Search for flight details matching the given flight to determine its current status.

2. If there is specific flight information available in the source document, provide an answer along with the appropriate citation.
- If the source document contains information about the current status of the specified flight, provide a response citing the relevant section of the source documents. Don't exclude the citation if you are using the source document to answer your question.


3. If there is no relevant information about the specific flight in the source document, respond with "I'm not sure" without providing any citation.


Example Response:

Question: Is my flight on time?

<Response>I'm not sure. The provided source document does not include information about the current status of your specific flight.</Response>


User persona: {userPersona}

system_message_chat_conversation = """You are an Azure OpenAI Completion system. Your persona is {systemPersona} who helps answer questions about an agency's data. {response_length_prompt}
Emphasize the use of facts listed in the provided source documents. Instruct the model to use the source name for each fact used in the response. Avoid generating speculative or generalized information. Each source has a file name followed by a pipe character and
the actual information. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources; list each source separately, e.g. [info1.txt][info2.pdf].
Treat each search term as an individual keyword. Do not combine terms in quotes or brackets.
Your goal is to provide accurate and relevant answers based on the information available in the provided source documents. Make sure to reference the source documents appropriately and avoid making assumptions or adding personal opinions.


User persona is {userPersona}

Here is how you should answer every question:

- Look for relevant information in the provided source document to answer the question.
- If there is specific information related to the question available in the source document, provide an answer along with the appropriate citation. Do not exclude the citation if you are using the source document to answer your question.
- If there is no specific information related to the question available in the source document, respond with "I'm not sure" without providing any citation. Do not provide personal opinions or assumptions.

{follow_up_questions_prompt}
{injected_prompt}
Sources:
{sources}


<|im_end|>
{chat_history}
"""
follow_up_questions_prompt_content = """
Generate three very brief follow-up questions that the user would likely ask next about their agency's data. Use triple angle brackets to reference the questions, e.g. <<<Are there exclusions for prescriptions?>>>. Try not to repeat questions that have already been asked.
Only generate questions and do not generate any text before or after the questions, such as 'Next Questions'.
"""

query_prompt_template = """
Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in source documents
query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in source documents.
Generate a search query based on the conversation and the new question.
Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
Do not include any text inside [] or <<<>>> in the search query terms.
If the question is not in {query_term_language}, translate the question to {query_term_language} before generating the search query.
Treat each search term as an individual keyword. Do not combine terms in quotes or brackets.

Chat History:
{chat_history}

Question:
{question}

Search query:
If you cannot generate a search query, return just the number 0.
"""


#Few Shot prompting for Keyword Search Query
query_prompt_few_shots = [
{'role' : USER, 'content' : 'What are the future plans for public transportation development?' },
{'role' : ASSISTANT, 'content' : 'Future plans for public transportation' },
{'role' : USER, 'content' : 'how much renewable energy was generated last year?' },
{'role' : ASSISTANT, 'content' : 'Renewable energy generation last year' }
]

#Few Shot prompting for Response. This will feed into Chain of thought system message.
response_prompt_few_shots = [
{"role": USER ,'content': 'I am looking for information in source documents'},
{'role': ASSISTANT, 'content': 'user is looking for information in source documents. Do not provide answers that are not in the source documents'},
{'role': USER, 'content': 'What steps are being taken to promote energy conservation?'},
{'role': ASSISTANT, 'content': 'I am not sure. The provided source document does not include information about the current status of your specific flight'}
]
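As written, these few-shot lists are injected into the live conversation as ordinary chat messages (see get_messages_from_history below), so the model imitates the demonstrated behavior. A hedged sketch of the message list they produce, with contents abridged and exact ordering left to that method's insertion logic:

# Illustrative only: system prompt, the example pairs, then the live question.
messages = [
    {"role": "system", "content": "<system or query prompt>"},
    {"role": "user", "content": "I am looking for information in source documents"},
    {"role": "assistant", "content": "user is looking for information in source documents. ..."},
    {"role": "user", "content": "<latest user question>"},
]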



def __init__(self, search_client: SearchClient, oai_service_name: str, oai_service_key: str, chatgpt_deployment: str, gpt_deployment: str, sourcepage_field: str, content_field: str, blob_client: BlobServiceClient, query_term_language: str):
self.search_client = search_client
@@ -99,6 +87,7 @@ def __init__(self, search_client: SearchClient, oai_servi
self.content_field = content_field
self.blob_client = blob_client
self.query_term_language = query_term_language
self.chatgpt_token_limit = get_token_limit(chatgpt_deployment)

openai.api_base = 'https://' + oai_service_name + '.openai.azure.com/'
openai.api_type = 'azure'
@@ -107,33 +96,50 @@ def __init__(self, search_client: SearchClient, oai_servi



def run(self, history: list[dict], overrides: dict) -> any:
# def run(self, history: list[dict], overrides: dict) -> any:
def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
use_semantic_captions = True if overrides.get("semantic_captions") else False
top = overrides.get("top") or 3
exclude_category = overrides.get("exclude_category") or None
filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
user_persona = overrides.get("user_persona", "")
system_persona = overrides.get("system_persona", "")
#aiPersona = overrides.get("ai_persona","")
response_length = int(overrides.get("response_length") or 1024)


user_q = 'Generate search query for: ' + history[-1]["user"]

query_prompt=self.query_prompt_template.format(query_term_language=self.query_term_language)


# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
prompt = self.query_prompt_template.format(
chat_history=self.get_chat_history_as_text(history, include_last_turn=False),
question=history[-1]["user"],
query_term_language=self.query_term_language
)
messages = self.get_messages_from_history(
query_prompt,
self.gpt_deployment,
history,
user_q,
self.query_prompt_few_shots,
self.chatgpt_token_limit - len(user_q)
)

chat_completion = openai.ChatCompletion.create(

completion = openai.Completion.create(
engine=self.chatgpt_deployment,
prompt=prompt,
temperature=0.0,
max_tokens=32,
n=1,
stop=["\n"])
q = completion.choices[0].text
deployment_id=self.gpt_deployment,
model=self.gpt_deployment,
messages=messages,
temperature=0.0,
max_tokens=32,
n=1)

generated_query = chat_completion.choices[0].message.content

generated_query = q.strip('\"')
#if we fail to generate a query, return the last user question

if generated_query.strip() == "0":
generated_query = history[-1]["user"]

#remove any special characters from the query. Commenting below because with gpt-4 we don't need this.

# generated_query = q.strip('\"')

# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
if overrides.get("semantic_ranker"):
@@ -188,68 +194,110 @@ def run(self, history: list[dict], overrides: dict) -> any:

# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
prompt_override = overrides.get("prompt_template")

if prompt_override is None:
prompt = self.prompt_prefix.format(
system_message = self.system_message_chat_conversation.format(
injected_prompt="",
sources=content,
chat_history=self.get_chat_history_as_text(history),
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_repsonse_lenth_prompt_text(response_length),
userPersona=user_persona,
systemPersona=system_persona
)
elif prompt_override.startswith(">>>"):
prompt = self.prompt_prefix.format(
system_message = self.system_message_chat_conversation.format(
injected_prompt=prompt_override[3:] + "\n ",
sources=content,
chat_history=self.get_chat_history_as_text(history),
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_repsonse_lenth_prompt_text(response_length),
userPersona=user_persona,
systemPersona=system_persona
)
else:
prompt = prompt_override.format(
sources=content,
chat_history=self.get_chat_history_as_text(history),
system_message = self.system_message_chat_conversation.format(
follow_up_questions_prompt=follow_up_questions_prompt,
response_length_prompt=self.get_repsonse_lenth_prompt_text(response_length),
userPersona=user_persona,
systemPersona=system_persona
)

# STEP 3: Generate a contextual and content-specific answer using the search results and chat history
messages = self.get_messages_from_history(
system_message + "\n\nSources:\n" + content,
self.chatgpt_deployment,
history,
history[-1]["user"],
self.response_prompt_few_shots,
max_tokens=self.chatgpt_token_limit
)

#Aparmar.Token Debugging Code. Uncomment to debug token usage.

# print(messages)
# total_prompt_tokens = sum(len(token.split()) for token in (system_message + "\n\nSources:\n" + content).split())
# print("Total Prompt Tokens:", total_prompt_tokens)

completion = openai.Completion.create(
engine=self.chatgpt_deployment,
prompt=prompt,
temperature=float(overrides.get("response_temp")) or 0.7,

chat_completion = openai.ChatCompletion.create(

deployment_id=self.chatgpt_deployment,
model=self.chatgpt_deployment,
messages=messages,
temperature=float(overrides.get("response_temp")) or 0.6,
max_tokens=response_length,
n=1,
stop=["<|im_end|>", "<|im_start|>"]
)

n=1

)
# generated_response = chat_completion.choices[0].message.content

#Aparmar.Token Debugging Code. Uncomment to debug token usage.

# generated_response_message = chat_completion.choices[0].message
# # Count the tokens in the generated response message
# token_count = num_tokens_from_messages(generated_response_message, 'gpt-4')
# print("Generated Response Tokens:", token_count)


msg_to_display = '\n\n'.join([str(message) for message in messages])



return {
"data_points": data_points,
"answer": f"{urllib.parse.unquote(completion.choices[0].text)}",
"thoughts": f"Searched for:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>'),
"answer": f"{urllib.parse.unquote(chat_completion.choices[0].message.content)}",
"thoughts": f"Searched for:<br>{generated_query}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>'),
"citation_lookup": citation_lookup
}
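For orientation, a sketch of the payload run hands back to the frontend; the field values below are invented placeholders, not output from this code:

# Hypothetical example of the returned structure.
{
    "data_points": ["info1.txt| The permit office is open 8am to 4pm."],
    "answer": "The permit office is open 8am to 4pm [info1.txt]",
    "thoughts": "Searched for:<br>permit office hours<br><br>Conversations:<br>...",
    "citation_lookup": {"info1.txt": "<blob URL for the cited chunk>"}
}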

# Get the chat history as a single string
def get_chat_history_as_text(self, history, include_last_turn=True, approx_max_tokens=1000) -> str:
history_text = ""
for h in reversed(history if include_last_turn else history[:-1]):
history_text = (
"""User:""" + " " + h["user"] + "\n" + """""" + "\n" + """Assistant:""" + " " + (h.get("bot") + """""" if h.get("bot") else "") + "\n" + history_text
)
if len(history_text) > approx_max_tokens * 4:

#Aparmar. Custom method to construct Chat History as opposed to single string of chat History.
def get_messages_from_history(
self,
system_prompt: str,
model_id: str,
history: Sequence[dict[str, str]],
user_conv: str,
few_shots = [],
max_tokens: int = 4096) -> []:

message_builder = MessageBuilder(system_prompt, model_id)

# Few Shot prompting. Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
for shot in few_shots:
message_builder.append_message(shot.get('role'), shot.get('content'))

user_content = user_conv
append_index = len(few_shots) + 1

message_builder.append_message(self.USER, user_content, index=append_index)

for h in reversed(history[:-1]):
if h.get("bot"):
message_builder.append_message(self.ASSISTANT, h.get('bot'), index=append_index)
message_builder.append_message(self.USER, h.get('user'), index=append_index)
if message_builder.token_length > max_tokens:
break
return history_text

messages = message_builder.messages
return messages
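One observation on ordering, worth keeping in mind when reading the method above: append_message inserts at an index rather than appending, so each few shot lands at index 1 (which reverses the shots), while history turns are inserted at a fixed index past the shots, leaving the history reading oldest to newest with the live question last. A hypothetical trace with two shots and one prior turn:

# start:                  [system]
# shot u1 at index 1:     [system, u1]
# shot a1 at index 1:     [system, a1, u1]          <- shots end up reversed
# live question at 3:     [system, a1, u1, question]
# prior bot turn at 3:    [system, a1, u1, bot, question]
# prior user turn at 3:   [system, a1, u1, user, bot, question]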



#Get the prompt text for the response length

@@ -287,7 +335,16 @@ def get_first_page_num_for_chunk(self, content: str) -> str:
except Exception as e:
logging.exception("Unable to parse first page num: " + str(e) + "")
return "0"
25 changes: 25 additions & 0 deletions app/backend/core/messagebuilder.py
@@ -0,0 +1,25 @@
from .modelhelper import num_tokens_from_messages


class MessageBuilder:
"""
A class for building and managing messages in a chat conversation.
Attributes:
messages (list): A list of dictionaries representing chat messages.
model (str): The name of the ChatGPT model.
token_length (int): The total number of tokens in the conversation.
Methods:
__init__(self, system_content: str, chatgpt_model: str): Initializes the MessageBuilder instance.
append_message(self, role: str, content: str, index: int = 1): Appends a new message to the conversation.
"""

def __init__(self, system_content: str, chatgpt_model: str):
self.messages = [{'role': 'system', 'content': system_content}]
self.model = chatgpt_model
self.token_length = num_tokens_from_messages(
self.messages[-1], self.model)

def append_message(self, role: str, content: str, index: int = 1):
self.messages.insert(index, {'role': role, 'content': content})
self.token_length += num_tokens_from_messages(
self.messages[index], self.model)
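A quick usage sketch (contents are hypothetical): because append_message inserts at an index, earlier turns pushed in after the live question end up ahead of it, so the list reads oldest to newest:

builder = MessageBuilder("You answer questions about agency data.", "gpt-35-turbo")
builder.append_message("user", "Is the office open on holidays?")             # live question
builder.append_message("assistant", "The permit office is open 8am to 4pm.")  # prior answer
builder.append_message("user", "What are the permit office hours?")           # prior question
# builder.messages is now:
# [system, prior question, prior answer, live question]
# builder.token_length has accumulated the token cost of each insert.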