Skip to content

Commit

Permalink
Updated streamlit embedding demo
Browse files Browse the repository at this point in the history
  • Loading branch information
cjackson202 committed Nov 5, 2024
1 parent 39c233e commit cc4b935
Show file tree
Hide file tree
Showing 9 changed files with 422 additions and 188 deletions.
4 changes: 4 additions & 0 deletions notebooks/GenAI/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__
.venv
.env
microsoft-earnings_embeddings.csv
61 changes: 61 additions & 0 deletions notebooks/GenAI/embedding_demos/Demo_Suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import streamlit as st
from styling import global_page_style

def main():
    """Render the landing page for the Azure OpenAI RAG Demo Suite.

    Shows a centered Microsoft logo, the suite title, and an overview of
    the demos reachable from the sidebar ("AI Search Query" and
    "AOAI Embeddings"). Purely presentational: no state, no I/O beyond
    Streamlit rendering calls.
    """
    # Centered Microsoft logo. A plain literal replaces the original
    # f-string that interpolated constant literals; the rendered HTML is
    # identical. (Removed a commented-out st.set_page_config call --
    # page config is handled by global_page_style() at startup.)
    st.markdown(
        '<div style="text-align: center;">'
        '<img src="https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg" width="60">'
        '</div>',
        unsafe_allow_html=True,
    )
    st.title("Azure OpenAI RAG Demo Suite")
    st.markdown("### Demo Overviews")
    st.write("""
    Welcome to the Azure OpenAI RAG Demo Suite. On the left side-panel, you will find various demonstrations that showcase the capabilities of Azure OpenAI with a Streamlit frontend. Each demonstration is described in detail below, highlighting their unique features and functionalities.
    """)

    # Horizontal divider
    st.markdown("---")

    # Chat with Your Data section
    st.markdown("### Chat with Your Data using Azure OpenAI API and AI Search Index (AI Search Query)")
    st.write("""
    This demo allows users to interact with data stored in their Azure AI Search Index using a combination of semantic and vector search methods.
    """)
    st.write("""
    - **Semantic Search**: Understands the meaning and context of your queries to deliver more relevant results.
    - **Vector Search**: Utilizes numerical representations of text to find similar content based on cosine similarity.
    """)
    # Ensure the user has created the Azure AI search index already
    st.write("""
    **Note**: Users must have created the Azure AI search index already as shown here: [Upload your own data and query over it](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/notebooks/GenAI/Azure_Open_AI_README.md)
    """)

    # Horizontal divider
    st.markdown("---")

    # Generate & Search with Azure OpenAI Embeddings section
    st.markdown("### Generate & Search with Azure OpenAI Embeddings (AOAI Embeddings)")
    st.write("""
    This demo enables users to generate embeddings from a pre-chunked CSV file and perform searches over the content using vector search.
    """)
    st.write("""
    - **Vectorize**: Creates embeddings based on the "microsoft-earnings.csv" file provided in this directory. The embeddings are generated from the "text" column. The CSV file is pre-chunked, meaning the text has already been split and prepared for embedding generation. A new CSV file will be created to store all generated embeddings, forming your vector store.
    - **Retrieve**: Generates embeddings based on user queries. The query embedding is then used to search for the most similar document within the vector store using cosine similarity.
    """)
    st.write("""
    Example questions a user can ask about the microsoft-earnings.csv:
    - What was said about the budget?
    - How many people utilize GitHub to build software?
    - How many points did Microsoft Cloud gross margin percentage increase by?
    - What are the expectations for the Q2 cash flow?
    """)


if __name__ == '__main__':
    # Apply the shared page styling before rendering the landing page.
    global_page_style()
    main()
79 changes: 0 additions & 79 deletions notebooks/GenAI/embedding_demos/acs_embeddings.py

This file was deleted.

102 changes: 0 additions & 102 deletions notebooks/GenAI/embedding_demos/aoai_embeddings.py

This file was deleted.

106 changes: 106 additions & 0 deletions notebooks/GenAI/embedding_demos/pages/AI_Search_Query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from openai import AzureOpenAI
import os
import streamlit as st
from dotenv import load_dotenv
from styling import global_page_style

# load in .env variables
load_dotenv()

# Configure Azure OpenAI params, using an Azure OpenAI account with a deployment of an embedding model
# NOTE(review): os.getenv returns None when a variable is unset, so the `str`
# annotations below are optimistic -- a missing .env entry surfaces later as an
# authentication/connection error rather than failing fast here.
azure_endpoint: str = os.getenv('AZURE_OPENAI_BASE')
azure_openai_api_key: str = os.getenv('AZURE_OPENAI_KEY')
azure_openai_api_version: str = os.getenv('AZURE_OPENAI_VERSION')
azure_ada_deployment: str = os.getenv('AZURE_EMBEDDINGS_DEPLOYMENT')  # embeddings model deployment name
azure_gpt_deployment: str = os.getenv('AZURE_GPT_DEPLOYMENT')  # chat model deployment name

# Configure Azure AI Search params
search_endpoint: str = os.getenv('AZURE_SEARCH_ENDPOINT')
search_key: str = os.getenv('AZURE_SEARCH_ADMIN_KEY')

def chat_on_your_data(query, search_index, messages):
    """Answer *query* grounded in an Azure AI Search index and render the exchange.

    Appends the user query and the assistant reply to *messages* (mutated in
    place, so the caller's chat history accumulates across calls) and renders
    both turns in the Streamlit chat UI.

    Args:
        query: The user's question (plain text).
        search_index: Name of an existing Azure AI Search index to query.
        messages: List of {"role": ..., "content": ...} dicts; mutated in place.

    NOTE(review): only the current query is sent to the model -- the
    accumulated *messages* history is used for display only, so the model has
    no multi-turn context. Confirm this is intentional.
    """
    messages.append({"role": "user", "content":query})
    with st.chat_message("user"):
        st.markdown(query)
    with st.spinner('Processing...'):
        # Client is rebuilt on every call from the module-level env settings.
        client = AzureOpenAI(
            azure_endpoint=azure_endpoint,
            api_key=azure_openai_api_key,
            api_version=azure_openai_api_version,
        )
        # "On your data" pattern: extra_body wires the chat completion to an
        # Azure AI Search data source so the answer is grounded in the index.
        completion = client.chat.completions.create(
            model=azure_gpt_deployment,
            messages=[
                {"role": "system", "content": "You are an AI assistant that helps people find information. \
                Ensure the Markdown responses are correctly formatted before responding."},
                {"role": "user", "content": query}
            ],
            max_tokens=800,
            temperature=0.7,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None,
            stream=False,
            extra_body={
                "data_sources": [{
                    "type": "azure_search",
                    "parameters": {
                        "endpoint": f"{search_endpoint}",
                        "index_name": search_index,
                        "semantic_configuration": "default",
                        # Hybrid retrieval: keyword search + vector similarity.
                        "query_type": "vector_simple_hybrid",
                        "fields_mapping": {},
                        "in_scope": True,
                        "role_information": "You are an AI assistant that helps people find information.",
                        "filter": None,
                        "strictness": 3,
                        "top_n_documents": 5,
                        "authentication": {
                            "type": "api_key",
                            "key": f"{search_key}"
                        },
                        # Deployment used to embed the query for vector search.
                        "embedding_dependency": {
                            "type": "deployment_name",
                            "deployment_name": azure_ada_deployment
                        }
                    }
                }]
            }
        )
        print(completion)  # NOTE(review): debug print left in; consider removing or logging.
        response_data = completion.to_dict()
        ai_response = response_data['choices'][0]['message']['content']
    messages.append({"role": "assistant", "content":ai_response})
    with st.chat_message("assistant"):
        st.markdown(ai_response)

def main():
    """Render the "Chat with Your Data" demo page.

    Shows the logo, title, and demo description, then -- once the user has
    supplied an Azure AI Search index name -- replays the stored chat
    history and forwards any new query to chat_on_your_data().
    """
    # Centered logo: constant HTML, so no interpolation is needed.
    logo_html = (
        '<div style="text-align: center;">'
        '<img src="https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg" width="60">'
        '</div>'
    )
    st.markdown(logo_html, unsafe_allow_html=True)
    st.title("Demo - Azure OpenAI & AI Search")
    # image = Image.open('image_logo2.png')
    # st.image(image, caption = '')
    st.write('This demo showcases an innovative way for users to engage with data housed in their Azure AI Search Index by leveraging both \
        semantic and vector search techniques. Semantic search enhances the querying process by comprehending the meaning and context of \
        user queries, thereby providing more pertinent results. Vector search, on the other hand, employs numerical representations of \
        text to identify similar content using cosine similarity. ***For users to effectively utilize this demo, it is essential that they \
        have previously created their Azure AI Search Index, following the necessary steps to upload and query their data as outlined [here](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/notebooks/GenAI/Azure_Open_AI_README.md).***')

    # Initialize chat history once per session.
    if 'messages' not in st.session_state:
        st.session_state.messages = []

    index_name = st.text_input(label="Azure AI Search index name:", value="")
    st.write('-' * 50)

    # Guard clause: nothing to do until an index name is provided.
    if not index_name:
        return

    user_query = st.chat_input('Input search query here...')
    # Replay the stored conversation so history survives Streamlit reruns.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg['content'])
    if user_query:
        chat_on_your_data(user_query, index_name, st.session_state.messages)


if __name__ == '__main__':
    # Apply the shared page styling before rendering the demo page.
    global_page_style()
    main()
Loading

0 comments on commit cc4b935

Please sign in to comment.