diff --git a/docs/docs/integrations/llms/clarifai.ipynb b/docs/docs/integrations/llms/clarifai.ipynb index 02569a560170b..bac65941b3918 100644 --- a/docs/docs/integrations/llms/clarifai.ipynb +++ b/docs/docs/integrations/llms/clarifai.ipynb @@ -38,6 +38,19 @@ "!pip install clarifai" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "326395b1-27e0-4cb6-9321-27c04466362b", + "metadata": {}, + "outputs": [], + "source": [ + "# Declare clarifai pat token as environment variable or you can pass it as argument in clarifai class.\n", + "import os\n", + "\n", + "os.environ[\"CLARIFAI_PAT\"] = \"CLARIFAI_PAT_TOKEN\"" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -50,20 +63,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "3f5dc9d7-65e3-4b5b-9086-3327d016cfe0", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - " ········\n" - ] - } - ], + "outputs": [], "source": [ "# Please login and get your API key from https://clarifai.com/settings/security\n", "from getpass import getpass\n", @@ -73,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "6fb585dd", "metadata": { "tags": [] @@ -98,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "035dea0f", "metadata": { "tags": [] @@ -121,7 +126,11 @@ "# Setup\n", "Setup the user id and app id where the model resides. You can find a list of public models on https://clarifai.com/explore/models\n", "\n", - "You will have to also initialize the model id and if needed, the model version id. Some models have many versions, you can choose the one appropriate for your task." + "You will have to also initialize the model id and if needed, the model version id. 
Some models have many versions, you can choose the one appropriate for your task.\n", + "\n", + " or\n", + " \n", + "You can use the model_url (for ex: \"https://clarifai.com/anthropic/completion/models/claude-v2\") for initialization." ] }, { @@ -136,7 +145,10 @@ "MODEL_ID = \"GPT-3_5-turbo\"\n", "\n", "# You can provide a specific model version as the model_version_id arg.\n", - "# MODEL_VERSION_ID = \"MODEL_VERSION_ID\"" + "# MODEL_VERSION_ID = \"MODEL_VERSION_ID\"\n", + "# or\n", + "\n", + "MODEL_URL = \"https://clarifai.com/openai/chat-completion/models/GPT-4\"" ] }, { @@ -149,14 +161,15 @@ "outputs": [], "source": [ "# Initialize a Clarifai LLM\n", - "clarifai_llm = Clarifai(\n", - " pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID\n", - ")" + "clarifai_llm = Clarifai(user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)\n", + "# or\n", + "# Initialize through Model URL\n", + "clarifai_llm = Clarifai(model_url=MODEL_URL)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "a641dbd9", "metadata": { "tags": [] @@ -178,17 +191,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "9f844993", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'Justin Bieber was born on March 1, 1994. So, we need to figure out the Super Bowl winner for the 1994 season. The NFL season spans two calendar years, so the Super Bowl for the 1994 season would have taken place in early 1995. \\n\\nThe Super Bowl in question is Super Bowl XXIX, which was played on January 29, 1995. The game was won by the San Francisco 49ers, who defeated the San Diego Chargers by a score of 49-26. Therefore, the San Francisco 49ers won the Super Bowl in the year Justin Bieber was born.'" + "' Okay, here are the steps to figure this out:\\n\\n1. Justin Bieber was born on March 1, 1994.\\n\\n2. The Super Bowl that took place in the year of his birth was Super Bowl XXVIII. \\n\\n3. 
Super Bowl XXVIII was played on January 30, 1994.\\n\\n4. The two teams that played in Super Bowl XXVIII were the Dallas Cowboys and the Buffalo Bills. \\n\\n5. The Dallas Cowboys defeated the Buffalo Bills 30-13 to win Super Bowl XXVIII.\\n\\nTherefore, the NFL team that won the Super Bowl in the year Justin Bieber was born was the Dallas Cowboys.'" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -198,6 +211,107 @@ "\n", "llm_chain.run(question)" ] + }, + { + "cell_type": "markdown", + "id": "2604c17f-a410-4159-ac1c-fdd7c60b725c", + "metadata": {}, + "source": [ + "## Model Predict with Inference parameters for GPT.\n", + "Alternatively you can use GPT models with inference parameters (like temperature, max_tokens etc)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "59ae8cd9-3d07-40f2-b4b5-b7908b2fc71d", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the parameters as dict.\n", + "params = dict(temperature=str(0.3), max_tokens=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c14c7940-4fd3-41a4-9ed4-f4ecead08edf", + "metadata": {}, + "outputs": [], + "source": [ + "clarifai_llm = Clarifai(user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)\n", + "llm_chain = LLMChain(\n", + " prompt=prompt, llm=clarifai_llm, llm_kwargs={\"inference_params\": params}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "80fef923-0473-4119-aa7e-868374560fdd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Step 1: The first digit can be any even number from 1 to 9, except for 5. So there are 4 choices for the first digit.\\n\\nStep 2: If the first digit is not 5, then the second digit must be 7. So there is only 1 choice for the second digit.\\n\\nStep 3: The third digit can be any even number from 0 to 9, except for 5 and 7. 
So there are '" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question = \"How many 3 digit even numbers you can form that if one of the digits is 5 then the following digit must be 7?\"\n", + "\n", + "llm_chain.run(question)" + ] + }, + { + "cell_type": "markdown", + "id": "c9ab08e2-4bc9-49b8-9b7a-88ae840ac3f8", + "metadata": {}, + "source": [ + "Generate responses for list of prompts" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fb3b8cea-5cc9-46f3-9334-1994f195cde3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LLMResult(generations=[[Generation(text=' Here is a 5 sentence summary of the key events of the American Revolution:\\n\\nThe American Revolution began with growing tensions between American colonists and the British government over issues of taxation without representation. In 1775, fighting broke out between British troops and American militiamen in Lexington and Concord, starting the Revolutionary War. The Continental Congress appointed George Washington as commander of the Continental Army, which went on to win key victories over the British. In 1776, the Declaration of Independence was adopted, formally declaring the 13 American colonies free from British rule. After years of fighting, the Revolutionary War ended with the British defeat at Yorktown in 1781 and recognition of American independence.')], [Generation(text=\" Here's a humorous take on explaining rocket science:\\n\\nRocket science is so easy, it's practically child's play! Just strap a big metal tube full of explosive liquid to your butt and light the fuse. What could go wrong? Blastoff! Whoosh, you'll be zooming to the moon in no time. Just remember your helmet or your head might go pop like a zit when you leave the atmosphere. \\n\\nMaking rockets is a cinch too. Simply mix together some spicy spices, garlic powder, chili powder, a dash of gunpowder and voila - rocket fuel! 
Add a pinch of baking soda and vinegar if you want an extra kick. Shake well and pour into your DIY soda bottle rocket. Stand back and watch that baby soar!\\n\\nGuiding a rocket is fun for the whole family. Just strap in, push some random buttons and see where you end up. It's like the ultimate surprise vacation! You never know if you'll wind up on Venus, crash land on Mars, or take a quick dip through the rings of Saturn. \\n\\nAnd if anything goes wrong, don't sweat it. Rocket science is easy breezy. Just troubleshoot on the fly with some duct tape and crazy glue and you'll be back on course in a jiffy. Who needs mission control when you've got this!\")], [Generation(text=\" Here is a draft welcome speech for a college sports day:\\n\\nGood morning everyone and welcome to our college's annual sports day! It's wonderful to see so many students, faculty, staff, alumni, and guests gathered here today to celebrate sportsmanship and athletic achievement at our college. \\n\\nLet's begin by thanking all the organizers, volunteers, coaches, and staff members who worked tirelessly behind the scenes to make this event possible. Our sports day would not happen without your dedication and commitment. \\n\\nI also want to recognize all the student-athletes with us today. You inspire us with your talent, spirit, and determination. Sports have a unique power to unite and energize our community. Through both individual and team sports, you demonstrate focus, collaboration, perseverance and resilience – qualities that will serve you well both on and off the field.\\n\\nThe spirit of competition and fair play are core values of any sports event. I encourage all of you to compete enthusiastically today. Play to the best of your ability and have fun. Applaud the effort and sportsmanship of your fellow athletes, regardless of the outcome. \\n\\nWin or lose, this sports day is a day for us to build camaraderie and create lifelong memories. 
Let's make it a day of fitness and friendship for all. With that, let the games begin. Enjoy the day!\")]], llm_output=None, run=None)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We can use _generate to generate the response for list of prompts.\n", + "clarifai_llm._generate(\n", + " [\n", + " \"Help me summarize the events of american revolution in 5 sentences\",\n", + " \"Explain about rocket science in a funny way\",\n", + " \"Create a script for welcome speech for the college sports day\",\n", + " ],\n", + " inference_params=params,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb080d00-7b5c-4726-ae4f-df9374997c7f", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -216,7 +330,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.10" } }, "nbformat": 4, diff --git a/docs/docs/integrations/text_embedding/clarifai.ipynb b/docs/docs/integrations/text_embedding/clarifai.ipynb index 33f9ac0abcaa1..335f597ddac0f 100644 --- a/docs/docs/integrations/text_embedding/clarifai.ipynb +++ b/docs/docs/integrations/text_embedding/clarifai.ipynb @@ -57,7 +57,7 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ " ········\n" @@ -81,6 +81,7 @@ "outputs": [], "source": [ "# Import the required modules\n", + "from langchain.chains import LLMChain\n", "from langchain.embeddings import ClarifaiEmbeddings\n", "from langchain.prompts import PromptTemplate" ] @@ -125,16 +126,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "1fe9bf15", "metadata": {}, "outputs": [], "source": [ - "USER_ID = \"salesforce\"\n", - "APP_ID = \"blip\"\n", - "MODEL_ID = \"multimodal-embedder-blip-2\"\n", + "USER_ID = \"clarifai\"\n", + "APP_ID = \"main\"\n", + "MODEL_ID = \"BAAI-bge-base-en-v15\"\n", + "MODEL_URL = 
\"https://clarifai.com/clarifai/main/models/BAAI-bge-base-en-v15\"\n", "\n", - "# You can provide a specific model version as the model_version_id arg.\n", + "# Further you can also provide a specific model version as the model_version_id arg.\n", "# MODEL_VERSION_ID = \"MODEL_VERSION_ID\"" ] }, @@ -148,26 +150,38 @@ "outputs": [], "source": [ "# Initialize a Clarifai embedding model\n", - "embeddings = ClarifaiEmbeddings(\n", - " pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID\n", - ")" + "embeddings = ClarifaiEmbeddings(user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)\n", + "\n", + "# Initialize a clarifai embedding model using model URL\n", + "embeddings = ClarifaiEmbeddings(model_url=MODEL_URL)\n", + "\n", + "# Alternatively you can initialize clarifai class with pat argument." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "a641dbd9", "metadata": { "tags": [] }, "outputs": [], "source": [ - "text = \"This is a test document.\"" + "text = \"roses are red violets are blue.\"\n", + "text2 = \"Make hay while the sun shines.\"" + ] + }, + { + "cell_type": "markdown", + "id": "14544fbb-76df-43c9-b5ec-88941ff12889", + "metadata": {}, + "source": [ + "You can embed single line of your text using embed_query function !" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "32b4d5f4-2b8e-4681-856f-19a3dd141ae4", "metadata": {}, "outputs": [], @@ -175,14 +189,22 @@ "query_result = embeddings.embed_query(text)" ] }, + { + "cell_type": "markdown", + "id": "ab9140c7-19c7-48fd-9a28-0c2351e5d2c5", + "metadata": {}, + "source": [ + "Further to embed list of texts/documents use embed_documents function." 
+ ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "47076457-1880-48ac-970f-872ead6f0d94", "metadata": {}, "outputs": [], "source": [ - "doc_result = embeddings.embed_documents([text])" + "doc_result = embeddings.embed_documents([text, text2])" ] } ], @@ -202,7 +224,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.10" } }, "nbformat": 4, diff --git a/docs/docs/integrations/vectorstores/clarifai.ipynb b/docs/docs/integrations/vectorstores/clarifai.ipynb index 454872293ffae..f6f7b7ec9ac29 100644 --- a/docs/docs/integrations/vectorstores/clarifai.ipynb +++ b/docs/docs/integrations/vectorstores/clarifai.ipynb @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "aac9563e", "metadata": { "tags": [] @@ -98,14 +98,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 24, "id": "4d853395", "metadata": {}, "outputs": [], "source": [ "USER_ID = \"USERNAME_ID\"\n", "APP_ID = \"APPLICATION_ID\"\n", - "NUMBER_OF_DOCS = 4" + "NUMBER_OF_DOCS = 2" ] }, { @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, "id": "1d828f77", "metadata": {}, "outputs": [], @@ -139,49 +139,130 @@ "]" ] }, + { + "cell_type": "markdown", + "id": "8e467c0b-e218-4cb2-a02e-2948670bbab7", + "metadata": {}, + "source": [ + "Alternatively you have an option to give custom input ids to the inputs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ffab62d6-8ef4-4c5e-b45e-6f1b39d0c013", + "metadata": {}, + "outputs": [], + "source": [ + "idlist = [\"text1\", \"text2\", \"text3\", \"text4\", \"text5\"]\n", + "metadatas = [\n", + " {\"id\": idlist[i], \"text\": text, \"source\": \"book 1\", \"category\": [\"books\", \"modern\"]}\n", + " for i, text in enumerate(texts)\n", + "]" + ] + }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 27, "id": "738bff27", "metadata": {}, "outputs": [], + "source": [ + "# There is an option to initialize clarifai vector store with pat as argument!\n", + "clarifai_vector_db = Clarifai(\n", + " user_id=USER_ID,\n", + " app_id=APP_ID,\n", + " number_of_docs=NUMBER_OF_DOCS,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d3ca631f-8182-461f-b581-b649f7176a5f", + "metadata": {}, + "source": [ + "Upload data into clarifai app." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77e4f544-e766-4e0e-934a-4e85f68a0286", + "metadata": {}, + "outputs": [], + "source": [ + "# upload with metadata and custom input ids.\n", + "response = clarifai_vector_db.add_texts(texts=texts, ids=idlist, metadatas=metadatas)\n", + "\n", + "# upload without metadata (Not recommended)- Since you will not be able to perform Search operation with respect to metadata.\n", + "# custom input_id (optional)\n", + "response = clarifai_vector_db.add_texts(texts=texts)" + ] + }, + { + "cell_type": "markdown", + "id": "09d97edf-014b-4a5b-86a9-6a5b255554ba", + "metadata": {}, + "source": [ + "You can create a clarifai vector DB store and ingest all the inputs into your app directly by," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c466ac9-6b50-48ff-8b23-9fc6a3cbdf97", + "metadata": {}, + "outputs": [], "source": [ "clarifai_vector_db = Clarifai.from_texts(\n", " user_id=USER_ID,\n", " app_id=APP_ID,\n", " texts=texts,\n", - " pat=CLARIFAI_PAT,\n", - " 
number_of_docs=NUMBER_OF_DOCS,\n", " metadatas=metadatas,\n", ")" ] }, + { + "cell_type": "markdown", + "id": "0bb2affb-48ca-410b-85c0-9e1275429bcb", + "metadata": {}, + "source": [ + "Search similar texts using similarity search function." + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "e755cdce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='I really enjoy spending time with you', metadata={'text': 'I really enjoy spending time with you', 'id': 0.0, 'source': 'book 1', 'category': ['books', 'modern']}),\n", - " Document(page_content='I went to the movies yesterday', metadata={'text': 'I went to the movies yesterday', 'id': 3.0, 'source': 'book 1', 'category': ['books', 'modern']})]" + "[Document(page_content='I really enjoy spending time with you', metadata={'text': 'I really enjoy spending time with you', 'id': 'text1', 'source': 'book 1', 'category': ['books', 'modern']})]" ] }, - "execution_count": null, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "docs = clarifai_vector_db.similarity_search(\"I would love to see you\")\n", + "docs = clarifai_vector_db.similarity_search(\"I would like to see you\")\n", "docs" ] }, + { + "cell_type": "markdown", + "id": "bd703470-7efb-4be5-a556-eea896ca60f4", + "metadata": {}, + "source": [ + "Further you can filter your search results by metadata." 
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "140103ec-0936-454a-9f4a-7d5beefc138f", "metadata": {}, "outputs": [], @@ -210,41 +291,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "a3c3999a", "metadata": {}, "outputs": [], "source": [ - "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", + "loader = TextLoader(\"your_local_file_path.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" ] }, - { - "cell_type": "code", - "execution_count": 9, - "id": "69ae7e35", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': '../../../state_of_the_union.txt'}),\n", - " Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. 
\\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.', metadata={'source': '../../../state_of_the_union.txt'}),\n", - " Document(page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. 
\\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.', metadata={'source': '../../../state_of_the_union.txt'}),\n", - " Document(page_content='We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \\n\\nTogether with our allies –we are right now enforcing powerful economic sanctions. \\n\\nWe are cutting off Russia’s largest banks from the international financial system. \\n\\nPreventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \\n\\nWe are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. \\n\\nTonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \\n\\nThe U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \\n\\nWe are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains.', metadata={'source': '../../../state_of_the_union.txt'})]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "docs[:4]" - ] - }, { "cell_type": "code", "execution_count": 10, @@ -257,9 +314,17 @@ "NUMBER_OF_DOCS = 4" ] }, + { + "cell_type": "markdown", + "id": "52d86f01-3462-440e-8960-3c0c17b98f09", + "metadata": {}, + "source": [ + "Create a clarifai vector DB class and ingest all your documents into clarifai App." 
+ ] + }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "6e104aee", "metadata": {}, "outputs": [], @@ -268,33 +333,18 @@ " user_id=USER_ID,\n", " app_id=APP_ID,\n", " documents=docs,\n", - " pat=CLARIFAI_PAT,\n", " number_of_docs=NUMBER_OF_DOCS,\n", ")" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "9c608226", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Document(page_content='And I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. \\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \\n\\nThese laws don’t infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \\n\\nIn state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen.', metadata={'source': '../../../state_of_the_union.txt'}),\n", - " Document(page_content='We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. 
\\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.', metadata={'source': '../../../state_of_the_union.txt'}),\n", - " Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt'}),\n", - " Document(page_content='So let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. 
\\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \\n\\nWe should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe.', metadata={'source': '../../../state_of_the_union.txt'})]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "docs = clarifai_vector_db.similarity_search(\"Texts related to criminals and violence\")\n", + "docs = clarifai_vector_db.similarity_search(\"Texts related to population\")\n", "docs" ] }, @@ -330,7 +380,6 @@ "clarifai_vector_db = Clarifai(\n", " user_id=USER_ID,\n", " app_id=APP_ID,\n", - " pat=CLARIFAI_PAT,\n", " number_of_docs=NUMBER_OF_DOCS,\n", ")" ] @@ -342,9 +391,39 @@ "metadata": {}, "outputs": [], "source": [ - "docs = clarifai_vector_db.similarity_search(\"Texts related to criminals and violence\")\n", - "docs" + "docs = clarifai_vector_db.similarity_search(\n", + " \"Texts related to ammuniction and president wilson\"\n", + ")" ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "55ee5fc7-94c4-45d0-84ca-00defeca871e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"President Wilson, generally acclaimed as the leader of the world's democracies,\\nphrased for civilization the arguments against autocracy in the great peace conference\\nafter the war. The President headed the American delegation to that conclave of world\\nre-construction. With him as delegates to the conference were Robert Lansing, Secretary\\nof State; Henry White, former Ambassador to France and Italy; Edward M. 
House and\\nGeneral Tasker H. Bliss.\\nRepresenting American Labor at the International Labor conference held in Paris\\nsimultaneously with the Peace Conference were Samuel Gompers, president of the\\nAmerican Federation of Labor; William Green, secretary-treasurer of the United Mine\\nWorkers of America; John R. Alpine, president of the Plumbers' Union; James Duncan,\\npresident of the International Association of Granite Cutters; Frank Duffy, president of\\nthe United Brotherhood of Carpenters and Joiners, and Frank Morrison, secretary of the\\nAmerican Federation of Labor.\\nEstimating the share of each Allied nation in the great victory, mankind will\\nconclude that the heaviest cost in proportion to prewar population and treasure was paid\\nby the nations that first felt the shock of war, Belgium, Serbia, Poland and France. All\\nfour were the battle-grounds of huge armies, oscillating in a bloody frenzy over once\\nfertile fields and once prosperous towns.\\nBelgium, with a population of 8,000,000, had a casualty list of more than 350,000;\\nFrance, with its casualties of 4,000,000 out of a population (including its colonies) of\\n90,000,000, is really the martyr nation of the world. Her gallant poilus showed the world\\nhow cheerfully men may die in defense of home and liberty. Huge Russia, including\\nhapless Poland, had a casualty list of 7,000,000 out of its entire population of\\n180,000,000. 
The United States out of a population of 110,000,000 had a casualty list of\\n236,117 for nineteen months of war; of these 53,169 were killed or died of disease;\\n179,625 were wounded; and 3,323 prisoners or missing.\"" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[0].page_content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa7b3260-2ee3-4619-836f-da64370a855c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -363,7 +442,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.10" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/embeddings/clarifai.py b/libs/langchain/langchain/embeddings/clarifai.py index 2f54bf5138fe9..e39c2bbc4b041 100644 --- a/libs/langchain/langchain/embeddings/clarifai.py +++ b/libs/langchain/langchain/embeddings/clarifai.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional from langchain_core.embeddings import Embeddings from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator @@ -20,15 +20,15 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): .. 
code-block:: python from langchain.embeddings import ClarifaiEmbeddings - clarifai = ClarifaiEmbeddings( - model="embed-english-light-v3.0", clarifai_api_key="my-api-key" - ) + clarifai = ClarifaiEmbeddings(user_id=USER_ID, + app_id=APP_ID, + model_id=MODEL_ID) + (or) + clarifai_llm = Clarifai(model_url=EXAMPLE_URL) """ - stub: Any #: :meta private: - """Clarifai stub.""" - userDataObject: Any - """Clarifai user data object.""" + model_url: Optional[str] = None + """Model url to use.""" model_id: Optional[str] = None """Model id to use.""" model_version_id: Optional[str] = None @@ -48,37 +48,24 @@ class Config: @root_validator() def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" + """Validate that we have all required info to access Clarifai + platform and python package exists in environment.""" + values["pat"] = get_from_dict_or_env(values, "pat", "CLARIFAI_PAT") user_id = values.get("user_id") app_id = values.get("app_id") model_id = values.get("model_id") + model_url = values.get("model_url") - if values["pat"] is None: - raise ValueError("Please provide a pat.") - if user_id is None: - raise ValueError("Please provide a user_id.") - if app_id is None: - raise ValueError("Please provide a app_id.") - if model_id is None: - raise ValueError("Please provide a model_id.") + if model_url is not None and model_id is not None: + raise ValueError("Please provide either model_url or model_id, not both.") - try: - from clarifai.client import create_stub - from clarifai.client.auth.helper import ClarifaiAuthHelper - except ImportError: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." 
- ) - auth = ClarifaiAuthHelper( - user_id=user_id, - app_id=app_id, - pat=values["pat"], - base=values["api_base"], - ) - values["userDataObject"] = auth.get_user_app_id_proto() - values["stub"] = create_stub(auth) + if model_url is None and model_id is None: + raise ValueError("Please provide one of model_url or model_id.") + + if model_url is None and model_id is not None: + if user_id is None or app_id is None: + raise ValueError("Please provide a user_id and app_id.") return values @@ -91,57 +78,48 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: Returns: List of embeddings, one for each text. """ - try: - from clarifai_grpc.grpc.api import ( - resources_pb2, - service_pb2, - ) - from clarifai_grpc.grpc.api.status import status_code_pb2 + from clarifai.client.input import Inputs + from clarifai.client.model import Model except ImportError: raise ImportError( "Could not import clarifai python package. " "Please install it with `pip install clarifai`." ) + if self.pat is not None: + pat = self.pat + if self.model_url is not None: + _model_init = Model(url=self.model_url, pat=pat) + else: + _model_init = Model( + model_id=self.model_id, + user_id=self.user_id, + app_id=self.app_id, + pat=pat, + ) + input_obj = Inputs(pat=pat) batch_size = 32 embeddings = [] - for i in range(0, len(texts), batch_size): - batch = texts[i : i + batch_size] - - post_model_outputs_request = service_pb2.PostModelOutputsRequest( - user_app_id=self.userDataObject, - model_id=self.model_id, - version_id=self.model_version_id, - inputs=[ - resources_pb2.Input( - data=resources_pb2.Data(text=resources_pb2.Text(raw=t)) - ) - for t in batch - ], - ) - post_model_outputs_response = self.stub.PostModelOutputs( - post_model_outputs_request - ) - if post_model_outputs_response.status.code != status_code_pb2.SUCCESS: - logger.error(post_model_outputs_response.status) - first_output_failure = ( - post_model_outputs_response.outputs[0].status - if 
len(post_model_outputs_response.outputs) - else None - ) - raise Exception( - f"Post model outputs failed, status: " - f"{post_model_outputs_response.status}, first output failure: " - f"{first_output_failure}" - ) - embeddings.extend( - [ - list(o.data.embeddings[0].vector) - for o in post_model_outputs_response.outputs + try: + for i in range(0, len(texts), batch_size): + batch = texts[i : i + batch_size] + input_batch = [ + input_obj.get_text_input(input_id=str(id), raw_text=inp) + for id, inp in enumerate(batch) ] - ) + predict_response = _model_init.predict(input_batch) + embeddings.extend( + [ + list(output.data.embeddings[0].vector) + for output in predict_response.outputs + ] + ) + + except Exception as e: + logger.error(f"Predict failed, exception: {e}") + return embeddings def embed_query(self, text: str) -> List[float]: @@ -153,48 +131,34 @@ def embed_query(self, text: str) -> List[float]: Returns: Embeddings for the text. """ - try: - from clarifai_grpc.grpc.api import ( - resources_pb2, - service_pb2, - ) - from clarifai_grpc.grpc.api.status import status_code_pb2 + from clarifai.client.model import Model except ImportError: raise ImportError( "Could not import clarifai python package. " "Please install it with `pip install clarifai`." 
) - - post_model_outputs_request = service_pb2.PostModelOutputsRequest( - user_app_id=self.userDataObject, - model_id=self.model_id, - version_id=self.model_version_id, - inputs=[ - resources_pb2.Input( - data=resources_pb2.Data(text=resources_pb2.Text(raw=text)) - ) - ], - ) - post_model_outputs_response = self.stub.PostModelOutputs( - post_model_outputs_request - ) - - if post_model_outputs_response.status.code != status_code_pb2.SUCCESS: - logger.error(post_model_outputs_response.status) - first_output_failure = ( - post_model_outputs_response.outputs[0].status - if len(post_model_outputs_response.outputs[0]) - else None + if self.pat is not None: + pat = self.pat + if self.model_url is not None: + _model_init = Model(url=self.model_url, pat=pat) + else: + _model_init = Model( + model_id=self.model_id, + user_id=self.user_id, + app_id=self.app_id, + pat=pat, ) - raise Exception( - f"Post model outputs failed, status: " - f"{post_model_outputs_response.status}, first output failure: " - f"{first_output_failure}" + + try: + predict_response = _model_init.predict_by_bytes( + bytes(text, "utf-8"), input_type="text" ) + embeddings = [ + list(op.data.embeddings[0].vector) for op in predict_response.outputs + ] + + except Exception as e: + logger.error(f"Predict failed, exception: {e}") - embeddings = [ - list(o.data.embeddings[0].vector) - for o in post_model_outputs_response.outputs - ] return embeddings[0] diff --git a/libs/langchain/langchain/llms/clarifai.py b/libs/langchain/langchain/llms/clarifai.py index 40fe0c536fcd9..7d56a5de7f67c 100644 --- a/libs/langchain/langchain/llms/clarifai.py +++ b/libs/langchain/langchain/llms/clarifai.py @@ -12,6 +12,9 @@ logger = logging.getLogger(__name__) +EXAMPLE_URL = "https://clarifai.com/openai/chat-completion/models/GPT-4" + + class Clarifai(LLM): """Clarifai large language models. @@ -24,27 +27,23 @@ class Clarifai(LLM): .. 
code-block:: python from langchain.llms import Clarifai - clarifai_llm = Clarifai(pat=CLARIFAI_PAT, \ - user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID) + clarifai_llm = Clarifai(user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID) + (or) + clarifai_llm = Clarifai(model_url=EXAMPLE_URL) """ - stub: Any #: :meta private: - userDataObject: Any - + model_url: Optional[str] = None + """Model url to use.""" model_id: Optional[str] = None """Model id to use.""" - model_version_id: Optional[str] = None """Model version id to use.""" - app_id: Optional[str] = None """Clarifai application id to use.""" - user_id: Optional[str] = None """Clarifai user id to use.""" - pat: Optional[str] = None - + """Clarifai personal access token to use.""" api_base: str = "https://api.clarifai.com" class Config: @@ -60,32 +59,17 @@ def validate_environment(cls, values: Dict) -> Dict: user_id = values.get("user_id") app_id = values.get("app_id") model_id = values.get("model_id") + model_url = values.get("model_url") - if values["pat"] is None: - raise ValueError("Please provide a pat.") - if user_id is None: - raise ValueError("Please provide a user_id.") - if app_id is None: - raise ValueError("Please provide a app_id.") - if model_id is None: - raise ValueError("Please provide a model_id.") + if model_url is not None and model_id is not None: + raise ValueError("Please provide either model_url or model_id, not both.") - try: - from clarifai.client import create_stub - from clarifai.client.auth.helper import ClarifaiAuthHelper - except ImportError: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." 
- ) - auth = ClarifaiAuthHelper( - user_id=user_id, - app_id=app_id, - pat=values["pat"], - base=values["api_base"], - ) - values["userDataObject"] = auth.get_user_app_id_proto() - values["stub"] = create_stub(auth) + if model_url is None and model_id is None: + raise ValueError("Please provide one of model_url or model_id.") + + if model_url is None and model_id is not None: + if user_id is None or app_id is None: + raise ValueError("Please provide a user_id and app_id.") return values @@ -99,6 +83,7 @@ def _identifying_params(self) -> Dict[str, Any]: """Get the identifying parameters.""" return { **{ + "model_url": self.model_url, "user_id": self.user_id, "app_id": self.app_id, "model_id": self.model_id, @@ -115,6 +100,7 @@ def _call( prompt: str, stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + inference_params: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> str: """Call out to Clarfai's PostModelOutputs endpoint. @@ -131,54 +117,39 @@ def _call( response = clarifai_llm("Tell me a joke.") """ - + # If version_id None, Defaults to the latest model version try: - from clarifai_grpc.grpc.api import ( - resources_pb2, - service_pb2, - ) - from clarifai_grpc.grpc.api.status import status_code_pb2 + from clarifai.client.model import Model except ImportError: raise ImportError( "Could not import clarifai python package. " "Please install it with `pip install clarifai`." 
) - - # The userDataObject is created in the overview and - # is required when using a PAT - # If version_id None, Defaults to the latest model version - post_model_outputs_request = service_pb2.PostModelOutputsRequest( - user_app_id=self.userDataObject, - model_id=self.model_id, - version_id=self.model_version_id, - inputs=[ - resources_pb2.Input( - data=resources_pb2.Data(text=resources_pb2.Text(raw=prompt)) - ) - ], - ) - post_model_outputs_response = self.stub.PostModelOutputs( - post_model_outputs_request - ) - - if post_model_outputs_response.status.code != status_code_pb2.SUCCESS: - logger.error(post_model_outputs_response.status) - first_model_failure = ( - post_model_outputs_response.outputs[0].status - if len(post_model_outputs_response.outputs) - else None + if self.pat is not None: + pat = self.pat + if self.model_url is not None: + _model_init = Model(url=self.model_url, pat=pat) + else: + _model_init = Model( + model_id=self.model_id, + user_id=self.user_id, + app_id=self.app_id, + pat=pat, ) - raise Exception( - f"Post model outputs failed, status: " - f"{post_model_outputs_response.status}, first output failure: " - f"{first_model_failure}" + try: + (inference_params := {}) if inference_params is None else inference_params + predict_response = _model_init.predict_by_bytes( + bytes(prompt, "utf-8"), + input_type="text", + inference_params=inference_params, ) + text = predict_response.outputs[0].data.text.raw + if stop is not None: + text = enforce_stop_tokens(text, stop) - text = post_model_outputs_response.outputs[0].data.text.raw + except Exception as e: + logger.error(f"Predict failed, exception: {e}") - # In order to make this consistent with other endpoints, we strip them. 
- if stop is not None: - text = enforce_stop_tokens(text, stop) return text def _generate( @@ -186,56 +157,50 @@ def _generate( prompts: List[str], stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + inference_params: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> LLMResult: """Run the LLM on the given prompt and input.""" + # TODO: add caching here. try: - from clarifai_grpc.grpc.api import ( - resources_pb2, - service_pb2, - ) - from clarifai_grpc.grpc.api.status import status_code_pb2 + from clarifai.client.input import Inputs + from clarifai.client.model import Model except ImportError: raise ImportError( "Could not import clarifai python package. " "Please install it with `pip install clarifai`." ) - - # TODO: add caching here. - generations = [] - batch_size = 32 - for i in range(0, len(prompts), batch_size): - batch = prompts[i : i + batch_size] - post_model_outputs_request = service_pb2.PostModelOutputsRequest( - user_app_id=self.userDataObject, + if self.pat is not None: + pat = self.pat + if self.model_url is not None: + _model_init = Model(url=self.model_url, pat=pat) + else: + _model_init = Model( model_id=self.model_id, - version_id=self.model_version_id, - inputs=[ - resources_pb2.Input( - data=resources_pb2.Data(text=resources_pb2.Text(raw=prompt)) - ) - for prompt in batch - ], - ) - post_model_outputs_response = self.stub.PostModelOutputs( - post_model_outputs_request + user_id=self.user_id, + app_id=self.app_id, + pat=pat, ) - if post_model_outputs_response.status.code != status_code_pb2.SUCCESS: - logger.error(post_model_outputs_response.status) - first_model_failure = ( - post_model_outputs_response.outputs[0].status - if len(post_model_outputs_response.outputs) - else None - ) - raise Exception( - f"Post model outputs failed, status: " - f"{post_model_outputs_response.status}, first output failure: " - f"{first_model_failure}" + generations = [] + batch_size = 32 + input_obj = Inputs(pat=pat) + try: 
+ for i in range(0, len(prompts), batch_size): + batch = prompts[i : i + batch_size] + input_batch = [ + input_obj.get_text_input(input_id=str(id), raw_text=inp) + for id, inp in enumerate(batch) + ] + ( + inference_params := {} + ) if inference_params is None else inference_params + predict_response = _model_init.predict( + inputs=input_batch, inference_params=inference_params ) - for output in post_model_outputs_response.outputs: + for output in predict_response.outputs: if stop is not None: text = enforce_stop_tokens(output.data.text.raw, stop) else: @@ -243,4 +208,7 @@ def _generate( generations.append([Generation(text=text)]) + except Exception as e: + logger.error(f"Predict failed, exception: {e}") + return LLMResult(generations=generations) diff --git a/libs/langchain/langchain/vectorstores/clarifai.py b/libs/langchain/langchain/vectorstores/clarifai.py index f78565ef5532f..308bc3502b22d 100644 --- a/libs/langchain/langchain/vectorstores/clarifai.py +++ b/libs/langchain/langchain/vectorstores/clarifai.py @@ -3,10 +3,12 @@ import logging import os import traceback +import uuid from concurrent.futures import ThreadPoolExecutor from typing import Any, Iterable, List, Optional, Tuple import requests +from google.protobuf.struct_pb2 import Struct from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore @@ -17,7 +19,7 @@ class Clarifai(VectorStore): """`Clarifai AI` vector store. - To use, you should have the ``clarifai`` python package installed. + To use, you should have the ``clarifai`` python SDK package installed. Example: .. code-block:: python @@ -33,9 +35,8 @@ def __init__( self, user_id: Optional[str] = None, app_id: Optional[str] = None, - pat: Optional[str] = None, number_of_docs: Optional[int] = None, - api_base: Optional[str] = None, + pat: Optional[str] = None, ) -> None: """Initialize with Clarifai client. 
@@ -50,21 +51,11 @@ def __init__( Raises: ValueError: If user ID, app ID or personal access token is not provided. """ - try: - from clarifai.auth.helper import DEFAULT_BASE, ClarifaiAuthHelper - from clarifai.client import create_stub - except ImportError: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." - ) - - if api_base is None: - self._api_base = DEFAULT_BASE - self._user_id = user_id or os.environ.get("CLARIFAI_USER_ID") self._app_id = app_id or os.environ.get("CLARIFAI_APP_ID") - self._pat = pat or os.environ.get("CLARIFAI_PAT") + if pat: + os.environ["CLARIFAI_PAT"] = pat + self._pat = os.environ.get("CLARIFAI_PAT") if self._user_id is None or self._app_id is None or self._pat is None: raise ValueError( "Could not find CLARIFAI_USER_ID, CLARIFAI_APP_ID or\ @@ -73,77 +64,8 @@ def __init__( app ID and personal access token \ from https://clarifai.com/settings/security." ) - - self._auth = ClarifaiAuthHelper( - user_id=self._user_id, - app_id=self._app_id, - pat=self._pat, - base=self._api_base, - ) - self._stub = create_stub(self._auth) - self._userDataObject = self._auth.get_user_app_id_proto() self._number_of_docs = number_of_docs - def _post_texts_as_inputs( - self, texts: List[str], metadatas: Optional[List[dict]] = None - ) -> List[str]: - """Post text to Clarifai and return the ID of the input. - - Args: - text (str): Text to post. - metadata (dict): Metadata to post. - - Returns: - str: ID of the input. - """ - try: - from clarifai_grpc.grpc.api import resources_pb2, service_pb2 - from clarifai_grpc.grpc.api.status import status_code_pb2 - from google.protobuf.struct_pb2 import Struct # type: ignore - except ImportError as e: - raise ImportError( - "Could not import clarifai python package. " - "Please install it with `pip install clarifai`." 
- ) from e - - if metadatas is not None: - assert len(list(texts)) == len( - metadatas - ), "Number of texts and metadatas should be the same." - - inputs = [] - for idx, text in enumerate(texts): - if metadatas is not None: - input_metadata = Struct() - input_metadata.update(metadatas[idx]) - inputs.append( - resources_pb2.Input( - data=resources_pb2.Data( - text=resources_pb2.Text(raw=text), - metadata=input_metadata, - ) - ) - ) - - post_inputs_response = self._stub.PostInputs( - service_pb2.PostInputsRequest( - user_app_id=self._userDataObject, - inputs=inputs, - ) - ) - - if post_inputs_response.status.code != status_code_pb2.SUCCESS: - logger.error(post_inputs_response.status) - raise Exception( - "Post inputs failed, status: " + post_inputs_response.status.description - ) - - input_ids = [] - for input in post_inputs_response.inputs: - input_ids.append(input.id) - - return input_ids - def add_texts( self, texts: Iterable[str], @@ -162,9 +84,14 @@ def add_texts( metadatas (Optional[List[dict]], optional): Optional list of metadatas. ids (Optional[List[str]], optional): Optional list of IDs. - Returns: - List[str]: List of IDs of the added texts. """ + try: + from clarifai.client.input import Inputs + except ImportError as e: + raise ImportError( + "Could not import clarifai python package. " + "Please install it with `pip install clarifai`." + ) from e ltexts = list(texts) length = len(ltexts) @@ -175,29 +102,51 @@ def add_texts( metadatas ), "Number of texts and metadatas should be the same." + if ids is not None: + assert len(ltexts) == len( + ids + ), "Number of text inputs and input ids should be the same." 
+ + input_obj = Inputs(app_id=self._app_id, user_id=self._user_id) batch_size = 32 - input_ids = [] + input_job_ids = [] for idx in range(0, length, batch_size): try: batch_texts = ltexts[idx : idx + batch_size] batch_metadatas = ( metadatas[idx : idx + batch_size] if metadatas else None ) - result_ids = self._post_texts_as_inputs(batch_texts, batch_metadatas) - input_ids.extend(result_ids) - logger.debug(f"Input {result_ids} posted successfully.") + if batch_metadatas is not None: + meta_list = [] + for meta in batch_metadatas: + meta_struct = Struct() + meta_struct.update(meta) + meta_list.append(meta_struct) + if ids is None: + ids = [uuid.uuid4().hex for _ in range(len(batch_texts))] + input_batch = [ + input_obj.get_text_input( + input_id=ids[id], + raw_text=inp, + metadata=meta_list[id] if batch_metadatas else None, + ) + for id, inp in enumerate(batch_texts) + ] + result_id = input_obj.upload_inputs(inputs=input_batch) + input_job_ids.extend(result_id) + logger.debug("Input posted successfully.") + except Exception as error: logger.warning(f"Post inputs failed: {error}") traceback.print_exc() - return input_ids + return input_job_ids def similarity_search_with_score( self, query: str, k: int = 4, - filter: Optional[dict] = None, - namespace: Optional[str] = None, + filters: Optional[dict] = None, **kwargs: Any, ) -> List[Tuple[Document, float]]: """Run similarity search with score using Clarifai. @@ -212,10 +161,9 @@ def similarity_search_with_score( List[Document]: List of documents most similar to the query text. """ try: - from clarifai_grpc.grpc.api import resources_pb2, service_pb2 - from clarifai_grpc.grpc.api.status import status_code_pb2 + from clarifai.client.search import Search + from clarifai_grpc.grpc.api import resources_pb2 from google.protobuf import json_format # type: ignore - from google.protobuf.struct_pb2 import Struct # type: ignore except ImportError as e: raise ImportError( "Could not import clarifai python package. 
" @@ -226,50 +174,22 @@ def similarity_search_with_score( if self._number_of_docs is not None: k = self._number_of_docs - req = service_pb2.PostAnnotationsSearchesRequest( - user_app_id=self._userDataObject, - searches=[ - resources_pb2.Search( - query=resources_pb2.Query( - ranks=[ - resources_pb2.Rank( - annotation=resources_pb2.Annotation( - data=resources_pb2.Data( - text=resources_pb2.Text(raw=query), - ) - ) - ) - ] - ) - ) - ], - pagination=service_pb2.Pagination(page=1, per_page=k), - ) - + search_obj = Search(user_id=self._user_id, app_id=self._app_id, top_k=k) + rank = [{"text_raw": query}] # Add filter by metadata if provided. - if filter is not None: - search_metadata = Struct() - search_metadata.update(filter) - f = req.searches[0].query.filters.add() - f.annotation.data.metadata.update(search_metadata) - - post_annotations_searches_response = self._stub.PostAnnotationsSearches(req) - - # Check if search was successful - if post_annotations_searches_response.status.code != status_code_pb2.SUCCESS: - raise Exception( - "Post searches failed, status: " - + post_annotations_searches_response.status.description - ) + if filters is not None: + search_metadata = {"metadata": filters} + search_response = search_obj.query(ranks=rank, filters=[search_metadata]) + else: + search_response = search_obj.query(ranks=rank) # Retrieve hits - hits = post_annotations_searches_response.hits - + hits = [hit for data in search_response for hit in data.hits] executor = ThreadPoolExecutor(max_workers=10) def hit_to_document(hit: resources_pb2.Hit) -> Tuple[Document, float]: metadata = json_format.MessageToDict(hit.input.data.metadata) - h = {"Authorization": f"Key {self._auth.pat}"} + h = {"Authorization": f"Key {self._pat}"} request = requests.get(hit.input.data.text.url, headers=h) # override encoding by real educated guess as provided by chardet @@ -314,9 +234,8 @@ def from_texts( metadatas: Optional[List[dict]] = None, user_id: Optional[str] = None, app_id: Optional[str] 
= None, - pat: Optional[str] = None, number_of_docs: Optional[int] = None, - api_base: Optional[str] = None, + pat: Optional[str] = None, **kwargs: Any, ) -> Clarifai: """Create a Clarifai vectorstore from a list of texts. @@ -325,10 +244,8 @@ def from_texts( user_id (str): User ID. app_id (str): App ID. texts (List[str]): List of texts to add. - pat (Optional[str]): Personal access token. Defaults to None. number_of_docs (Optional[int]): Number of documents to return during vector search. Defaults to None. - api_base (Optional[str]): API base. Defaults to None. metadatas (Optional[List[dict]]): Optional list of metadatas. Defaults to None. @@ -338,9 +255,8 @@ def from_texts( clarifai_vector_db = cls( user_id=user_id, app_id=app_id, - pat=pat, number_of_docs=number_of_docs, - api_base=api_base, + pat=pat, ) clarifai_vector_db.add_texts(texts=texts, metadatas=metadatas) return clarifai_vector_db @@ -352,9 +268,8 @@ def from_documents( embedding: Optional[Embeddings] = None, user_id: Optional[str] = None, app_id: Optional[str] = None, - pat: Optional[str] = None, number_of_docs: Optional[int] = None, - api_base: Optional[str] = None, + pat: Optional[str] = None, **kwargs: Any, ) -> Clarifai: """Create a Clarifai vectorstore from a list of documents. @@ -363,10 +278,8 @@ def from_documents( user_id (str): User ID. app_id (str): App ID. documents (List[Document]): List of documents to add. - pat (Optional[str]): Personal access token. Defaults to None. number_of_docs (Optional[int]): Number of documents to return during vector search. Defaults to None. - api_base (Optional[str]): API base. Defaults to None. Returns: Clarifai: Clarifai vectorstore. @@ -377,8 +290,7 @@ def from_documents( user_id=user_id, app_id=app_id, texts=texts, - pat=pat, number_of_docs=number_of_docs, - api_base=api_base, + pat=pat, metadatas=metadatas, )