diff --git a/docs/ecosystem/bananadev.md b/docs/ecosystem/bananadev.md new file mode 100644 index 0000000000000..1f0a232ad5da4 --- /dev/null +++ b/docs/ecosystem/bananadev.md @@ -0,0 +1,74 @@ +# Banana + +This page covers how to use the Banana ecosystem within LangChain. +It is broken into two parts: installation and setup, and then references to specific Banana wrappers. + +## Installation and Setup +- Install with `pip3 install banana-dev` +- Get an CerebriumAI api key and set it as an environment variable (`BANANA_API_KEY`) + +## Define your Banana Template + +If you want to use an available language model template you can find one [here](https://app.banana.dev/templates/conceptofmind/serverless-template-palmyra-base). +This template uses the Palmyra-Base model by [Writer](https://writer.com/product/api/). +You can check out an example Banana repository [here](https://github.com/conceptofmind/serverless-template-palmyra-base). + +## Build the Banana app + +You must include a output in the result. There is a rigid response structure. +```python +# Return the results as a dictionary +result = {'output': result} +``` + +An example inference function would be: +```python +def inference(model_inputs:dict) -> dict: + global model + global tokenizer + + # Parse out your arguments + prompt = model_inputs.get('prompt', None) + if prompt == None: + return {'message': "No prompt provided"} + + # Run the model + input_ids = tokenizer.encode(prompt, return_tensors='pt').cuda() + output = model.generate( + input_ids, + max_length=100, + do_sample=True, + top_k=50, + top_p=0.95, + num_return_sequences=1, + temperature=0.9, + early_stopping=True, + no_repeat_ngram_size=3, + num_beams=5, + length_penalty=1.5, + repetition_penalty=1.5, + bad_words_ids=[[tokenizer.encode(' ', add_prefix_space=True)[0]]] + ) + + result = tokenizer.decode(output[0], skip_special_tokens=True) + # Return the results as a dictionary + result = {'output': result} + return result +``` + +You can find a full example of a Banana app [here](https://github.com/conceptofmind/serverless-template-palmyra-base/blob/main/app.py). + + +## Wrappers + +### LLM + +There exists an Banana LLM wrapper, which you can access with +```python +from langchain.llms import Banana +``` + +You need to provide a model key located in the dashboard: +```python +llm = Banana(model_key="YOUR_MODEL_KEY") +``` \ No newline at end of file diff --git a/docs/ecosystem/deepinfra.md b/docs/ecosystem/deepinfra.md new file mode 100644 index 0000000000000..4149a4133320c --- /dev/null +++ b/docs/ecosystem/deepinfra.md @@ -0,0 +1,17 @@ +# DeepInfra + +This page covers how to use the DeepInfra ecosystem within LangChain. +It is broken into two parts: installation and setup, and then references to specific DeepInfra wrappers. + +## Installation and Setup +- Get your DeepInfra api key from this link [here](https://deepinfra.com/). +- Get an DeepInfra api key and set it as an environment variable (`DEEPINFRA_API_TOKEN`) + +## Wrappers + +### LLM + +There exists an DeepInfra LLM wrapper, which you can access with +```python +from langchain.llms import DeepInfra +``` diff --git a/docs/ecosystem/graphsignal.md b/docs/ecosystem/graphsignal.md new file mode 100644 index 0000000000000..2666f59ce3378 --- /dev/null +++ b/docs/ecosystem/graphsignal.md @@ -0,0 +1,38 @@ +# Graphsignal + +This page covers how to use the Graphsignal to trace and monitor LangChain. 
+ +## Installation and Setup + +- Install the Python library with `pip install graphsignal` +- Create free Graphsignal account [here](https://graphsignal.com) +- Get an API key and set it as an environment variable (`GRAPHSIGNAL_API_KEY`) + +## Tracing and Monitoring + +Graphsignal automatically instruments and starts tracing and monitoring chains. Traces, metrics and errors are then available in your [Graphsignal dashboard](https://app.graphsignal.com/). No prompts or other sensitive data are sent to Graphsignal cloud, only statistics and metadata. + +Initialize the tracer by providing a deployment name: + +```python +import graphsignal + +graphsignal.configure(deployment='my-langchain-app-prod') +``` + +In order to trace full runs and see a breakdown by chains and tools, you can wrap the calling routine or use a decorator: + +```python +with graphsignal.start_trace('my-chain'): + chain.run("some initial text") +``` + +Optionally, enable profiling to record function-level statistics for each trace. + +```python +with graphsignal.start_trace( + 'my-chain', options=graphsignal.TraceOptions(enable_profiling=True)): + chain.run("some initial text") +``` + +See the [Quick Start](https://graphsignal.com/docs/guides/quick-start/) guide for complete setup instructions. diff --git a/docs/ecosystem/helicone.md b/docs/ecosystem/helicone.md index 345cc4a456162..88cf2e527dd61 100644 --- a/docs/ecosystem/helicone.md +++ b/docs/ecosystem/helicone.md @@ -19,3 +19,35 @@ export OPENAI_API_BASE="https://oai.hconeai.com/v1" Now head over to [helicone.ai](https://helicone.ai/onboarding?step=2) to create your account, and add your OpenAI API key within our dashboard to view your logs. ![Helicone](../_static/HeliconeKeys.png) + +## How to enable Helicone caching + +```python +from langchain.llms import OpenAI +import openai +openai.api_base = "https://oai.hconeai.com/v1" + +llm = OpenAI(temperature=0.9, headers={"Helicone-Cache-Enabled": "true"}) +text = "What is a helicone?" +print(llm(text)) +``` + +[Helicone caching docs](https://docs.helicone.ai/advanced-usage/caching) + +## How to use Helicone custom properties + +```python +from langchain.llms import OpenAI +import openai +openai.api_base = "https://oai.hconeai.com/v1" + +llm = OpenAI(temperature=0.9, headers={ + "Helicone-Property-Session": "24", + "Helicone-Property-Conversation": "support_issue_2", + "Helicone-Property-App": "mobile", + }) +text = "What is a helicone?" +print(llm(text)) +``` + +[Helicone property docs](https://docs.helicone.ai/advanced-usage/custom-properties) diff --git a/docs/ecosystem/modal.md b/docs/ecosystem/modal.md new file mode 100644 index 0000000000000..7338e88e01a1a --- /dev/null +++ b/docs/ecosystem/modal.md @@ -0,0 +1,66 @@ +# Modal + +This page covers how to use the Modal ecosystem within LangChain. +It is broken into two parts: installation and setup, and then references to specific Modal wrappers. + +## Installation and Setup +- Install with `pip install modal-client` +- Run `modal token new` + +## Define your Modal Functions and Webhooks + +You must include a prompt. There is a rigid response structure. 
+ +```python +class Item(BaseModel): + prompt: str + +@stub.webhook(method="POST") +def my_webhook(item: Item): + return {"prompt": my_function.call(item.prompt)} +``` + +An example with GPT2: + +```python +from pydantic import BaseModel + +import modal + +stub = modal.Stub("example-get-started") + +volume = modal.SharedVolume().persist("gpt2_model_vol") +CACHE_PATH = "/root/model_cache" + +@stub.function( + gpu="any", + image=modal.Image.debian_slim().pip_install( + "tokenizers", "transformers", "torch", "accelerate" + ), + shared_volumes={CACHE_PATH: volume}, + retries=3, +) +def run_gpt2(text: str): + from transformers import GPT2Tokenizer, GPT2LMHeadModel + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + model = GPT2LMHeadModel.from_pretrained('gpt2') + encoded_input = tokenizer(text, return_tensors='pt').input_ids + output = model.generate(encoded_input, max_length=50, do_sample=True) + return tokenizer.decode(output[0], skip_special_tokens=True) + +class Item(BaseModel): + prompt: str + +@stub.webhook(method="POST") +def get_text(item: Item): + return {"prompt": run_gpt2.call(item.prompt)} +``` + +## Wrappers + +### LLM + +There exists an Modal LLM wrapper, which you can access with +```python +from langchain.llms import Modal +``` \ No newline at end of file diff --git a/docs/ecosystem/petals.md b/docs/ecosystem/petals.md index 42f51ffe904e7..2f6db15cb97a7 100644 --- a/docs/ecosystem/petals.md +++ b/docs/ecosystem/petals.md @@ -5,7 +5,7 @@ It is broken into two parts: installation and setup, and then references to spec ## Installation and Setup - Install with `pip install petals` -- Get an Huggingface api key and set it as an environment variable (`HUGGINGFACE_API_KEY`) +- Get a Hugging Face api key and set it as an environment variable (`HUGGINGFACE_API_KEY`) ## Wrappers @@ -14,4 +14,4 @@ It is broken into two parts: installation and setup, and then references to spec There exists an Petals LLM wrapper, which you can access with ```python from langchain.llms import Petals -``` \ No newline at end of file +``` diff --git a/docs/ecosystem/stochasticai.md b/docs/ecosystem/stochasticai.md new file mode 100644 index 0000000000000..75891103962c6 --- /dev/null +++ b/docs/ecosystem/stochasticai.md @@ -0,0 +1,17 @@ +# StochasticAI + +This page covers how to use the StochasticAI ecosystem within LangChain. +It is broken into two parts: installation and setup, and then references to specific StochasticAI wrappers. + +## Installation and Setup +- Install with `pip install stochasticx` +- Get an StochasticAI api key and set it as an environment variable (`STOCHASTICAI_API_KEY`) + +## Wrappers + +### LLM + +There exists an StochasticAI LLM wrapper, which you can access with +```python +from langchain.llms import StochasticAI +``` \ No newline at end of file diff --git a/docs/ecosystem/unstructured.md b/docs/ecosystem/unstructured.md index a7a32a0079179..1133688a7f0b2 100644 --- a/docs/ecosystem/unstructured.md +++ b/docs/ecosystem/unstructured.md @@ -17,10 +17,6 @@ This page is broken into two parts: installation and setup, and then references - `poppler-utils` - `tesseract-ocr` - `libreoffice` -- Run the following to install NLTK dependencies. `unstructured` will handle this automatically - soon. 
- - `python -c "import nltk; nltk.download('punkt')"` - - `python -c "import nltk; nltk.download('averaged_perceptron_tagger')"` - If you are parsing PDFs, run the following to install the `detectron2` model, which `unstructured` uses for layout detection: - `pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"` diff --git a/docs/ecosystem/writer.md b/docs/ecosystem/writer.md new file mode 100644 index 0000000000000..7b38c1ca0273d --- /dev/null +++ b/docs/ecosystem/writer.md @@ -0,0 +1,16 @@ +# Writer + +This page covers how to use the Writer ecosystem within LangChain. +It is broken into two parts: installation and setup, and then references to specific Writer wrappers. + +## Installation and Setup +- Get an Writer api key and set it as an environment variable (`WRITER_API_KEY`) + +## Wrappers + +### LLM + +There exists an Writer LLM wrapper, which you can access with +```python +from langchain.llms import Writer +``` \ No newline at end of file diff --git a/docs/modules/chains/key_concepts.md b/docs/modules/chains/key_concepts.md index cd97a73465149..d8c0e734010c7 100644 --- a/docs/modules/chains/key_concepts.md +++ b/docs/modules/chains/key_concepts.md @@ -6,6 +6,6 @@ They vary greatly in complexity and are combination of generic, highly configura ## Sequential Chain This is a specific type of chain where multiple other chains are run in sequence, with the outputs being added as inputs -to the next. A subtype of this type of chain is the `SimpleSequentialChain`, where all subchains have only one input and one output, +to the next. A subtype of this type of chain is the [`SimpleSequentialChain`](./generic/sequential_chains.html#simplesequentialchain), where all subchains have only one input and one output, and the output of one is therefore used as sole input to the next chain. diff --git a/docs/modules/document_loaders/examples/example_data/facebook_chat.json b/docs/modules/document_loaders/examples/example_data/facebook_chat.json new file mode 100644 index 0000000000000..b8baaa87e953b --- /dev/null +++ b/docs/modules/document_loaders/examples/example_data/facebook_chat.json @@ -0,0 +1,64 @@ +{ + "participants": [{"name": "User 1"}, {"name": "User 2"}], + "messages": [ + {"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"}, + { + "sender_name": "User 1", + "timestamp_ms": 1675597435669, + "content": "Oh no worries! Bye", + }, + { + "sender_name": "User 2", + "timestamp_ms": 1675596277579, + "content": "No Im sorry it was my mistake, the blue one is not for sale", + }, + { + "sender_name": "User 1", + "timestamp_ms": 1675595140251, + "content": "I thought you were selling the blue one!", + }, + { + "sender_name": "User 1", + "timestamp_ms": 1675595109305, + "content": "Im not interested in this bag. Im interested in the blue one!", + }, + { + "sender_name": "User 2", + "timestamp_ms": 1675595068468, + "content": "Here is $129", + }, + { + "sender_name": "User 2", + "timestamp_ms": 1675595060730, + "photos": [ + {"uri": "url_of_some_picture.jpg", "creation_timestamp": 1675595059} + ], + }, + { + "sender_name": "User 2", + "timestamp_ms": 1675595045152, + "content": "Online is at least $100", + }, + { + "sender_name": "User 1", + "timestamp_ms": 1675594799696, + "content": "How much do you want?", + }, + { + "sender_name": "User 2", + "timestamp_ms": 1675577876645, + "content": "Goodmorning! $50 is too low.", + }, + { + "sender_name": "User 1", + "timestamp_ms": 1675549022673, + "content": "Hi! Im interested in your bag. 
Im offering $50. Let me know if you are interested. Thanks!", + }, + ], + "title": "User 1 and User 2 chat", + "is_still_participant": true, + "thread_path": "inbox/User 1 and User 2 chat", + "magic_words": [], + "image": {"uri": "image_of_the_chat.jpg", "creation_timestamp": 1675549016}, + "joinable_mode": {"mode": 1, "link": ""}, +} diff --git a/docs/modules/document_loaders/examples/example_data/notebook.ipynb b/docs/modules/document_loaders/examples/example_data/notebook.ipynb new file mode 100644 index 0000000000000..db1a4fdce5df8 --- /dev/null +++ b/docs/modules/document_loaders/examples/example_data/notebook.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook\n", + "\n", + "This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import NotebookLoader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loader = NotebookLoader(\"example_data/notebook.ipynb\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`NotebookLoader.load()` loads the `.ipynb` notebook file into a `Document` object.\n", + "\n", + "**Parameters**:\n", + "\n", + "* `include_outputs` (bool): whether to include cell outputs in the resulting document (default is False).\n", + "* `max_output_length` (int): the maximum number of characters to include from each cell output (default is 10).\n", + "* `remove_newline` (bool): whether to remove newline characters from the cell sources and outputs (default is False).\n", + "* `traceback` (bool): whether to include full traceback (default is False)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loader.load(include_outputs=True, max_output_length=20, remove_newline=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "981b6680a42bdb5eb22187741e1607b3aae2cf73db800d1af1f268d1de6a1f70" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/document_loaders/examples/facebook_chat.ipynb b/docs/modules/document_loaders/examples/facebook_chat.ipynb new file mode 100644 index 0000000000000..7c60f68fbeb8f --- /dev/null +++ b/docs/modules/document_loaders/examples/facebook_chat.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Facebook Chat\n", + "\n", + "This notebook covers how to load data from the Facebook Chats into a format that can be ingested into LangChain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import FacebookChatLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "loader = FacebookChatLoader(\"example_data/facebook_chat.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='User 2 on 2023-02-05 12:46:11: Bye!\\n\\nUser 1 on 2023-02-05 12:43:55: Oh no worries! Bye\\n\\nUser 2 on 2023-02-05 12:24:37: No Im sorry it was my mistake, the blue one is not for sale\\n\\nUser 1 on 2023-02-05 12:05:40: I thought you were selling the blue one!\\n\\nUser 1 on 2023-02-05 12:05:09: Im not interested in this bag. Im interested in the blue one!\\n\\nUser 2 on 2023-02-05 12:04:28: Here is $129\\n\\nUser 2 on 2023-02-05 12:04:05: Online is at least $100\\n\\nUser 1 on 2023-02-05 11:59:59: How much do you want?\\n\\nUser 2 on 2023-02-05 07:17:56: Goodmorning! $50 is too low.\\n\\nUser 1 on 2023-02-04 23:17:02: Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!\\n\\n', lookup_str='', metadata={'source': 'docs/modules/document_loaders/examples/example_data/facebook_chat.json'}, lookup_index=0)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader.load()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "vscode": { + "interpreter": { + "hash": "384707f4965e853a82006e90614c2e1a578ea1f6eb0ee07a1dd78a657d37dd67" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/document_loaders/examples/notebook.ipynb b/docs/modules/document_loaders/examples/notebook.ipynb new file mode 100644 index 0000000000000..07cb4ef616705 --- /dev/null +++ b/docs/modules/document_loaders/examples/notebook.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook\n", + "\n", + "This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import NotebookLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "loader = NotebookLoader(\"example_data/notebook.ipynb\", include_outputs=True, max_output_length=20, remove_newline=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`NotebookLoader.load()` loads the `.ipynb` notebook file into a `Document` object.\n", + "\n", + "**Parameters**:\n", + "\n", + "* `include_outputs` (bool): whether to include cell outputs in the resulting document (default is False).\n", + "* `max_output_length` (int): the maximum number of characters to include from each cell output (default is 10).\n", + "* `remove_newline` (bool): whether to remove newline characters from the cell sources and outputs (default is False).\n", + "* `traceback` (bool): whether to include full traceback (default is False)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='\\'markdown\\' cell: \\'[\\'# Notebook\\', \\'\\', \\'This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain.\\']\\'\\n\\n \\'code\\' cell: \\'[\\'from langchain.document_loaders import NotebookLoader\\']\\'\\n\\n \\'code\\' cell: \\'[\\'loader = NotebookLoader(\"example_data/notebook.ipynb\")\\']\\'\\n\\n \\'markdown\\' cell: \\'[\\'`NotebookLoader.load()` loads the `.ipynb` notebook file into a `Document` object.\\', \\'\\', \\'**Parameters**:\\', \\'\\', \\'* `include_outputs` (bool): whether to include cell outputs in the resulting document (default is False).\\', \\'* `max_output_length` (int): the maximum number of characters to include from each cell output (default is 10).\\', \\'* `remove_newline` (bool): whether to remove newline characters from the cell sources and outputs (default is False).\\', \\'* `traceback` (bool): whether to include full traceback (default is False).\\']\\'\\n\\n \\'code\\' cell: \\'[\\'loader.load(include_outputs=True, max_output_length=20, remove_newline=True)\\']\\'\\n\\n', lookup_str='', metadata={'source': 'example_data/notebook.ipynb'}, lookup_index=0)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "vscode": { + "interpreter": { + "hash": "981b6680a42bdb5eb22187741e1607b3aae2cf73db800d1af1f268d1de6a1f70" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/document_loaders/examples/word_document.ipynb b/docs/modules/document_loaders/examples/word_document.ipynb new file mode 100644 index 0000000000000..daf70e8400c1d --- /dev/null +++ b/docs/modules/document_loaders/examples/word_document.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "39af9ecd", + "metadata": {}, + "source": [ + "# Word Documents\n", + 
"\n", + "This covers how to load Word documents into a document format that we can use downstream." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "721c48aa", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import UnstructuredWordDocumentLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9d3d0e35", + "metadata": {}, + "outputs": [], + "source": [ + "loader = UnstructuredWordDocumentLoader(\"fake.docx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "06073f91", + "metadata": {}, + "outputs": [], + "source": [ + "data = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c9adc5cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 'fake.docx'}, lookup_index=0)]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "markdown", + "id": "525d6b67", + "metadata": {}, + "source": [ + "## Retain Elements\n", + "\n", + "Under the hood, Unstructured creates different \"elements\" for different chunks of text. By default we combine those together, but you can easily keep that separation by specifying `mode=\"elements\"`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "064f9162", + "metadata": {}, + "outputs": [], + "source": [ + "loader = UnstructuredWordDocumentLoader(\"fake.docx\", mode=\"elements\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "abefbbdb", + "metadata": {}, + "outputs": [], + "source": [ + "data = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a547c534", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 'fake.docx', 'filename': 'fake.docx', 'category': 'Title'}, lookup_index=0)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/indexes/chain_examples/question_answering.ipynb b/docs/modules/indexes/chain_examples/question_answering.ipynb index 6e388aa6ad5dd..d6cbc764e3b58 100644 --- a/docs/modules/indexes/chain_examples/question_answering.ipynb +++ b/docs/modules/indexes/chain_examples/question_answering.ipynb @@ -7,7 +7,7 @@ "source": [ "# Question Answering\n", "\n", - "This notebook walks through how to use LangChain for question answering over a list of documents. It covers four different types of chaings: `stuff`, `map_reduce`, `refine`, `map-rerank`. For a more in depth explanation of what these chain types are, see [here](../combine_docs.md)." + "This notebook walks through how to use LangChain for question answering over a list of documents. It covers four different types of chains: `stuff`, `map_reduce`, `refine`, `map-rerank`. For a more in depth explanation of what these chain types are, see [here](../combine_docs.md)." 
] }, { diff --git a/docs/modules/llms/integrations.rst b/docs/modules/llms/integrations.rst index faf3bf87cc58e..deb79be424935 100644 --- a/docs/modules/llms/integrations.rst +++ b/docs/modules/llms/integrations.rst @@ -17,6 +17,14 @@ The examples here are all "how-to" guides for how to integrate with various LLM `Goose AI <./integrations/gooseai_example.html>`_: Covers how to utilize the Goose AI wrapper. +`Writer <./integrations/writer.html>`_: Covers how to utilize the Writer wrapper. + +`Banana <./integrations/banana.html>`_: Covers how to utilize the Banana wrapper. + +`Modal <./integrations/modal.html>`_: Covers how to utilize the Modal wrapper. + +`StochasticAI <./integrations/stochasticai.html>`_: Covers how to utilize the Stochastic AI wrapper. + `Cerebrium <./integrations/cerebriumai_example.html>`_: Covers how to utilize the Cerebrium AI wrapper. `Petals <./integrations/petals_example.html>`_: Covers how to utilize the Petals wrapper. @@ -27,6 +35,8 @@ The examples here are all "how-to" guides for how to integrate with various LLM `Anthropic <./integrations/anthropic_example.html>`_: Covers how to use Anthropic models with Langchain. +`DeepInfra <./integrations/deepinfra_example.html>`_: Covers how to utilize the DeepInfra wrapper. + `Self-Hosted Models (via Runhouse) <./integrations/self_hosted_examples.html>`_: Covers how to run models on existing or on-demand remote compute with Langchain. diff --git a/docs/modules/llms/integrations/aleph_alpha.ipynb b/docs/modules/llms/integrations/aleph_alpha.ipynb new file mode 100644 index 0000000000000..6fb7153862756 --- /dev/null +++ b/docs/modules/llms/integrations/aleph_alpha.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, + "source": [ + "# Aleph Alpha\n", + "This example goes over how to use LangChain to interact with Aleph Alpha models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6fb585dd", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import AlephAlpha\n", + "from langchain import PromptTemplate, LLMChain" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f81a230d", + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Q: {question}\n", + "\n", + "A:\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f0d26e48", + "metadata": {}, + "outputs": [], + "source": [ + "llm = AlephAlpha(model=\"luminous-extended\", maximum_tokens=20, stop_sequences=[\"Q:\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6811d621", + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3058e63f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "' Artificial Intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems.\\n'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question = \"What is AI?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", 
+ "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "vscode": { + "interpreter": { + "hash": "2d002ec47225e662695b764370d7966aa11eeb4302edc2f497bbf96d49c8f899" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/llms/integrations/banana.ipynb b/docs/modules/llms/integrations/banana.ipynb new file mode 100644 index 0000000000000..1428617bfa23e --- /dev/null +++ b/docs/modules/llms/integrations/banana.ipynb @@ -0,0 +1,85 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Banana\n", + "This example goes over how to use LangChain to interact with Banana models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from langchain.llms import Banana\n", + "from langchain import PromptTemplate, LLMChain\n", + "os.environ[\"BANANA_API_KEY\"] = \"YOUR_API_KEY\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = Banana(model_key=\"YOUR_MODEL_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('palm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/llms/integrations/deepinfra_example.ipynb b/docs/modules/llms/integrations/deepinfra_example.ipynb new file mode 100644 index 0000000000000..9327faecde953 --- /dev/null +++ b/docs/modules/llms/integrations/deepinfra_example.ipynb @@ -0,0 +1,141 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DeepInfra LLM Example\n", + "This notebook goes over how to use Langchain with [DeepInfra](https://deepinfra.com)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from langchain.llms import DeepInfra\n", + "from langchain import PromptTemplate, LLMChain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set the Environment API Key\n", + "Make sure to get your API key from DeepInfra. 
You are given a 1 hour free of serverless GPU compute to test different models.\n", + "You can print your token with `deepctl auth token`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"DEEPINFRA_API_TOKEN\"] = \"YOUR_KEY_HERE\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create the DeepInfra instance\n", + "Make sure to deploy your model first via `deepctl deploy create -m google/flat-t5-xl` (for example)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = DeepInfra(model_id=\"DEPLOYED MODEL ID\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Prompt Template\n", + "We will create a prompt template for Question and Answer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initiate the LLMChain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the LLMChain\n", + "Provide a question and run the LLMChain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What NFL team won the Super Bowl in 2015?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('palm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/llms/integrations/modal.ipynb b/docs/modules/llms/integrations/modal.ipynb new file mode 100644 index 0000000000000..f1ec862c858cc --- /dev/null +++ b/docs/modules/llms/integrations/modal.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modal\n", + "This example goes over how to use LangChain to interact with Modal models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import Modal\n", + "from langchain import PromptTemplate, LLMChain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = Modal(endpoint_url=\"YOUR_ENDPOINT_URL\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What NFL team won 
the Super Bowl in the year Justin Beiber was born?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('palm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/llms/integrations/openai.ipynb b/docs/modules/llms/integrations/openai.ipynb index d833376188c8a..f8133615f9fd2 100644 --- a/docs/modules/llms/integrations/openai.ipynb +++ b/docs/modules/llms/integrations/openai.ipynb @@ -88,7 +88,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.9.12 ('palm')", "language": "python", "name": "python3" }, @@ -102,7 +102,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.9.12" + }, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } } }, "nbformat": 4, diff --git a/docs/modules/llms/integrations/stochasticai.ipynb b/docs/modules/llms/integrations/stochasticai.ipynb new file mode 100644 index 0000000000000..8160359b597af --- /dev/null +++ b/docs/modules/llms/integrations/stochasticai.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# StochasticAI\n", + "This example goes over how to use LangChain to interact with StochasticAI models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import StochasticAI\n", + "from langchain import PromptTemplate, LLMChain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = StochasticAI(api_url=\"YOUR_API_URL\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('palm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/llms/integrations/writer.ipynb b/docs/modules/llms/integrations/writer.ipynb new file mode 100644 index 0000000000000..ac7f77f473209 --- /dev/null +++ b/docs/modules/llms/integrations/writer.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Writer\n", + "This example goes over how to use LangChain to interact with Writer models" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import Writer\n", + "from langchain import PromptTemplate, LLMChain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = Writer()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('palm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/utils/examples/ifttt.ipynb b/docs/modules/utils/examples/ifttt.ipynb new file mode 100644 index 0000000000000..ab21d190f2286 --- /dev/null +++ b/docs/modules/utils/examples/ifttt.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "16763ed3", + "metadata": {}, + "source": [ + "# IFTTT WebHooks\n", + "\n", + "This notebook shows how to use IFTTT Webhooks.\n", + "\n", + "From https://github.com/SidU/teams-langchain-js/wiki/Connecting-IFTTT-Services.\n", + "\n", + "# Creating a webhook\n", + "- Go to https://ifttt.com/create\n", + "\n", + "# Configuring the \"If This\"\n", + "- Click on the \"If This\" button in the IFTTT interface.\n", + "- Search for \"Webhooks\" in the search bar.\n", + "- Choose the first option for \"Receive a web request with a JSON payload.\"\n", + "- Choose an Event Name that is specific to the service you plan to connect to.\n", + "This will make it easier for you to manage the webhook URL.\n", + "For example, if you're connecting to Spotify, you could use \"Spotify\" as your\n", + "Event Name.\n", + "- Click the \"Create Trigger\" button to save your settings and create your webhook.\n", + "\n", + "# Configuring the \"Then That\"\n", + "- Tap on the \"Then That\" button in the IFTTT interface.\n", + "- Search for the service you want to connect, such as Spotify.\n", + "- Choose an action from the service, such as \"Add track to a playlist\".\n", + "- Configure the action by specifying the necessary details, such as the playlist name,\n", + "e.g., \"Songs from AI\".\n", + "- Reference the JSON Payload received by the Webhook in your action. For the Spotify\n", + "scenario, choose \"{{JsonPayload}}\" as your search query.\n", + "- Tap the \"Create Action\" button to save your action settings.\n", + "- Once you have finished configuring your action, click the \"Finish\" button to\n", + "complete the setup.\n", + "- Congratulations! 
You have successfully connected the Webhook to the desired\n", + "service, and you're ready to start receiving data and triggering actions 🎉\n", + "\n", + "# Finishing up\n", + "- To get your webhook URL go to https://ifttt.com/maker_webhooks/settings\n", + "- Copy the IFTTT key value from there. The URL is of the form\n", + "https://maker.ifttt.com/use/YOUR_IFTTT_KEY. Grab the YOUR_IFTTT_KEY value.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "10a46e7e", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools.ifttt import IFTTTWebhook" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "12003d72", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "key = os.environ[\"IFTTTKey\"]\n", + "url = f\"https://maker.ifttt.com/trigger/spotify/json/with/key/{key}\"\n", + "tool = IFTTTWebhook(name=\"Spotify\", description=\"Add a song to spotify playlist\", url=url)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6e68f846", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"Congratulations! You've fired the spotify JSON event\"" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tool.run(\"taylor swift\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7e599c9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/__init__.py b/langchain/__init__.py index da38f6aa5621b..ae7ddbeff9304 100644 --- a/langchain/__init__.py +++ b/langchain/__init__.py @@ -24,13 +24,17 @@ from langchain.docstore import InMemoryDocstore, Wikipedia from langchain.llms import ( Anthropic, + Banana, CerebriumAI, Cohere, ForefrontAI, GooseAI, HuggingFaceHub, + Modal, OpenAI, Petals, + StochasticAI, + Writer, ) from langchain.llms.huggingface_pipeline import HuggingFacePipeline from langchain.prompts import ( @@ -67,12 +71,16 @@ "GoogleSerperAPIWrapper", "WolframAlphaAPIWrapper", "Anthropic", + "Banana", "CerebriumAI", "Cohere", "ForefrontAI", "GooseAI", + "Modal", "OpenAI", "Petals", + "StochasticAI", + "Writer", "BasePromptTemplate", "Prompt", "FewShotPromptTemplate", diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index 606bc1279ffce..b71f7f02707ba 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -179,7 +179,7 @@ def _get_bing_search(**kwargs: Any) -> BaseTool: "bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]), "google-serper": (_get_google_serper, ["serper_api_key"]), "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), - "searx-search": (_get_searx_search, ["searx_host", "searx_host"]), + "searx-search": (_get_searx_search, ["searx_host"]), } diff --git a/langchain/chains/chat_vector_db/base.py b/langchain/chains/chat_vector_db/base.py index 8948eebd0f962..55a230092b47f 100644 --- a/langchain/chains/chat_vector_db/base.py +++ b/langchain/chains/chat_vector_db/base.py @@ -102,7 +102,7 @@ def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]: else: return {self.output_key: 
answer} - async def _acall(self, inputs: Dict[str, Any]) -> Dict[str, str]: + async def _acall(self, inputs: Dict[str, Any]) -> Dict[str, Any]: question = inputs["question"] chat_history_str = _get_chat_history(inputs["chat_history"]) vectordbkwargs = inputs.get("vectordbkwargs", {}) @@ -120,4 +120,7 @@ async def _acall(self, inputs: Dict[str, Any]) -> Dict[str, str]: new_inputs["question"] = new_question new_inputs["chat_history"] = chat_history_str answer, _ = await self.combine_docs_chain.acombine_docs(docs, **new_inputs) - return {self.output_key: answer} + if self.return_source_documents: + return {self.output_key: answer, "source_documents": docs} + else: + return {self.output_key: answer} diff --git a/langchain/chains/constitutional_ai/base.py b/langchain/chains/constitutional_ai/base.py index 0eaccb6deaef8..be78ce3f81963 100644 --- a/langchain/chains/constitutional_ai/base.py +++ b/langchain/chains/constitutional_ai/base.py @@ -16,7 +16,7 @@ class ConstitutionalChain(Chain): .. code-block:: python from langchain.llms import OpenAI - from langchian.chains import LLMChain, ConstitutionalChain + from langchain.chains import LLMChain, ConstitutionalChain qa_prompt = PromptTemplate( template="Q: {question} A:", diff --git a/langchain/chains/sql_database/prompt.py b/langchain/chains/sql_database/prompt.py index 127579e2c0b28..8b0fd1529e5f3 100644 --- a/langchain/chains/sql_database/prompt.py +++ b/langchain/chains/sql_database/prompt.py @@ -2,7 +2,7 @@ from langchain.prompts.base import CommaSeparatedListOutputParser from langchain.prompts.prompt import PromptTemplate -_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Unless the user specifies in his question a specific number of examples he wishes to obtain, always limit your query to at most {top_k} results using the LIMIT clause. You can order the results by a relevant column to return the most interesting examples in the database. +_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Unless the user specifies in his question a specific number of examples he wishes to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database. Never query for all the columns from a specific table, only ask for a the few relevant columns given the question. 
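
As a rough illustration of how the `{dialect}` and `{top_k}` placeholders in the default SQL template above get filled in, here is a minimal sketch assuming the standard `PromptTemplate` interface used elsewhere in these docs; the template string below is a shortened stand-in rather than an exact copy of the default:

```python
from langchain import PromptTemplate

# Shortened stand-in for the default SQL template; only the placeholder
# names ({dialect}, {top_k}, {input}) matter for this sketch.
template = (
    "Given an input question, first create a syntactically correct {dialect} "
    "query to run, limiting the query to at most {top_k} results.\n"
    "Question: {input}"
)

prompt = PromptTemplate(
    template=template,
    input_variables=["dialect", "top_k", "input"],
)

# At runtime the SQL chain supplies these values; here they are passed by hand.
print(prompt.format(dialect="sqlite", top_k=5, input="How many users signed up last week?"))
```
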
diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index 2ca0f1a9b6b77..5de8ae02caaaa 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -7,6 +7,7 @@ from langchain.document_loaders.docx import UnstructuredDocxLoader from langchain.document_loaders.email import UnstructuredEmailLoader from langchain.document_loaders.evernote import EverNoteLoader +from langchain.document_loaders.facebook_chat import FacebookChatLoader from langchain.document_loaders.gcs_directory import GCSDirectoryLoader from langchain.document_loaders.gcs_file import GCSFileLoader from langchain.document_loaders.gitbook import GitbookLoader @@ -15,6 +16,7 @@ from langchain.document_loaders.hn import HNLoader from langchain.document_loaders.html import UnstructuredHTMLLoader from langchain.document_loaders.imsdb import IMSDbLoader +from langchain.document_loaders.notebook import NotebookLoader from langchain.document_loaders.notion import NotionDirectoryLoader from langchain.document_loaders.obsidian import ObsidianLoader from langchain.document_loaders.online_pdf import OnlinePDFLoader @@ -34,6 +36,7 @@ ) from langchain.document_loaders.url import UnstructuredURLLoader from langchain.document_loaders.web_base import WebBaseLoader +from langchain.document_loaders.word_document import UnstructuredWordDocumentLoader from langchain.document_loaders.youtube import YoutubeLoader __all__ = [ @@ -46,6 +49,7 @@ "GoogleDriveLoader", "UnstructuredHTMLLoader", "UnstructuredPowerPointLoader", + "UnstructuredWordDocumentLoader", "UnstructuredPDFLoader", "ObsidianLoader", "UnstructuredDocxLoader", @@ -71,4 +75,6 @@ "PDFMinerLoader", "TelegramChatLoader", "SRTLoader", + "FacebookChatLoader", + "NotebookLoader", ] diff --git a/langchain/document_loaders/facebook_chat.py b/langchain/document_loaders/facebook_chat.py new file mode 100644 index 0000000000000..d2dec9f0c8ed0 --- /dev/null +++ b/langchain/document_loaders/facebook_chat.py @@ -0,0 +1,57 @@ +"""Loader that loads Facebook chat json dump.""" +import datetime +import json +from pathlib import Path +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def concatenate_rows(row: dict) -> str: + """Combine message information in a readable format ready to be used.""" + sender = row["sender_name"] + text = row["content"] + date = datetime.datetime.fromtimestamp(row["timestamp_ms"] / 1000).strftime( + "%Y-%m-%d %H:%M:%S" + ) + return f"{sender} on {date}: {text}\n\n" + + +class FacebookChatLoader(BaseLoader): + """Loader that loads Facebook messages json directory dump.""" + + def __init__(self, path: str): + """Initialize with path.""" + self.file_path = path + + def load(self) -> List[Document]: + """Load documents.""" + try: + import pandas as pd + except ImportError: + raise ValueError( + "pandas is needed for Facebook chat loader, " + "please install with `pip install pandas`" + ) + p = Path(self.file_path) + + with open(p, encoding="utf8") as f: + d = json.load(f) + + normalized_messages = pd.json_normalize(d["messages"]) + df_normalized_messages = pd.DataFrame(normalized_messages) + + # Only keep plain text messages + # (no services, nor links, hashtags, code, bold ...) 
+ df_filtered = df_normalized_messages[ + (df_normalized_messages.content.apply(lambda x: type(x) == str)) + ] + + df_filtered = df_filtered[["timestamp_ms", "content", "sender_name"]] + + text = df_filtered.apply(concatenate_rows, axis=1).str.cat(sep="") + + metadata = {"source": str(p)} + + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/notebook.py b/langchain/document_loaders/notebook.py new file mode 100644 index 0000000000000..5db545ffc8c5e --- /dev/null +++ b/langchain/document_loaders/notebook.py @@ -0,0 +1,109 @@ +"""Loader that loads .ipynb notebook files.""" +import json +from pathlib import Path +from typing import Any, List + +import pandas as pd + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def concatenate_cells( + cell: dict, include_outputs: bool, max_output_length: int, traceback: bool +) -> str: + """Combine cells information in a readable format ready to be used.""" + cell_type = cell["cell_type"] + source = cell["source"] + output = cell["outputs"] + + if include_outputs and cell_type == "code" and output: + if "ename" in output[0].keys(): + error_name = output[0]["ename"] + error_value = output[0]["evalue"] + if traceback: + traceback = output[0]["traceback"] + return ( + f"'{cell_type}' cell: '{source}'\n, gives error '{error_name}'," + f" with description '{error_value}'\n" + f"and traceback '{traceback}'\n\n" + ) + else: + return ( + f"'{cell_type}' cell: '{source}'\n, gives error '{error_name}'," + f"with description '{error_value}'\n\n" + ) + elif output[0]["output_type"] == "stream": + output = output[0]["text"] + min_output = min(max_output_length, len(output)) + return ( + f"'{cell_type}' cell: '{source}'\n with " + f"output: '{output[:min_output]}'\n\n" + ) + else: + return f"'{cell_type}' cell: '{source}'\n\n" + + return "" + + +def remove_newlines(x: Any) -> Any: + """Remove recursivelly newlines, no matter the data structure they are stored in.""" + if isinstance(x, str): + return x.replace("\n", "") + elif isinstance(x, list): + return [remove_newlines(elem) for elem in x] + elif isinstance(x, pd.DataFrame): + return x.applymap(remove_newlines) + else: + return x + + +class NotebookLoader(BaseLoader): + """Loader that loads .ipynb notebook files.""" + + def __init__( + self, + path: str, + include_outputs: bool = False, + max_output_length: int = 10, + remove_newline: bool = False, + traceback: bool = False, + ): + """Initialize with path.""" + self.file_path = path + self.include_outputs = include_outputs + self.max_output_length = max_output_length + self.remove_newline = remove_newline + self.traceback = traceback + + def load( + self, + ) -> List[Document]: + """Load documents.""" + try: + import pandas as pd + except ImportError: + raise ValueError( + "pandas is needed for Notebook Loader, " + "please install with `pip install pandas`" + ) + p = Path(self.file_path) + + with open(p, encoding="utf8") as f: + d = json.load(f) + + data = pd.json_normalize(d["cells"]) + filtered_data = data[["cell_type", "source", "outputs"]] + if self.remove_newline: + filtered_data = filtered_data.applymap(remove_newlines) + + text = filtered_data.apply( + lambda x: concatenate_cells( + x, self.include_outputs, self.max_output_length, self.traceback + ), + axis=1, + ).str.cat(sep=" ") + + metadata = {"source": str(p)} + + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/word_document.py 
b/langchain/document_loaders/word_document.py new file mode 100644 index 0000000000000..139f4d30c20de --- /dev/null +++ b/langchain/document_loaders/word_document.py @@ -0,0 +1,43 @@ +"""Loader that loads word documents.""" +import os +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredWordDocumentLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load word documents.""" + + def _get_elements(self) -> List: + from unstructured.__version__ import __version__ as __unstructured_version__ + from unstructured.file_utils.filetype import FileType, detect_filetype + + unstructured_version = tuple( + [int(x) for x in __unstructured_version__.split(".")] + ) + # NOTE(MthwRobinson) - magic will raise an import error if the libmagic + # system dependency isn't installed. If it's not installed, we'll just + # check the file extension + try: + import magic # noqa: F401 + + is_doc = detect_filetype(self.file_path) == FileType.DOC + except ImportError: + _, extension = os.path.splitext(self.file_path) + is_doc = extension == ".doc" + + if is_doc and unstructured_version < (0, 4, 11): + raise ValueError( + f"You are on unstructured version {__unstructured_version__}. " + "Partitioning .doc files is only supported in unstructured>=0.4.11. " + "Please upgrade the unstructured package and try again." + ) + + if is_doc: + from unstructured.partition.doc import partition_doc + + return partition_doc(filename=self.file_path) + else: + from unstructured.partition.docx import partition_docx + + return partition_docx(filename=self.file_path) diff --git a/langchain/document_loaders/youtube.py b/langchain/document_loaders/youtube.py index aa191e321470f..b3e0fd25fd7ad 100644 --- a/langchain/document_loaders/youtube.py +++ b/langchain/document_loaders/youtube.py @@ -10,10 +10,13 @@ class YoutubeLoader(BaseLoader): """Loader that loads Youtube transcripts.""" - def __init__(self, video_id: str, add_video_info: bool = False): + def __init__( + self, video_id: str, add_video_info: bool = False, language: str = "en" + ): """Initialize with YouTube video ID.""" self.video_id = video_id self.add_video_info = add_video_info + self.language = language @classmethod def from_youtube_url(cls, youtube_url: str, **kwargs: Any) -> YoutubeLoader: @@ -39,7 +42,9 @@ def load(self) -> List[Document]: video_info = self._get_video_info() metadata.update(video_info) - transcript_pieces = YouTubeTranscriptApi.get_transcript(self.video_id) + transcript_pieces = YouTubeTranscriptApi.get_transcript( + self.video_id, languages=(self.language,) + ) transcript = " ".join([t["text"].strip(" ") for t in transcript_pieces]) return [Document(page_content=transcript, metadata=metadata)] diff --git a/langchain/embeddings/cohere.py b/langchain/embeddings/cohere.py index de3648a906d77..c6d5055f0db27 100644 --- a/langchain/embeddings/cohere.py +++ b/langchain/embeddings/cohere.py @@ -25,7 +25,7 @@ class CohereEmbeddings(BaseModel, Embeddings): model: str = "large" """Model name to use.""" - truncate: str = "NONE" + truncate: Optional[str] = None """Truncate embeddings that are too long from start or end ("NONE"|"START"|"END")""" cohere_api_key: Optional[str] = None diff --git a/langchain/llms/__init__.py b/langchain/llms/__init__.py index cc38b63d28d99..60482dab744c1 100644 --- a/langchain/llms/__init__.py +++ b/langchain/llms/__init__.py @@ -2,28 +2,38 @@ from typing import Dict, Type from langchain.llms.ai21 import AI21 +from langchain.llms.aleph_alpha import 
AlephAlpha from langchain.llms.anthropic import Anthropic +from langchain.llms.bananadev import Banana from langchain.llms.base import BaseLLM from langchain.llms.cerebriumai import CerebriumAI from langchain.llms.cohere import Cohere +from langchain.llms.deepinfra import DeepInfra from langchain.llms.forefrontai import ForefrontAI from langchain.llms.gooseai import GooseAI from langchain.llms.huggingface_endpoint import HuggingFaceEndpoint from langchain.llms.huggingface_hub import HuggingFaceHub from langchain.llms.huggingface_pipeline import HuggingFacePipeline +from langchain.llms.modal import Modal from langchain.llms.nlpcloud import NLPCloud from langchain.llms.openai import AzureOpenAI, OpenAI from langchain.llms.petals import Petals from langchain.llms.promptlayer_openai import PromptLayerOpenAI from langchain.llms.self_hosted import SelfHostedPipeline from langchain.llms.self_hosted_hugging_face import SelfHostedHuggingFaceLLM +from langchain.llms.stochasticai import StochasticAI +from langchain.llms.writer import Writer __all__ = [ "Anthropic", + "AlephAlpha", + "Banana", "CerebriumAI", "Cohere", + "DeepInfra", "ForefrontAI", "GooseAI", + "Modal", "NLPCloud", "OpenAI", "Petals", @@ -35,17 +45,23 @@ "SelfHostedPipeline", "SelfHostedHuggingFaceLLM", "PromptLayerOpenAI", + "StochasticAI", + "Writer", ] type_to_cls_dict: Dict[str, Type[BaseLLM]] = { "ai21": AI21, + "aleph_alpha": AlephAlpha, "anthropic": Anthropic, + "bananadev": Banana, "cerebriumai": CerebriumAI, "cohere": Cohere, + "deepinfra": DeepInfra, "forefrontai": ForefrontAI, "gooseai": GooseAI, "huggingface_hub": HuggingFaceHub, "huggingface_endpoint": HuggingFaceEndpoint, + "modal": Modal, "nlpcloud": NLPCloud, "openai": OpenAI, "petals": Petals, @@ -53,4 +69,6 @@ "azure": AzureOpenAI, "self_hosted": SelfHostedPipeline, "self_hosted_hugging_face": SelfHostedHuggingFaceLLM, + "stochasticai": StochasticAI, + "writer": Writer, } diff --git a/langchain/llms/aleph_alpha.py b/langchain/llms/aleph_alpha.py new file mode 100644 index 0000000000000..810a8c5891d60 --- /dev/null +++ b/langchain/llms/aleph_alpha.py @@ -0,0 +1,236 @@ +"""Wrapper around Aleph Alpha APIs.""" +from typing import Any, Dict, List, Optional, Sequence + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + + +class AlephAlpha(LLM, BaseModel): + """Wrapper around Aleph Alpha large language models. + + To use, you should have the ``aleph_alpha_client`` python package installed, and the + environment variable ``ALEPH_ALPHA_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Parameters are explained more in depth here: + https://github.com/Aleph-Alpha/aleph-alpha-client/blob/c14b7dd2b4325c7da0d6a119f6e76385800e097b/aleph_alpha_client/completion.py#L10 + + Example: + .. 
code-block:: python + + from langchain.llms import AlephAlpha + alpeh_alpha = AlephAlpha(aleph_alpha_api_key="my-api-key") + """ + + client: Any #: :meta private: + model: Optional[str] = "luminous-base" + """Model name to use.""" + + maximum_tokens: int = 64 + """The maximum number of tokens to be generated.""" + + temperature: float = 0.0 + """A non-negative float that tunes the degree of randomness in generation.""" + + top_k: int = 0 + """Number of most likely tokens to consider at each step.""" + + top_p: float = 0.0 + """Total probability mass of tokens to consider at each step.""" + + presence_penalty: float = 0.0 + """Penalizes repeated tokens.""" + + frequency_penalty: float = 0.0 + """Penalizes repeated tokens according to frequency.""" + + repetition_penalties_include_prompt: Optional[bool] = False + """Flag deciding whether presence penalty or frequency penalty are + updated from the prompt.""" + + use_multiplicative_presence_penalty: Optional[bool] = False + """Flag deciding whether presence penalty is applied + multiplicatively (True) or additively (False).""" + + penalty_bias: Optional[str] = None + """Penalty bias for the completion.""" + + penalty_exceptions: Optional[List[str]] = None + """List of strings that may be generated without penalty, + regardless of other penalty settings""" + + penalty_exceptions_include_stop_sequences: Optional[bool] = None + """Should stop_sequences be included in penalty_exceptions.""" + + best_of: Optional[int] = None + """returns the one with the "best of" results + (highest log probability per token) + """ + + n: int = 1 + """How many completions to generate for each prompt.""" + + logit_bias: Optional[Dict[int, float]] = None + """The logit bias allows to influence the likelihood of generating tokens.""" + + log_probs: Optional[int] = None + """Number of top log probabilities to be returned for each generated token.""" + + tokens: Optional[bool] = False + """return tokens of completion.""" + + disable_optimizations: Optional[bool] = False + + minimum_tokens: Optional[int] = 0 + """Generate at least this number of tokens.""" + + echo: bool = False + """Echo the prompt in the completion.""" + + use_multiplicative_frequency_penalty: bool = False + + sequence_penalty: float = 0.0 + + sequence_penalty_min_length: int = 2 + + use_multiplicative_sequence_penalty: bool = False + + completion_bias_inclusion: Optional[Sequence[str]] = None + + completion_bias_inclusion_first_token_only: bool = False + + completion_bias_exclusion: Optional[Sequence[str]] = None + + completion_bias_exclusion_first_token_only: bool = False + """Only consider the first token for the completion_bias_exclusion.""" + + contextual_control_threshold: Optional[float] = None + """If set to None, attention control parameters only apply to those tokens that have + explicitly been set in the request. + If set to a non-None value, control parameters are also applied to similar tokens. + """ + + control_log_additive: Optional[bool] = True + """True: apply control by adding the log(control_factor) to attention scores. 
+ False: (attention_scores - - attention_scores.min(-1)) * control_factor + """ + + repetition_penalties_include_completion: bool = True + """Flag deciding whether presence penalty or frequency penalty + are updated from the completion.""" + + raw_completion: bool = False + """Force the raw completion of the model to be returned.""" + + aleph_alpha_api_key: Optional[str] = None + """API key for Aleph Alpha API.""" + + stop_sequences: Optional[List[str]] = None + """Stop sequences to use.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + aleph_alpha_api_key = get_from_dict_or_env( + values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY" + ) + try: + import aleph_alpha_client + + values["client"] = aleph_alpha_client.Client(token=aleph_alpha_api_key) + except ImportError: + raise ValueError( + "Could not import aleph_alpha_client python package. " + "Please it install it with `pip install aleph_alpha_client`." + ) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling the Aleph Alpha API.""" + return { + "maximum_tokens": self.maximum_tokens, + "temperature": self.temperature, + "top_k": self.top_k, + "top_p": self.top_p, + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "n": self.n, + "repetition_penalties_include_prompt": self.repetition_penalties_include_prompt, # noqa: E501 + "use_multiplicative_presence_penalty": self.use_multiplicative_presence_penalty, # noqa: E501 + "penalty_bias": self.penalty_bias, + "penalty_exceptions": self.penalty_exceptions, + "penalty_exceptions_include_stop_sequences": self.penalty_exceptions_include_stop_sequences, # noqa: E501 + "best_of": self.best_of, + "logit_bias": self.logit_bias, + "log_probs": self.log_probs, + "tokens": self.tokens, + "disable_optimizations": self.disable_optimizations, + "minimum_tokens": self.minimum_tokens, + "echo": self.echo, + "use_multiplicative_frequency_penalty": self.use_multiplicative_frequency_penalty, # noqa: E501 + "sequence_penalty": self.sequence_penalty, + "sequence_penalty_min_length": self.sequence_penalty_min_length, + "use_multiplicative_sequence_penalty": self.use_multiplicative_sequence_penalty, # noqa: E501 + "completion_bias_inclusion": self.completion_bias_inclusion, + "completion_bias_inclusion_first_token_only": self.completion_bias_inclusion_first_token_only, # noqa: E501 + "completion_bias_exclusion": self.completion_bias_exclusion, + "completion_bias_exclusion_first_token_only": self.completion_bias_exclusion_first_token_only, # noqa: E501 + "contextual_control_threshold": self.contextual_control_threshold, + "control_log_additive": self.control_log_additive, + "repetition_penalties_include_completion": self.repetition_penalties_include_completion, # noqa: E501 + "raw_completion": self.raw_completion, + } + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return {**{"model": self.model}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "alpeh_alpha" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to Aleph Alpha's completion endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. 
+ + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = alpeh_alpha("Tell me a joke.") + """ + from aleph_alpha_client import CompletionRequest, Prompt + + params = self._default_params + if self.stop_sequences is not None and stop is not None: + raise ValueError( + "stop sequences found in both the input and default params." + ) + elif self.stop_sequences is not None: + params["stop_sequences"] = self.stop_sequences + else: + params["stop_sequences"] = stop + request = CompletionRequest(prompt=Prompt.from_text(prompt), **params) + response = self.client.complete(model=self.model, request=request) + text = response.completions[0].completion + # If stop tokens are provided, Aleph Alpha's endpoint returns them. + # In order to make this consistent with other endpoints, we strip them. + if stop is not None or self.stop_sequences is not None: + text = enforce_stop_tokens(text, params["stop_sequences"]) + return text diff --git a/langchain/llms/anthropic.py b/langchain/llms/anthropic.py index f2e1df4d5d1d4..a5c57a94c6dac 100644 --- a/langchain/llms/anthropic.py +++ b/langchain/llms/anthropic.py @@ -18,7 +18,7 @@ class Anthropic(LLM, BaseModel): Example: .. code-block:: python import anthropic - from langchain import Anthropic + from langchain.llms import Anthropic model = Anthropic(model="", anthropic_api_key="my-api-key") # Simplest invocation, automatically wrapped with HUMAN_PROMPT diff --git a/langchain/llms/bananadev.py b/langchain/llms/bananadev.py new file mode 100644 index 0000000000000..03d336c28d69e --- /dev/null +++ b/langchain/llms/bananadev.py @@ -0,0 +1,112 @@ +"""Wrapper around Banana API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class Banana(LLM, BaseModel): + """Wrapper around Banana large language models. + + To use, you should have the ``banana-dev`` python package installed, + and the environment variable ``BANANA_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + from langchain.llms import Banana + banana = Banana(model_key="") + """ + + model_key: str = "" + """model endpoint to use""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + banana_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transfered to model_kwargs. 
+ Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + banana_api_key = get_from_dict_or_env( + values, "banana_api_key", "BANANA_API_KEY" + ) + values["banana_api_key"] = banana_api_key + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"model_key": self.model_key}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "banana" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call to Banana endpoint.""" + try: + import banana_dev as banana + except ImportError: + raise ValueError( + "Could not import banana-dev python package. " + "Please install it with `pip install banana-dev`." + ) + params = self.model_kwargs or {} + api_key = self.banana_api_key + model_key = self.model_key + model_inputs = { + # a json specific to your model. + "prompt": prompt, + **params, + } + response = banana.run(api_key, model_key, model_inputs) + try: + text = response["modelOutputs"][0]["output"] + except KeyError: + raise ValueError( + f"Response should be {'modelOutputs': [{'output': 'text'}]}." + f"Response was: {response}" + ) + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/cerebriumai.py b/langchain/llms/cerebriumai.py index ca219be1b7eef..29f0d2fc21988 100644 --- a/langchain/llms/cerebriumai.py +++ b/langchain/llms/cerebriumai.py @@ -22,7 +22,7 @@ class CerebriumAI(LLM, BaseModel): Example: .. code-block:: python - from langchain import CerebriumAI + from langchain.llms import CerebriumAI cerebrium = CerebriumAI(endpoint_url="") """ diff --git a/langchain/llms/cohere.py b/langchain/llms/cohere.py index adb50ad3cd8cb..66bff40eb89a6 100644 --- a/langchain/llms/cohere.py +++ b/langchain/llms/cohere.py @@ -21,7 +21,7 @@ class Cohere(LLM, BaseModel): Example: .. 
code-block:: python - from langchain import Cohere + from langchain.llms import Cohere cohere = Cohere(model="gptd-instruct-tft", cohere_api_key="my-api-key") """ @@ -47,6 +47,10 @@ class Cohere(LLM, BaseModel): presence_penalty: int = 0 """Penalizes repeated tokens.""" + truncate: Optional[str] = None + """Specify how the client handles inputs longer than the maximum token + length: Truncate from START, END or NONE""" + cohere_api_key: Optional[str] = None stop: Optional[List[str]] = None @@ -83,6 +87,7 @@ def _default_params(self) -> Dict[str, Any]: "p": self.p, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, + "truncate": self.truncate, } @property diff --git a/langchain/llms/deepinfra.py b/langchain/llms/deepinfra.py new file mode 100644 index 0000000000000..8993a4bf3b920 --- /dev/null +++ b/langchain/llms/deepinfra.py @@ -0,0 +1,97 @@ +"""Wrapper around DeepInfra APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +DEFAULT_MODEL_ID = "google/flan-t5-xl" + + +class DeepInfra(LLM, BaseModel): + """Wrapper around DeepInfra deployed models. + + To use, you should have the ``requests`` python package installed, and the + environment variable ``DEEPINFRA_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Only supports `text-generation` and `text2text-generation` for now. + + Example: + .. code-block:: python + + from langchain.llms import DeepInfra + di = DeepInfra(model_id="google/flan-t5-xl", + deepinfra_api_token="my-api-key") + """ + + model_id: str = DEFAULT_MODEL_ID + model_kwargs: Optional[dict] = None + + deepinfra_api_token: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + deepinfra_api_token = get_from_dict_or_env( + values, "deepinfra_api_token", "DEEPINFRA_API_TOKEN" + ) + values["deepinfra_api_token"] = deepinfra_api_token + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"model_id": self.model_id}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "deepinfra" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to DeepInfra's inference API endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. 
code-block:: python + + response = di("Tell me a joke.") + """ + _model_kwargs = self.model_kwargs or {} + + res = requests.post( + f"https://api.deepinfra.com/v1/inference/{self.model_id}", + headers={ + "Authorization": f"bearer {self.deepinfra_api_token}", + "Content-Type": "application/json", + }, + json={"input": prompt, **_model_kwargs}, + ) + + if res.status_code != 200: + raise ValueError("Error raised by inference API") + text = res.json()[0]["generated_text"] + + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/forefrontai.py b/langchain/llms/forefrontai.py index f76003b6fc0fc..806bcd85454ac 100644 --- a/langchain/llms/forefrontai.py +++ b/langchain/llms/forefrontai.py @@ -18,7 +18,7 @@ class ForefrontAI(LLM, BaseModel): Example: .. code-block:: python - from langchain import ForefrontAI + from langchain.llms import ForefrontAI forefrontai = ForefrontAI(endpoint_url="") """ diff --git a/langchain/llms/gooseai.py b/langchain/llms/gooseai.py index 891d511f5e7c7..89f17f18d326b 100644 --- a/langchain/llms/gooseai.py +++ b/langchain/llms/gooseai.py @@ -21,7 +21,7 @@ class GooseAI(LLM, BaseModel): Example: .. code-block:: python - from langchain import GooseAI + from langchain.llms import GooseAI gooseai = GooseAI(model_name="gpt-neo-20b") """ diff --git a/langchain/llms/huggingface_hub.py b/langchain/llms/huggingface_hub.py index ef53275ddd855..b9c4098879a98 100644 --- a/langchain/llms/huggingface_hub.py +++ b/langchain/llms/huggingface_hub.py @@ -23,7 +23,7 @@ class HuggingFaceHub(LLM, BaseModel): Example: .. code-block:: python - from langchain import HuggingFaceHub + from langchain.llms import HuggingFaceHub hf = HuggingFaceHub(repo_id="gpt2", huggingfacehub_api_token="my-api-key") """ diff --git a/langchain/llms/huggingface_pipeline.py b/langchain/llms/huggingface_pipeline.py index ee5678c6fc687..1138839cf6306 100644 --- a/langchain/llms/huggingface_pipeline.py +++ b/langchain/llms/huggingface_pipeline.py @@ -25,14 +25,14 @@ class HuggingFacePipeline(LLM, BaseModel): Example using from_model_id: .. code-block:: python - from langchain.llms.huggingface_pipeline import HuggingFacePipeline + from langchain.llms import HuggingFacePipeline hf = HuggingFacePipeline.from_model_id( model_id="gpt2", task="text-generation" ) Example passing pipeline in directly: .. code-block:: python - from langchain.llms.huggingface_pipeline import HuggingFacePipeline + from langchain.llms import HuggingFacePipeline from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline model_id = "gpt2" diff --git a/langchain/llms/modal.py b/langchain/llms/modal.py new file mode 100644 index 0000000000000..5037858a31e19 --- /dev/null +++ b/langchain/llms/modal.py @@ -0,0 +1,92 @@ +"""Wrapper around Modal API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens + +logger = logging.getLogger(__name__) + + +class Modal(LLM, BaseModel): + """Wrapper around Modal large language models. + + To use, you should have the ``modal-client`` python package installed. + + Any parameters that are valid to be passed to the call can be passed + in, even if not explicitly saved on this class. + + Example: + .. 
code-block:: python + from langchain.llms import Modal + modal = Modal(endpoint_url="") + + """ + + endpoint_url: str = "" + """model endpoint to use""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transferred to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"endpoint_url": self.endpoint_url}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "modal" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call to Modal endpoint.""" + params = self.model_kwargs or {} + response = requests.post( + url=self.endpoint_url, + headers={ + "Content-Type": "application/json", + }, + json={"prompt": prompt, **params}, + ) + try: + response_json = response.json() + # The endpoint is expected to return the generated text under "prompt". + text = response_json["prompt"] + except KeyError: + raise ValueError("LangChain requires 'prompt' key in response.") + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/nlpcloud.py b/langchain/llms/nlpcloud.py index 94f0df7de0519..2c04c41960d48 100644 --- a/langchain/llms/nlpcloud.py +++ b/langchain/llms/nlpcloud.py @@ -16,7 +16,7 @@ class NLPCloud(LLM, BaseModel): Example: .. code-block:: python - from langchain import NLPCloud + from langchain.llms import NLPCloud nlpcloud = NLPCloud(model="gpt-neox-20b") """ diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py index 973d35ddc16bd..728d50873acf1 100644 --- a/langchain/llms/openai.py +++ b/langchain/llms/openai.py @@ -75,7 +75,7 @@ class BaseOpenAI(BaseLLM, BaseModel): Example: ..
code-block:: python - from langchain import OpenAI + from langchain.llms import OpenAI openai = OpenAI(model_name="text-davinci-003") """ @@ -251,7 +251,9 @@ def _generate( prompt=_prompts, **params ): self.callback_manager.on_llm_new_token( - stream_resp["choices"][0]["text"], verbose=self.verbose + stream_resp["choices"][0]["text"], + verbose=self.verbose, + logprobs=stream_resp["choices"][0]["logprobs"], ) _update_response(response, stream_resp) choices.extend(response["choices"]) @@ -285,11 +287,15 @@ async def _agenerate( ): if self.callback_manager.is_async: await self.callback_manager.on_llm_new_token( - stream_resp["choices"][0]["text"], verbose=self.verbose + stream_resp["choices"][0]["text"], + verbose=self.verbose, + logprobs=stream_resp["choices"][0]["logprobs"], ) else: self.callback_manager.on_llm_new_token( - stream_resp["choices"][0]["text"], verbose=self.verbose + stream_resp["choices"][0]["text"], + verbose=self.verbose, + logprobs=stream_resp["choices"][0]["logprobs"], ) _update_response(response, stream_resp) choices.extend(response["choices"]) diff --git a/langchain/llms/petals.py b/langchain/llms/petals.py index 535d5d1b1d423..bffe59ba817a3 100644 --- a/langchain/llms/petals.py +++ b/langchain/llms/petals.py @@ -22,7 +22,7 @@ class Petals(LLM, BaseModel): Example: .. code-block:: python - from langchain import petals + from langchain.llms import petals petals = Petals() """ diff --git a/langchain/llms/promptlayer_openai.py b/langchain/llms/promptlayer_openai.py index 23cba853df385..2704218e4e66f 100644 --- a/langchain/llms/promptlayer_openai.py +++ b/langchain/llms/promptlayer_openai.py @@ -23,7 +23,7 @@ class PromptLayerOpenAI(OpenAI, BaseModel): Example: .. code-block:: python - from langchain import OpenAI + from langchain.llms import OpenAI openai = OpenAI(model_name="text-davinci-003") """ diff --git a/langchain/llms/stochasticai.py b/langchain/llms/stochasticai.py new file mode 100644 index 0000000000000..21c32b2167402 --- /dev/null +++ b/langchain/llms/stochasticai.py @@ -0,0 +1,130 @@ +"""Wrapper around StochasticAI APIs.""" +import logging +import time +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class StochasticAI(LLM, BaseModel): + """Wrapper around StochasticAI large language models. + + To use, you should have the environment variable ``STOCHASTICAI_API_KEY`` + set with your API key. + + Example: + .. 
code-block:: python + + from langchain.llms import StochasticAI + stochasticai = StochasticAI(api_url="") + """ + + api_url: str = "" + """Model name to use.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + stochasticai_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transfered to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + stochasticai_api_key = get_from_dict_or_env( + values, "stochasticai_api_key", "STOCHASTICAI_API_KEY" + ) + values["stochasticai_api_key"] = stochasticai_api_key + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"endpoint_url": self.api_url}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "stochasticai" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to StochasticAI's complete endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. 
code-block:: python + + response = StochasticAI("Tell me a joke.") + """ + params = self.model_kwargs or {} + response_post = requests.post( + url=self.api_url, + json={"prompt": prompt, "params": params}, + headers={ + "apiKey": f"{self.stochasticai_api_key}", + "Accept": "application/json", + "Content-Type": "application/json", + }, + ) + response_post.raise_for_status() + response_post_json = response_post.json() + completed = False + while not completed: + response_get = requests.get( + url=response_post_json["data"]["responseUrl"], + headers={ + "apiKey": f"{self.stochasticai_api_key}", + "Accept": "application/json", + "Content-Type": "application/json", + }, + ) + response_get.raise_for_status() + response_get_json = response_get.json()["data"] + text = response_get_json.get("completion") + completed = text is not None + time.sleep(0.5) + text = text[0] + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/writer.py b/langchain/llms/writer.py new file mode 100644 index 0000000000000..7959bac6f33f5 --- /dev/null +++ b/langchain/llms/writer.py @@ -0,0 +1,155 @@ +"""Wrapper around Writer APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + + +class Writer(LLM, BaseModel): + """Wrapper around Writer large language models. + + To use, you should have the environment variable ``WRITER_API_KEY`` + set with your API key. + + Example: + .. code-block:: python + + from langchain import Writer + writer = Writer(model_id="palmyra-base") + """ + + model_id: str = "palmyra-base" + """Model name to use.""" + + tokens_to_generate: int = 24 + """Max number of tokens to generate.""" + + logprobs: bool = False + """Whether to return log probabilities.""" + + temperature: float = 1.0 + """What sampling temperature to use.""" + + length: int = 256 + """The maximum number of tokens to generate in the completion.""" + + top_p: float = 1.0 + """Total probability mass of tokens to consider at each step.""" + + top_k: int = 1 + """The number of highest probability vocabulary tokens to + keep for top-k-filtering.""" + + repetition_penalty: float = 1.0 + """Penalizes repeated tokens according to frequency.""" + + random_seed: int = 0 + """The model generates random results. + Changing the random seed alone will produce a different response + with similar characteristics. It is possible to reproduce results + by fixing the random seed (assuming all other hyperparameters + are also fixed)""" + + beam_search_diversity_rate: float = 1.0 + """Only applies to beam search, i.e. when the beam width is >1. + A higher value encourages beam search to return a more diverse + set of candidates""" + + beam_width: Optional[int] = None + """The number of concurrent candidates to keep track of during + beam search""" + + length_pentaly: float = 1.0 + """Only applies to beam search, i.e. when the beam width is >1. 
+ Larger values penalize long candidates more heavily, thus preferring + shorter candidates""" + + writer_api_key: Optional[str] = None + + stop: Optional[List[str]] = None + """Sequences when completion generation will stop""" + + base_url: Optional[str] = None + """Base url to use, if None decides based on model name.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + writer_api_key = get_from_dict_or_env( + values, "writer_api_key", "WRITER_API_KEY" + ) + values["writer_api_key"] = writer_api_key + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling Writer API.""" + return { + "tokens_to_generate": self.tokens_to_generate, + "stop": self.stop, + "logprobs": self.logprobs, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "repetition_penalty": self.repetition_penalty, + "random_seed": self.random_seed, + "beam_search_diversity_rate": self.beam_search_diversity_rate, + "beam_width": self.beam_width, + "length_pentaly": self.length_pentaly, + } + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_id": self.model_id}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "writer" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to Writer's complete endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = Writer("Tell me a joke.") + """ + if self.base_url is not None: + base_url = self.base_url + else: + base_url = ( + f"https://api.llm.writer.com/v1/models/{self.model_id}/completions" + ) + response = requests.post( + url=base_url, + headers={ + "Authorization": f"Bearer {self.writer_api_key}", + "Content-Type": "application/json", + "Accept": "application/json", + }, + json={"prompt": prompt, **self._default_params}, + ) + text = response.text + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/sql_database.py b/langchain/sql_database.py index 2abca30c71141..3cb96dea87bf9 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -94,6 +94,7 @@ def get_table_info(self, table_names: Optional[List[str]] = None) -> str: tbl for tbl in self._metadata.sorted_tables if tbl.name in set(all_table_names) + and not (self.dialect == "sqlite" and tbl.name.startswith("sqlite_")) ] tables = [] diff --git a/langchain/text_splitter.py b/langchain/text_splitter.py index 76f82f14e77c6..6012b8389ac17 100644 --- a/langchain/text_splitter.py +++ b/langchain/text_splitter.py @@ -250,7 +250,7 @@ def split_text(self, text: str) -> List[str]: # Now go merging things, recursively splitting longer texts.
_good_splits = [] for s in splits: - if len(s) < self._chunk_size: + if self._length_function(s) < self._chunk_size: _good_splits.append(s) else: if _good_splits: diff --git a/langchain/tools/__init__.py b/langchain/tools/__init__.py index 31d0c58bf077b..fcacd361367f7 100644 --- a/langchain/tools/__init__.py +++ b/langchain/tools/__init__.py @@ -1,5 +1,6 @@ """Core toolkit implementations.""" from langchain.tools.base import BaseTool +from langchain.tools.ifttt import IFTTTWebhook -__all__ = ["BaseTool"] +__all__ = ["BaseTool", "IFTTTWebhook"] diff --git a/langchain/tools/ifttt.py b/langchain/tools/ifttt.py new file mode 100644 index 0000000000000..8d3d943af0c59 --- /dev/null +++ b/langchain/tools/ifttt.py @@ -0,0 +1,57 @@ +"""From https://github.com/SidU/teams-langchain-js/wiki/Connecting-IFTTT-Services. + +# Creating a webhook +- Go to https://ifttt.com/create + +# Configuring the "If This" +- Click on the "If This" button in the IFTTT interface. +- Search for "Webhooks" in the search bar. +- Choose the first option for "Receive a web request with a JSON payload." +- Choose an Event Name that is specific to the service you plan to connect to. +This will make it easier for you to manage the webhook URL. +For example, if you're connecting to Spotify, you could use "Spotify" as your +Event Name. +- Click the "Create Trigger" button to save your settings and create your webhook. + +# Configuring the "Then That" +- Tap on the "Then That" button in the IFTTT interface. +- Search for the service you want to connect, such as Spotify. +- Choose an action from the service, such as "Add track to a playlist". +- Configure the action by specifying the necessary details, such as the playlist name, +e.g., "Songs from AI". +- Reference the JSON Payload received by the Webhook in your action. For the Spotify +scenario, choose "{{JsonPayload}}" as your search query. +- Tap the "Create Action" button to save your action settings. +- Once you have finished configuring your action, click the "Finish" button to +complete the setup. +- Congratulations! You have successfully connected the Webhook to the desired +service, and you're ready to start receiving data and triggering actions 🎉 + +# Finishing up +- To get your webhook URL go to https://ifttt.com/maker_webhooks/settings +- Copy the IFTTT key value from there. The URL is of the form +https://maker.ifttt.com/use/YOUR_IFTTT_KEY. Grab the YOUR_IFTTT_KEY value. +""" +import requests + +from langchain.tools.base import BaseTool + + +class IFTTTWebhook(BaseTool): + """IFTTT Webhook. + + Args: + name: name of the tool + description: description of the tool + url: url to hit with the json event. 
+ """ + + url: str + + def _run(self, tool_input: str) -> str: + body = {"this": tool_input} + response = requests.post(self.url, data=body) + return response.text + + async def _arun(self, tool_input: str) -> str: + raise NotImplementedError("Not implemented.") diff --git a/langchain/utilities/google_search.py b/langchain/utilities/google_search.py index 95ded0d688751..9b1ec5b6fd86c 100644 --- a/langchain/utilities/google_search.py +++ b/langchain/utilities/google_search.py @@ -86,7 +86,6 @@ def validate_environment(cls, values: Dict) -> Dict: service = build("customsearch", "v1", developerKey=google_api_key) values["search_engine"] = service - # TODO: Add error handling if keys are missing return values def run(self, query: str) -> str: diff --git a/langchain/utilities/loading.py b/langchain/utilities/loading.py index 6b70318d450aa..45569e4a1dd85 100644 --- a/langchain/utilities/loading.py +++ b/langchain/utilities/loading.py @@ -1,4 +1,4 @@ -"""Utilities for loading configurations from langchian-hub.""" +"""Utilities for loading configurations from langchain-hub.""" import os import re diff --git a/langchain/utilities/searx_search.py b/langchain/utilities/searx_search.py index e0814b2f7621e..d8ba5f6e80322 100644 --- a/langchain/utilities/searx_search.py +++ b/langchain/utilities/searx_search.py @@ -78,6 +78,28 @@ # or even: s = SearxSearchWrapper("langchain library !gh") + +In some situations you might want to pass an extra string to the search query. +For example when the `run()` method is called by an agent. The search suffix can +also be used as a way to pass extra parameters to searx or the underlying search +engines. + + .. code-block:: python + + # select the github engine and pass the search suffix + s = SearchWrapper("langchain library", query_suffix="!gh") + + + s = SearchWrapper("langchain library") + # select github the conventional google search syntax + s.run("large language models", query_suffix="site:github.com") + + +*NOTE*: A search suffix can be defined on both the instance and the method level. +The resulting query will be the concatenation of the two with the former taking +precedence. + + See `SearxNG Configured Engines `_ and `SearxNG Search Syntax `_ @@ -128,12 +150,15 @@ def __str__(self) -> str: @property def results(self) -> Any: - """Silence mypy for accessing this field.""" + """Silence mypy for accessing this field. + + :meta private: + """ return self.get("results") @property def answers(self) -> Any: - """Accessor helper on the json result.""" + """Helper accessor on the json result.""" return self.get("answers") @@ -171,6 +196,7 @@ class SearxSearchWrapper(BaseModel): params: dict = Field(default_factory=_get_default_params) headers: Optional[dict] = None engines: Optional[List[str]] = [] + query_suffix: Optional[str] = "" k: int = 10 @validator("unsecure") @@ -232,13 +258,20 @@ def _searx_api_query(self, params: dict) -> SearxResults: self._result = res return res - def run(self, query: str, engines: List[str] = [], **kwargs: Any) -> str: + def run( + self, + query: str, + engines: Optional[List[str]] = None, + query_suffix: Optional[str] = "", + **kwargs: Any, + ) -> str: """Run query through Searx API and parse results. You can pass any other params to the searx query API. Args: query: The query to search for. + query_suffix: Extra suffix appended to the query. engines: List of engines to use for the query. **kwargs: extra parameters to pass to the searx API. 
@@ -251,12 +284,21 @@ def run(self, query: str, engines: List[str] = [], **kwargs: Any) -> str: searx = SearxSearchWrapper(searx_host="http://my.searx.host") searx.run("what is the weather in France ?", engine="qwant") + # the same result can be achieved using the `!` syntax of searx + # to select the engine using `query_suffix` + searx.run("what is the weather in France ?", query_suffix="!qwant") """ _params = { "q": query, } params = {**self.params, **_params, **kwargs} + if self.query_suffix and len(self.query_suffix) > 0: + params["q"] += " " + self.query_suffix + + if isinstance(query_suffix, str) and len(query_suffix) > 0: + params["q"] += " " + query_suffix + if isinstance(engines, list) and len(engines) > 0: params["engines"] = ",".join(engines) @@ -274,13 +316,20 @@ def run(self, query: str, engines: List[str] = [], **kwargs: Any) -> str: return toret def results( - self, query: str, num_results: int, engines: List[str] = [], **kwargs: Any + self, + query: str, + num_results: int, + engines: Optional[List[str]] = None, + query_suffix: Optional[str] = "", + **kwargs: Any, ) -> List[Dict]: """Run query through Searx API and returns the results with metadata. Args: query: The query to search for. + query_suffix: Extra suffix appended to the query. + num_results: Limit the number of results to return. engines: List of engines to use for the query. @@ -308,6 +357,10 @@ def results( "q": query, } params = {**self.params, **_params, **kwargs} + if self.query_suffix and len(self.query_suffix) > 0: + params["q"] += " " + self.query_suffix + if isinstance(query_suffix, str) and len(query_suffix) > 0: + params["q"] += " " + query_suffix if isinstance(engines, list) and len(engines) > 0: params["engines"] = ",".join(engines) results = self._searx_api_query(params).results[:num_results] diff --git a/langchain/utilities/wolfram_alpha.py b/langchain/utilities/wolfram_alpha.py index d64ce410e75da..a27aec051f40b 100644 --- a/langchain/utilities/wolfram_alpha.py +++ b/langchain/utilities/wolfram_alpha.py @@ -43,10 +43,8 @@ def validate_environment(cls, values: Dict) -> Dict: "Please install it with `pip install wolframalpha`" ) client = wolframalpha.Client(wolfram_alpha_appid) - values["wolfram_client"] = client - # TODO: Add error handling if keys are missing return values def run(self, query: str) -> str: diff --git a/poetry.lock b/poetry.lock index d35ba0db734f5..7e959dd421219 100644 --- a/poetry.lock +++ b/poetry.lock @@ -12,6 +12,21 @@ files = [ {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, ] +[[package]] +name = "aiodns" +version = "3.0.0" +description = "Simple DNS resolver for asyncio" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "aiodns-3.0.0-py3-none-any.whl", hash = "sha256:2b19bc5f97e5c936638d28e665923c093d8af2bf3aa88d35c43417fa25d136a2"}, + {file = "aiodns-3.0.0.tar.gz", hash = "sha256:946bdfabe743fceeeb093c8a010f5d1645f708a241be849e17edfb0e49e08cd6"}, +] + +[package.dependencies] +pycares = ">=4.0.0" + [[package]] name = "aiohttp" version = "3.8.3" @@ -121,6 +136,21 @@ yarl = ">=1.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns", "cchardet"] +[[package]] +name = "aiohttp-retry" +version = "2.8.3" +description = "Simple retry client for aiohttp" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiohttp_retry-2.8.3-py3-none-any.whl", hash = "sha256:3aeeead8f6afe48272db93ced9440cf4eda8b6fd7ee2abb25357b7eb28525b45"}, + {file = 
"aiohttp_retry-2.8.3.tar.gz", hash = "sha256:9a8e637e31682ad36e1ff9f8bcba912fcfc7d7041722bc901a4b948da4d71ea9"}, +] + +[package.dependencies] +aiohttp = "*" + [[package]] name = "aiosignal" version = "1.3.1" @@ -148,6 +178,32 @@ files = [ {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"}, ] +[[package]] +name = "aleph-alpha-client" +version = "2.15.0" +description = "python client to interact with Aleph Alpha api endpoints" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "aleph-alpha-client-2.15.0.tar.gz", hash = "sha256:b50329572c319fbf0d17fab3e34a46c15f164931f40ff6eb2379c8276d4728fa"}, + {file = "aleph_alpha_client-2.15.0-py3-none-any.whl", hash = "sha256:1cc0e17cd5f4a578f10e3cc22379f9e6765fed112108bcd84d5e3a77a094c58f"}, +] + +[package.dependencies] +aiodns = ">=3.0.0" +aiohttp = ">=3.8.3" +aiohttp-retry = ">=2.8.3" +requests = ">=2.28" +tokenizers = ">=0.13.2" +urllib3 = ">=1.26" + +[package.extras] +dev = ["black", "ipykernel", "mypy", "nbconvert", "pytest", "pytest-aiohttp", "pytest-cov", "pytest-dotenv", "pytest-httpserver", "types-requests"] +docs = ["sphinx", "sphinx-rtd-theme"] +test = ["pytest", "pytest-aiohttp", "pytest-cov", "pytest-dotenv", "pytest-httpserver"] +types = ["mypy", "types-requests"] + [[package]] name = "anthropic" version = "0.2.2" @@ -4066,6 +4122,74 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.5.0" +[[package]] +name = "pycares" +version = "4.3.0" +description = "Python interface for c-ares" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "pycares-4.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:19c9cdd3322d422931982939773e453e491dfc5c0b2e23d7266959315c7a0824"}, + {file = "pycares-4.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e56e9cdf46a092970dc4b75bbabddea9f480be5eeadc3fcae3eb5c6807c4136"}, + {file = "pycares-4.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c75a6241c79b935048272cb77df498da64b8defc8c4b29fdf9870e43ba4cbb4"}, + {file = "pycares-4.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24d8654fac3742791b8bef59d1fbb3e19ae6a5c48876a6d98659f7c66ee546c4"}, + {file = "pycares-4.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebf50b049a245880f1aa16a6f72c4408e0a65b49ea1d3bf13383a44a2cabd2bf"}, + {file = "pycares-4.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:84daf560962763c0359fd79c750ef480f0fda40c08b57765088dbe362e8dc452"}, + {file = "pycares-4.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:978d10da7ee74b9979c494afa8b646411119ad0186a29c7f13c72bb4295630c6"}, + {file = "pycares-4.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c5b9d7fe52eb3d243f5ead58d5c0011884226d961df8360a34618c38c7515"}, + {file = "pycares-4.3.0-cp310-cp310-win32.whl", hash = "sha256:da7c7089ae617317d2cbe38baefd3821387b3bfef7b3ee5b797b871cb1257974"}, + {file = "pycares-4.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7106dc683db30e1d851283b7b9df7a5ea4964d6bdd000d918d91d4b1f9bed329"}, + {file = "pycares-4.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4e7a24ecef0b1933f2a3fdbf328d1b529a76cda113f8364fa0742e5b3bd76566"}, + {file = "pycares-4.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7abccc2aa4771c06994e4d9ed596453061e2b8846f887d9c98a64ccdaf4790a"}, + {file = "pycares-4.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:531fed46c5ed798a914c3207be4ae7b297c4d09e4183d3cf8fd9ee59a55d5080"}, + {file = "pycares-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c9335175af0c64a1e0ba67bdd349eb62d4eea0ad02c235ccdf0d535fd20f323"}, + {file = "pycares-4.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5f0e95535027d2dcd51e780410632b0d3ed7e9e5ceb25dc0fe937f2c2960079"}, + {file = "pycares-4.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3692179ce5fb96908ba342e1e5303608d0c976f0d5d4619fa9d3d6d9d5a9a1b4"}, + {file = "pycares-4.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c4cb6cc7fe8e0606d30b60367f59fe26d1472e88555d61e202db70dea5c8edb"}, + {file = "pycares-4.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3215445396c74103e2054e6b349d9e85883ceda2006d0039fc2d58c9b11818a2"}, + {file = "pycares-4.3.0-cp311-cp311-win32.whl", hash = "sha256:6a0c0c3a0adf490bba9dbb37dbd07ec81e4a6584f095036ac34f06a633710ffe"}, + {file = "pycares-4.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:995cb37cc39bd40ca87bb16555a0f7724f3be30d9f9059a4caab2fde45b1b903"}, + {file = "pycares-4.3.0-cp36-cp36m-win32.whl", hash = "sha256:4c9187be72449c975c11daa1d94d7ddcc494f8a4c37a6c18f977cd7024a531d9"}, + {file = "pycares-4.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d7405ba10a2903a58b8b0faedcb54994c9ee002ad01963587fabf93e7e479783"}, + {file = "pycares-4.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:40aaa12081495f879f11f4cfc95edfec1ea14711188563102f9e33fe98728fac"}, + {file = "pycares-4.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4972cac24b66c5997f3a3e2cb608e408066d80103d443e36d626a88a287b9ae7"}, + {file = "pycares-4.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35886dba7aa5b73affca8729aeb5a1f5e94d3d9a764adb1b7e75bafca44eeca5"}, + {file = "pycares-4.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5cea6e1f3be016f155d60f27f16c1074d58b4d6e123228fdbc3326d076016af8"}, + {file = "pycares-4.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3a9fd2665b053afb39226ac6f8137a60910ca7729358456df2fb94866f4297de"}, + {file = "pycares-4.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e8e9195f869120e44e0aa0a6098bb5c19947f4753054365891f592e6f9eab3ef"}, + {file = "pycares-4.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:674486ecf2afb25ee219171b07cdaba481a1aaa2dabb155779c7be9ded03eaa9"}, + {file = "pycares-4.3.0-cp37-cp37m-win32.whl", hash = "sha256:1b6cd3161851499b6894d1e23bfd633e7b775472f5af35ae35409c4a47a2d45e"}, + {file = "pycares-4.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:710120c97b9afdba443564350c3f5f72fd9aae74d95b73dc062ca8ac3d7f36d7"}, + {file = "pycares-4.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9103649bd29d84bc6bcfaf09def9c0592bbc766018fad19d76d09989608b915d"}, + {file = "pycares-4.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c072dbaf73cb5434279578dc35322867d8d5df053e14fdcdcc589994ba4804ae"}, + {file = "pycares-4.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008531733f9c7a976b59c7760a3672b191159fd69ae76c01ca051f20b5e44164"}, + {file = "pycares-4.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2aae02d97d77dcff840ab55f86cb8b99bf644acbca17e1edb7048408b9782088"}, + {file = "pycares-4.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:257953ae6d400a934fd9193aeb20990ac84a78648bdf5978e998bd007a4045cd"}, + {file = "pycares-4.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c28d481efae26936ec08cb6beea305f4b145503b152cf2c4dc68cc4ad9644f0e"}, + {file = "pycares-4.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:976249b39037dbfb709ccf7e1c40d2785905a0065536385d501b94570cfed96d"}, + {file = "pycares-4.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:98568c30cfab6b327d94ae1acdf85bbba4cffd415980804985d34ca07e6f4791"}, + {file = "pycares-4.3.0-cp38-cp38-win32.whl", hash = "sha256:a2f3c4f49f43162f7e684419d9834c2c8ec165e54cb8dc47aa9dc0c2132701c0"}, + {file = "pycares-4.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:1730ef93e33e4682fbbf0e7fb19df2ed9822779d17de8ea6e20d5b0d71c1d2be"}, + {file = "pycares-4.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5a26b3f1684557025da26ce65d076619890c82b95e38cc7284ce51c3539a1ce8"}, + {file = "pycares-4.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86112cce01655b9f63c5e53b74722084e88e784a7a8ad138d373440337c591c9"}, + {file = "pycares-4.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c01465a191dc78e923884bb45cd63c7e012623e520cf7ed67e542413ee334804"}, + {file = "pycares-4.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9fd5d6012f3ee8c8038cbfe16e988bbd17b2f21eea86650874bf63757ee6161"}, + {file = "pycares-4.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa36b8ea91eae20b5c7205f3e6654423f066af24a1df02b274770a96cbcafaa7"}, + {file = "pycares-4.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:61019151130557c1788cae52e4f2f388a7520c9d92574f3a0d61c974c6740db0"}, + {file = "pycares-4.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:231962bb46274c52632469a1e686fab065dbd106dbef586de4f7fb101e297587"}, + {file = "pycares-4.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6c979512fa51c7ccef5204fe10ed4e5c44c2bce5f335fe98a3e423f1672bd7d4"}, + {file = "pycares-4.3.0-cp39-cp39-win32.whl", hash = "sha256:655cf0df862ce3847a60e1a106dafa2ba2c14e6636bac49e874347acdc7312dc"}, + {file = "pycares-4.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:36f2251ad0f99a5ce13df45c94c3161d9734c9e9fa2b9b4cc163b853ca170dc5"}, + {file = "pycares-4.3.0.tar.gz", hash = "sha256:c542696f6dac978e9d99192384745a65f80a7d9450501151e4a7563e06010d45"}, +] + +[package.dependencies] +cffi = ">=1.5.0" + +[package.extras] +idna = ["idna (>=2.1)"] + [[package]] name = "pycodestyle" version = "2.10.0" @@ -6177,7 +6301,7 @@ name = "tokenizers" version = "0.13.2" description = "Fast and Customizable Tokenizers" category = "main" -optional = true +optional = false python-versions = "*" files = [ {file = "tokenizers-0.13.2-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:a6f36b1b499233bb4443b5e57e20630c5e02fba61109632f5e00dab970440157"}, @@ -7068,4 +7192,4 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifes [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "7997201f64373247d8799baed84a5ad11ab3d92e26cc2114b26e734cfb9664a4" +content-hash = "2f916a8467f87cb850664b564c317dab569c9fee490e05308ac85427ef3abadc" diff --git a/pyproject.toml b/pyproject.toml index 29be5a351a2af..5122d8c469358 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.0.92" +version = "0.0.94" description = "Building applications with LLMs through composability" authors = [] license = 
"MIT" @@ -48,6 +48,7 @@ sentence-transformers = {version = "^2", optional = true} aiohttp = "^3.8.3" pypdf = {version = "^3.4.0", optional = true} networkx = {version="^2.6.3", optional = true} +aleph-alpha-client = "^2.15.0" [tool.poetry.group.docs.dependencies] autodoc_pydantic = "^1.8.0" diff --git a/tests/integration_tests/llms/test_aleph_alpha.py b/tests/integration_tests/llms/test_aleph_alpha.py new file mode 100644 index 0000000000000..646b767667eb3 --- /dev/null +++ b/tests/integration_tests/llms/test_aleph_alpha.py @@ -0,0 +1,10 @@ +"""Test Aleph Alpha API wrapper.""" + +from langchain.llms.aleph_alpha import AlephAlpha + + +def test_aleph_alpha_call() -> None: + """Test valid call to cohere.""" + llm = AlephAlpha(maximum_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_banana.py b/tests/integration_tests/llms/test_banana.py new file mode 100644 index 0000000000000..03465e1ad393d --- /dev/null +++ b/tests/integration_tests/llms/test_banana.py @@ -0,0 +1,10 @@ +"""Test BananaDev API wrapper.""" + +from langchain.llms.bananadev import Banana + + +def test_banana_call() -> None: + """Test valid call to BananaDev.""" + llm = Banana() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_modal.py b/tests/integration_tests/llms/test_modal.py new file mode 100644 index 0000000000000..495da20e4787c --- /dev/null +++ b/tests/integration_tests/llms/test_modal.py @@ -0,0 +1,10 @@ +"""Test Modal API wrapper.""" + +from langchain.llms.modal import Modal + + +def test_modal_call() -> None: + """Test valid call to Modal.""" + llm = Modal() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_stochasticai.py b/tests/integration_tests/llms/test_stochasticai.py new file mode 100644 index 0000000000000..8ab45d98a0585 --- /dev/null +++ b/tests/integration_tests/llms/test_stochasticai.py @@ -0,0 +1,10 @@ +"""Test StochasticAI API wrapper.""" + +from langchain.llms.stochasticai import StochasticAI + + +def test_stochasticai_call() -> None: + """Test valid call to StochasticAI.""" + llm = StochasticAI() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_writer.py b/tests/integration_tests/llms/test_writer.py new file mode 100644 index 0000000000000..672efc613c833 --- /dev/null +++ b/tests/integration_tests/llms/test_writer.py @@ -0,0 +1,10 @@ +"""Test Writer API wrapper.""" + +from langchain.llms.writer import Writer + + +def test_writer_call() -> None: + """Test valid call to Writer.""" + llm = Writer() + output = llm("Say foo:") + assert isinstance(output, str)