diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8bb9507e9..0c5423c36 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,10 +10,10 @@ -## Link to JIRA ticket +## Relevant links - ## Things to check - [ ] I have added any new ENV vars in all deployed environments +- [ ] I have tested any code added or changed diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..330fb1af8 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,70 @@ +# Code of Conduct - Redbox Copilot + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban +temporarily or permanently any contributor for other behaviors that they deem +inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at <>. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://contributor-covenant.org/), version +[1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct/code_of_conduct.md) and +[2.0](https://www.contributor-covenant.org/version/2/0/code_of_conduct/code_of_conduct.md), +and was generated by [contributing-gen](https://github.com/bttger/contributing-gen). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..a9f5abbd2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,142 @@ + +# Contributing to Redbox Copilot + +First off, thanks for taking the time to contribute! ❤️ + +All types of contributions are encouraged and valued. See the [Table of Contents](#table-of-contents) for different ways to help and details about how this project handles them. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. 🎉 + +> And if you like the project, but just don't have time to contribute, that's fine. There are other easy ways to support the project and show your appreciation, which we would also be very happy about: +> - Star the project +> - Tweet about it +> - Refer this project in your project's readme +> - Mention the project at local meetups and tell your friends/colleagues + + +## Table of Contents + +- [Code of Conduct](#code-of-conduct) +- [I Have a Question](#i-have-a-question) +- [I Want To Contribute](#i-want-to-contribute) + - [Reporting Bugs](#reporting-bugs) + - [Suggesting Enhancements](#suggesting-enhancements) + - [Your First Code Contribution](#your-first-code-contribution) + - [Improving The Documentation](#improving-the-documentation) +- [Styleguides](#styleguides) + - [Commit Messages](#commit-messages) +- [Join The Project Team](#join-the-project-team) + + +## Code of Conduct + +This project and everyone participating in it is governed by the +[Redbox Copilot Code of Conduct](https://github.com/i-dot-ai/redbox-copilotblob/main/CODE_OF_CONDUCT.md). +By participating, you are expected to uphold this code. Please report unacceptable behavior +to i-dot-ai-enquiries@cabinetoffice.gov.uk + + +## I Have a Question + +> If you want to ask a question, we assume that you have read the available [Documentation](https://github.com/i-dot-ai/redbox-copilot/docs). + +Before you ask a question, it is best to search for existing [Issues](https://github.com/i-dot-ai/redbox-copilot/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. + +If you then still feel the need to ask a question and need clarification, we recommend the following: + +- Open an [Issue](https://github.com/i-dot-ai/redbox-copilot/issues/new). +- Provide as much context as you can about what you're running into. +- Provide project and platform versions (nodejs, npm, etc), depending on what seems relevant. + +We will then take care of the issue as soon as possible. + +## I Want To Contribute + +> ### Legal Notice +> When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. + +### Reporting Bugs + + +#### Before Submitting a Bug Report + +A good bug report shouldn't leave others needing to chase you up for more information. Therefore, we ask you to investigate carefully, collect information and describe the issue in detail in your report. Please complete the following steps in advance to help us fix any potential bug as fast as possible. + +- Make sure that you are using the latest version. +- Determine if your bug is really a bug and not an error on your side e.g. using incompatible environment components/versions (Make sure that you have read the [documentation](https://github.com/i-dot-ai/redbox-copilot/docs). If you are looking for support, you might want to check [this section](#i-have-a-question)). +- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/i-dot-ai/redbox-copilotissues?q=label%3Abug). +- Also make sure to search the internet (including Stack Overflow) to see if users outside of the GitHub community have discussed the issue. +- Collect information about the bug: + - Stack trace (Traceback) + - OS, Platform and Version (Windows, Linux, macOS, x86, ARM) + - Version of the interpreter, compiler, SDK, runtime environment, package manager, depending on what seems relevant. + - Possibly your input and the output + - Can you reliably reproduce the issue? And can you also reproduce it with older versions? + + +#### How Do I Submit a Good Bug Report? + +> You must never report security related issues, vulnerabilities or bugs including sensitive information to the issue tracker, or elsewhere in public. Instead sensitive bugs must be sent by email to <>. + + +We use GitHub issues to track bugs and errors. If you run into an issue with the project: + +- Open an [Issue](https://github.com/i-dot-ai/redbox-copilot/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) +- Explain the behavior you would expect and the actual behavior. +- Please provide as much context as possible and describe the *reproduction steps* that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case. +- Provide the information you collected in the previous section. + +Once it's filed: + +- The project team will label the issue accordingly. +- A team member will try to reproduce the issue with your provided steps. If there are no reproduction steps or no obvious way to reproduce the issue, the team will ask you for those steps and mark the issue as `needs-repro`. Bugs with the `needs-repro` tag will not be addressed until they are reproduced. +- If the team is able to reproduce the issue, it will be marked `needs-fix`, as well as possibly other tags (such as `critical`), and the issue will be left to be [implemented by someone](#your-first-code-contribution). + + +### Suggesting Enhancements + +This section guides you through submitting an enhancement suggestion for Redbox Copilot, **including completely new features and minor improvements to existing functionality**. Following these guidelines will help maintainers and the community to understand your suggestion and find related suggestions. + + +#### Before Submitting an Enhancement + +- Make sure that you are using the latest version. +- Read the [documentation](https://github.com/i-dot-ai/redbox-copilot/docs) carefully and find out if the functionality is already covered, maybe by an individual configuration. +- Perform a [search](https://github.com/i-dot-ai/redbox-copilot/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. +- Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library. + + +#### How Do I Submit a Good Enhancement Suggestion? + +Enhancement suggestions are tracked as [GitHub issues](https://github.com/i-dot-ai/redbox-copilot/issues). + +- Use a **clear and descriptive title** for the issue to identify the suggestion. +- Provide a **step-by-step description of the suggested enhancement** in as many details as possible. +- **Describe the current behavior** and **explain which behavior you expected to see instead** and why. At this point you can also tell which alternatives do not work for you. +- You may want to **include screenshots and animated GIFs** which help you demonstrate the steps or point out the part which the suggestion is related to. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux. +- **Explain why this enhancement would be useful** to most Redbox Copilot users. You may also want to point out the other projects that solved it better and which could serve as inspiration. + + + +### Your First Code Contribution + + +### Improving The Documentation + + +## Styleguides +### Commit Messages + + +## Join The Project Team + + + +## Attribution +This guide is based on the **contributing-gen**. [Make your own](https://github.com/bttger/contributing-gen)! diff --git a/README.md b/README.md index 9019e3f64..20a53d7c6 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Redbox Copilot is a retrieval augmented generation (RAG) app that uses Claude 2 to organise, categorise and summarise civil service documents. It's designed to handle a variety of administrative sources, such as letters, briefings, minutes, and speech transcripts. +> ⚠️ **This project is in active development and a work in progress. The repo is getting regular additions so be sure to check back regularly for updates.** + - **Better retrieval**. Redbox Copilot increases organisational memory by indexing documents - **Faster, accurate summarisation**. Redbox Copilot can summarise reports read months ago, supplement them with current work, and produce a first draft that lets civil servants focus on what they do best @@ -26,6 +28,7 @@ You'll find a series of useful `docker compose` commands already maded in [`Make Any time you update code for the the repo, you'll likely need to rebuild the containers. # Codespace +For a quick start, you can use GitHub Codespaces to run the project in a cloud-based development environment. Click the button below to open the project in a new Codespace. [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/i-dot-ai/redbox-copilot?quickstart=1) diff --git a/core_api/Dockerfile b/core_api/Dockerfile index ec467753e..1e63f7b6e 100644 --- a/core_api/Dockerfile +++ b/core_api/Dockerfile @@ -10,7 +10,7 @@ RUN poetry install --no-root --no-ansi --with api --without ai,ingest,embed,dev, WORKDIR /app -ADD app/core_api/app.py /app/ +ADD core_api/src/app.py /app/app.py # Run FastAPI EXPOSE 5002 diff --git a/embed/Dockerfile b/embed/Dockerfile index 1c8e5e7a4..5e073a947 100644 --- a/embed/Dockerfile +++ b/embed/Dockerfile @@ -6,19 +6,21 @@ RUN pip install poetry ADD redbox/ /app/redbox ADD pyproject.toml poetry.lock /app/ +ADD ./download_embedder.py /app/ +ADD ./model_db.py /app/ WORKDIR /app/ RUN poetry install --no-root --no-ansi --with worker,embed,api --without ai,ingest,dev # Add the rest of the files -ADD ./embed /app/ -ADD ./download_embedder.py /app/ +ADD ./embed/src/app.py /app/app.py + ADD ./Makefile /app/ ADD ./.env.example /app/ # Download the model -RUN poetry run download-embedder +RUN poetry run download-model EXPOSE 5000 # Run FastAPI -CMD ["poetry", "run", "uvicorn", "embed.src:app", "--host", "0.0.0.0", "--port", "5000"] +CMD ["poetry", "run", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5000"] diff --git a/ingest/Dockerfile b/ingest/Dockerfile index 3e3fbf990..9b2bfb88d 100644 --- a/ingest/Dockerfile +++ b/ingest/Dockerfile @@ -6,14 +6,15 @@ RUN pip install poetry ADD redbox/ /app/redbox ADD pyproject.toml poetry.lock /app/ WORKDIR /app/ +ADD ./download_embedder.py /app/ +ADD ./model_db.py /app/ RUN poetry install --no-root --no-ansi --with worker,ingest,embed --without ai,dev,api -ADD ./download_embedder.py /app/ # Download the model -RUN poetry run download-embedder +RUN poetry run download-model WORKDIR /app -ADD ./ingest/app.py /app/app.py +ADD ./ingest/src/app.py /app/app.py # Run pika worker CMD ["poetry", "run", "python", "app.py"] diff --git a/legacy_app/Dockerfile b/legacy_app/Dockerfile index 78c6508d4..1a548b1fa 100644 --- a/legacy_app/Dockerfile +++ b/legacy_app/Dockerfile @@ -5,17 +5,20 @@ RUN apt-get install -y libgl-dev libmagic-dev inetutils-ping RUN pip install poetry +WORKDIR /app/ + ADD pyproject.toml poetry.lock ./ # Streamlit app needs a lot of dependencies +ADD ./download_embedder.py /app/ +ADD ./model_db.py /app/ RUN poetry install --no-root --no-ansi --with streamlit-app,ai,ingest,embed --without dev,worker,api ADD redbox/ /app/redbox ADD legacy_app /app # Download the model -ADD ./download_embedder.py /app/ -WORKDIR /app/ -RUN poetry run download-embedder + +RUN poetry run download-model EXPOSE 8501 diff --git a/poetry.lock b/poetry.lock index ea34dc4e2..2b7405070 100644 --- a/poetry.lock +++ b/poetry.lock @@ -956,6 +956,31 @@ types-awscrt = "*" [package.extras] botocore = ["botocore"] +[[package]] +name = "bump2version" +version = "1.0.1" +description = "Version-bump your software with a single command!" +optional = false +python-versions = ">=3.5" +files = [ + {file = "bump2version-1.0.1-py2.py3-none-any.whl", hash = "sha256:37f927ea17cde7ae2d7baf832f8e80ce3777624554a653006c9144f8017fe410"}, + {file = "bump2version-1.0.1.tar.gz", hash = "sha256:762cb2bfad61f4ec8e2bdf452c7c267416f8c70dd9ecb1653fd0bbb01fa936e6"}, +] + +[[package]] +name = "bumpversion" +version = "0.6.0" +description = "Version-bump your software with a single command!" +optional = false +python-versions = "*" +files = [ + {file = "bumpversion-0.6.0-py2.py3-none-any.whl", hash = "sha256:4eb3267a38194d09f048a2179980bb4803701969bff2c85fa8f6d1ce050be15e"}, + {file = "bumpversion-0.6.0.tar.gz", hash = "sha256:4ba55e4080d373f80177b4dabef146c07ce73c7d1377aabf9d3c3ae1f94584a6"}, +] + +[package.dependencies] +bump2version = "*" + [[package]] name = "cachetools" version = "5.3.3" @@ -2585,22 +2610,22 @@ files = [ [[package]] name = "importlib-metadata" -version = "7.0.1" +version = "7.0.2" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-7.0.1-py3-none-any.whl", hash = "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e"}, - {file = "importlib_metadata-7.0.1.tar.gz", hash = "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc"}, + {file = "importlib_metadata-7.0.2-py3-none-any.whl", hash = "sha256:f4bc4c0c070c490abf4ce96d715f68e95923320370efb66143df00199bb6c100"}, + {file = "importlib_metadata-7.0.2.tar.gz", hash = "sha256:198f568f3230878cb1b44fbd7975f87906c22336dba2e4a7f05278c281fbd792"}, ] [package.dependencies] zipp = ">=0.5" [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] [[package]] name = "iniconfig" @@ -3081,13 +3106,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (> [[package]] name = "jupyterlab" -version = "4.1.3" +version = "4.1.4" description = "JupyterLab computational environment" optional = false python-versions = ">=3.8" files = [ - {file = "jupyterlab-4.1.3-py3-none-any.whl", hash = "sha256:67dbec7057c6ad46f08a3667a80bdb890df9453822c93b5ddfd5e8313a718ef9"}, - {file = "jupyterlab-4.1.3.tar.gz", hash = "sha256:b85bd8766f995d23461e1f68a0cbc688d23e0af2b6f42a7768fc7b1826b2ec39"}, + {file = "jupyterlab-4.1.4-py3-none-any.whl", hash = "sha256:f92c3f2b12b88efcf767205f49be9b2f86b85544f9c4f342bb5e9904a16cf931"}, + {file = "jupyterlab-4.1.4.tar.gz", hash = "sha256:e03c82c124ad8a0892e498b9dde79c50868b2c267819aca3f55ce47c57ebeb1d"}, ] [package.dependencies] @@ -3312,19 +3337,19 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] [[package]] name = "langchain-community" -version = "0.0.25" +version = "0.0.27" description = "Community contributed LangChain integrations." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain_community-0.0.25-py3-none-any.whl", hash = "sha256:09b931ba710b1a10e449396d59f38575e0554acd527287937c33a2c4abdc6d83"}, - {file = "langchain_community-0.0.25.tar.gz", hash = "sha256:b6c8c14cd6ec2635e51e3974bf78a8de3b959bbedb4af55aad164f8cf392f0c5"}, + {file = "langchain_community-0.0.27-py3-none-any.whl", hash = "sha256:377a7429580a71d909012df5aae538d295fa6f21bc479e5dac6fd1589762b3ab"}, + {file = "langchain_community-0.0.27.tar.gz", hash = "sha256:266dffbd4c1666db1889cad953fa5102d4debff782335353b6d78636a761778d"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" dataclasses-json = ">=0.5.7,<0.7" -langchain-core = ">=0.1.28,<0.2.0" +langchain-core = ">=0.1.30,<0.2.0" langsmith = ">=0.1.0,<0.2.0" numpy = ">=1,<2" PyYAML = ">=5.3" @@ -3334,7 +3359,7 @@ tenacity = ">=8.1.0,<9.0.0" [package.extras] cli = ["typer (>=0.9.0,<0.10.0)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cloudpickle (>=2.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] [[package]] name = "langchain-core" @@ -4853,13 +4878,13 @@ pillow = "*" [[package]] name = "pdfminer-six" -version = "20221105" +version = "20231228" description = "PDF parser and analyzer" optional = false python-versions = ">=3.6" files = [ - {file = "pdfminer.six-20221105-py3-none-any.whl", hash = "sha256:1eaddd712d5b2732f8ac8486824533514f8ba12a0787b3d5fe1e686cd826532d"}, - {file = "pdfminer.six-20221105.tar.gz", hash = "sha256:8448ab7b939d18b64820478ecac5394f482d7a79f5f7eaa7703c6c959c175e1d"}, + {file = "pdfminer.six-20231228-py3-none-any.whl", hash = "sha256:e8d3c3310e6fbc1fe414090123ab01351634b4ecb021232206c4c9a8ca3e3b8f"}, + {file = "pdfminer.six-20231228.tar.gz", hash = "sha256:6004da3ad1a7a4d45930cb950393df89b068e73be365a6ff64a838d37bcb08c4"}, ] [package.dependencies] @@ -4873,17 +4898,17 @@ image = ["Pillow"] [[package]] name = "pdfplumber" -version = "0.10.4" +version = "0.11.0" description = "Plumb a PDF for detailed information about each char, rectangle, and line." optional = false python-versions = ">=3.8" files = [ - {file = "pdfplumber-0.10.4-py3-none-any.whl", hash = "sha256:c8f200259703324cd39a5c93b181a0d2370a6b2b6da670c117e75c3da6aca4a4"}, - {file = "pdfplumber-0.10.4.tar.gz", hash = "sha256:1c83a4e1fe75525ce1f161fa55a8142209a2da69b45542ce2c45be879e804fd6"}, + {file = "pdfplumber-0.11.0-py3-none-any.whl", hash = "sha256:be41686fe9515da5f54fad254e2b6941a723b9afe99eb92008df30880b6d61b3"}, + {file = "pdfplumber-0.11.0.tar.gz", hash = "sha256:e0027b8fc0ab98329cd8d445f5324d779078a6b58406471234e3e9c265dd8a48"}, ] [package.dependencies] -"pdfminer.six" = "20221105" +"pdfminer.six" = "20231228" Pillow = ">=9.1" pypdfium2 = ">=4.18.0" @@ -6205,6 +6230,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -7121,13 +7147,13 @@ transformers = ">=4.32.0,<5.0.0" [[package]] name = "sentry-sdk" -version = "1.40.6" +version = "1.41.0" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = "*" files = [ - {file = "sentry-sdk-1.40.6.tar.gz", hash = "sha256:f143f3fb4bb57c90abef6e2ad06b5f6f02b2ca13e4060ec5c0549c7a9ccce3fa"}, - {file = "sentry_sdk-1.40.6-py2.py3-none-any.whl", hash = "sha256:becda09660df63e55f307570e9817c664392655a7328bbc414b507e9cb874c67"}, + {file = "sentry-sdk-1.41.0.tar.gz", hash = "sha256:4f2d6c43c07925d8cd10dfbd0970ea7cb784f70e79523cca9dbcd72df38e5a46"}, + {file = "sentry_sdk-1.41.0-py2.py3-none-any.whl", hash = "sha256:be4f8f4b29a80b6a3b71f0f31487beb9e296391da20af8504498a328befed53f"}, ] [package.dependencies] @@ -7153,7 +7179,7 @@ huey = ["huey (>=2)"] loguru = ["loguru (>=0.5)"] opentelemetry = ["opentelemetry-distro (>=0.35b0)"] opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] -pure-eval = ["asttokens", "executing", "pure_eval"] +pure-eval = ["asttokens", "executing", "pure-eval"] pymongo = ["pymongo (>=3.1)"] pyspark = ["pyspark (>=2.4.4)"] quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] @@ -9121,4 +9147,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "fc67074d98459e0ecebe487e5705ff1c8a7ae9c6dbe0b9fb65c8b7bdd5970181" +content-hash = "f6bae3fff43219856d266b33dc4aeef1fa9736c4334783e6825ab7f3f3e916a6" diff --git a/pyproject.toml b/pyproject.toml index 270ac9473..d8d4ed3c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,9 +4,9 @@ target-version = 'py311' [tool.poetry] name = "redbox" -version = "0.1.0" +version = "1.0.0" description = "" -authors = ["i.AI "] +authors = ["i.AI "] license = "MIT" readme = "README.md" @@ -15,11 +15,12 @@ python = ">=3.11,<3.12" pydantic = "^2.6.3" elasticsearch = "^8.12.0" requests = "^2.31.0" -langchain = "^0.1.9" +langchain = "^0.1.11" tiktoken = "^0.6.0" boto3 = "^1.34.56" pika = "^1.3.2" pydantic-settings = "^2.2.1" +bumpversion = "^0.6.0" [tool.poetry.group.api.dependencies] fastapi = "^0.110.0" @@ -69,7 +70,7 @@ loguru = "^0.7.2" anthropic = "^0.18.1" litellm = "^1.29.1" openai = "^1.13.3" -langchain-community = "^0.0.25" + [tool.poetry.group.streamlitapp.dependencies] cognitojwt = "^1.4.1"