Skip to content

Commit

Permalink
feature/opensearch (#1114)
Browse files Browse the repository at this point in the history
  • Loading branch information
gecBurton authored Oct 22, 2024
1 parent bbe5233 commit 3efcd8c
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 7 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,11 @@ Follow the instructions here https://django-q2.readthedocs.io/en/master/schedule
3. func = `django.core.management.call_command`
4. args = `"delete_expired_data"`
5. save


## Vector databases

We are currently using Elasticsearch as our vector database.

We have also successfully deployed Redbox to OpenSearch Serverless, but this support should be considered experimental
at this stage.
58 changes: 57 additions & 1 deletion django_app/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions django_app/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ django-plotly-dash = "^2.3.1"
django-adminplus = "^0.6"
pandas = "^2.2.2"
django-waffle = "^4.1.0"
opensearch-py = "^2.7.1"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
Expand Down
3 changes: 1 addition & 2 deletions infrastructure/aws/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ locals {
}
django_app_secrets = {
"ELASTIC__API_KEY" : var.elastic_api_key,
"ELASTIC__CLOUD_ID" : var.cloud_id,
"ELASTIC__COLLECTION_ENDPOINT": module.opensearch.collection_enpdoint,
"AZURE_OPENAI_API_KEY": var.azure_openai_api_key,
"AZURE_OPENAI_ENDPOINT" : var.azure_openai_endpoint,
Expand Down
7 changes: 7 additions & 0 deletions infrastructure/aws/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ data "aws_iam_policy_document" "ecs_exec_role_policy" {
]
}

# Uncomment this statement to grant OpenSearch Serverless access
# statement {
# effect = "Allow"
# actions = ["aoss:*"]
# resources = ["*"] # TODO: restrict this to the specific collection
# }

}

resource "aws_iam_policy" "redbox_policy" {
Expand Down
13 changes: 13 additions & 0 deletions infrastructure/aws/opensearch.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# rename this file to opensearch.tf to use opensearch

# Instantiates the shared `opensearch` module from i-ai-core-infrastructure.
# The collection name is built from the team name, the current Terraform
# workspace and the project name.
module "opensearch" {
  # checkov:skip=CKV_TF_1: We're using semantic versions instead of commit hash
  # NOTE(review): the git source below carries no `?ref=` selector, so no
  # version appears to be pinned here - confirm whether a tag should be added.
  # source = "../../../i-ai-core-infrastructure//modules/opensearch" # For testing local changes
  source = "git::https://github.com/i-dot-ai/i-ai-core-infrastructure.git//modules/opensearch"
  account_id = var.account_id
  collection_name = "${var.team_name}-${terraform.workspace}-${var.project_name}-collection"
  # Collection type passed through to the module as-is.
  type = "SEARCH"
  environment = var.env
  region = var.region
  state_bucket = var.state_bucket
}
36 changes: 35 additions & 1 deletion redbox-core/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions redbox-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pytest-dotenv = "^0.5.2"
kneed = "^0.8.5"
langgraph = "^0.2.15"
langchain-aws = "^0.1.17"
opensearch-py = "^2.7.1"


[tool.poetry.group.dev.dependencies]
Expand Down
25 changes: 22 additions & 3 deletions redbox-core/redbox/models/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,20 @@
from elasticsearch import Elasticsearch
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict

from opensearchpy import OpenSearch, RequestsHttpConnection
from redbox.models.chain import ChatLLMBackend


logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
log = logging.getLogger()
logger = logging.getLogger()


class OpenSearchSettings(BaseModel):
    """Settings required to connect to an AWS OpenSearch collection."""

    # frozen=True asks pydantic to treat instances as immutable once created.
    model_config = SettingsConfigDict(frozen=True)

    # Used as the `host` entry when the OpenSearch client is built.
    # NOTE(review): the name is spelled "enpdoint" (sic). Confirm that the
    # environment variable intended to populate it actually maps to this
    # spelling before relying on it, and rename consistently if not.
    collection_enpdoint: str


class ElasticLocalSettings(BaseModel):
Expand Down Expand Up @@ -66,7 +75,7 @@ class Settings(BaseSettings):
partition_strategy: Literal["auto", "fast", "ocr_only", "hi_res"] = "fast"
clustering_strategy: Literal["full"] | None = None

elastic: ElasticCloudSettings | ElasticLocalSettings = ElasticLocalSettings()
elastic: ElasticCloudSettings | ElasticLocalSettings | OpenSearchSettings = ElasticLocalSettings()
elastic_root_index: str = "redbox-data"
elastic_chunk_alias: str = "redbox-data-chunk-current"

Expand Down Expand Up @@ -131,6 +140,16 @@ def elasticsearch_client(self) -> Elasticsearch:
],
basic_auth=(self.elastic.user, self.elastic.password),
)

elif isinstance(self.elastic, OpenSearchSettings):
client = OpenSearch(
hosts=[{"host": self.collection_enpdoint, "port": 443}],
use_ssl=True,
verify_certs=True,
connection_class=RequestsHttpConnection,
pool_maxsize=100,
)

else:
client = Elasticsearch(cloud_id=self.elastic.cloud_id, api_key=self.elastic.api_key)

Expand Down

0 comments on commit 3efcd8c

Please sign in to comment.