Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add component clickhouse for vector database #3657

Merged
merged 25 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
797d473
fix: Problem in the description field when using the MariaDB or MySQL…
MarceloNunesAlves Aug 19, 2024
7c008c8
Merge pull request #1 from MarceloNunesAlves/fix/db_description_type
MarceloNunesAlves Aug 19, 2024
7a3dc36
fix: Problem in the description field when using the MariaDB or MySQL…
MarceloNunesAlves Aug 19, 2024
531544a
Merge pull request #2 from MarceloNunesAlves/fix/db_description_type
MarceloNunesAlves Aug 19, 2024
05f2b0c
fix: Add the migration script to update description columns type.
MarceloNunesAlves Aug 20, 2024
b9a1367
Merge pull request #3 from MarceloNunesAlves/fix/db_description_type
MarceloNunesAlves Aug 20, 2024
ea73048
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2024
f37f493
Update src/backend/base/langflow/alembic/versions/1d90f8a0efe1_update…
MarceloNunesAlves Aug 20, 2024
31db277
Merge branch 'main' into main
MarceloNunesAlves Aug 27, 2024
600d44b
Merge branch 'main' into main
MarceloNunesAlves Aug 29, 2024
7dc1c7e
Merge branch 'langflow-ai:main' into main
MarceloNunesAlves Aug 30, 2024
85d105d
feat: add component clickhouse for vector database
MarceloNunesAlves Sep 2, 2024
72eb87e
Merge pull request #4 from MarceloNunesAlves/feat/clickhouse_vectorst…
MarceloNunesAlves Sep 2, 2024
0699a85
Merge remote-tracking branch 'upstream/main'
MarceloNunesAlves Sep 2, 2024
d5f751b
feat: add component clickhouse for vector database - conflicts resolved
MarceloNunesAlves Sep 2, 2024
940a6d1
[autofix.ci] apply automated fixes
autofix-ci[bot] Sep 2, 2024
a39d4ba
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Sep 2, 2024
fe2f69f
Merge remote-tracking branch 'refs/remotes/upstream/main' into main
MarceloNunesAlves Sep 2, 2024
a33b398
feat: add component clickhouse for vector database - conflicts resolved
MarceloNunesAlves Sep 2, 2024
4c99974
Merge branch 'main' into main
MarceloNunesAlves Sep 2, 2024
c87391d
Merge branch 'main' into main
MarceloNunesAlves Sep 3, 2024
2d633ac
feat: add component clickhouse for vector database - update poetry
MarceloNunesAlves Sep 3, 2024
2e06306
Merge remote-tracking branch 'upstream/main'
MarceloNunesAlves Sep 3, 2024
8fc1978
feat: add component clickhouse for vector database - conflicts resolved
MarceloNunesAlves Sep 3, 2024
df87711
Merge branch 'main' into main
MarceloNunesAlves Sep 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 253 additions & 6 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ nltk = "^3.9.1"
bson = "^0.5.10"
lark = "^1.2.2"
jq = "^1.8.0"
clickhouse-connect = {version = "0.7.19", optional = true, extras = ["clickhouse-connect"]}


[tool.poetry.group.dev.dependencies]
Expand Down
134 changes: 134 additions & 0 deletions src/backend/base/langflow/components/vectorstores/Clickhouse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
from typing import List

from langchain_community.vectorstores import Clickhouse, ClickhouseSettings

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers.data import docs_to_data
from langflow.inputs import BoolInput, FloatInput
from langflow.io import (
HandleInput,
IntInput,
StrInput,
SecretStrInput,
DataInput,
DropdownInput,
MultilineInput,
DictInput,
)
from langflow.schema import Data


class ClickhouseVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component exposing the LangChain ``Clickhouse`` vector store.

    ``build_vector_store`` checks that the server is reachable, converts any
    ingest data to LangChain documents and returns a ``Clickhouse`` store;
    ``search_documents`` runs a similarity search against that store.
    """

    display_name = "Clickhouse"
    description = "Clickhouse Vector Store with search capabilities"
    documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/clickhouse/"
    name = "Clickhouse"
    icon = "Clickhouse"

    inputs = [
        # Connection settings.
        StrInput(name="host", display_name="hostname", required=True, value="localhost"),
        IntInput(name="port", display_name="port", required=True, value=8123),
        StrInput(name="database", display_name="database", required=True),
        StrInput(name="table", display_name="Table name", required=True),
        StrInput(name="username", display_name="The ClickHouse user name.", required=True),
        SecretStrInput(name="password", display_name="The password for username.", required=True),
        # Index configuration (advanced).
        DropdownInput(
            name="index_type",
            display_name="index_type",
            options=["annoy", "vector_similarity"],
            info="Type of the index.",
            value="annoy",
            advanced=True,
        ),
        DropdownInput(
            name="metric",
            display_name="metric",
            options=["angular", "euclidean", "manhattan", "hamming", "dot"],
            info="Metric to compute distance.",
            value="angular",
            advanced=True,
        ),
        BoolInput(
            name="secure",
            display_name="Use https/TLS. This overrides inferred values from the interface or port arguments.",
            value=False,
            advanced=True,
        ),
        StrInput(name="index_param", display_name="Param of the index", value="'L2Distance',100", advanced=True),
        DictInput(name="index_query_params", display_name="index query params", advanced=True),
        # Search / ingestion inputs.
        MultilineInput(name="search_query", display_name="Search Query"),
        DataInput(name="ingest_data", display_name="Ingest Data", is_list=True),
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
        FloatInput(name="score_threshold", display_name="Score threshold", advanced=True),
    ]

    @check_cached_vector_store
    def build_vector_store(self) -> Clickhouse:
        """Build the ``Clickhouse`` vector store from the component inputs.

        Returns:
            A ``Clickhouse`` store; created via ``from_documents`` when ingest
            data was supplied, otherwise constructed directly.

        Raises:
            ImportError: If ``clickhouse_connect`` is not installed.
            ValueError: If the connectivity probe against the server fails.
        """
        try:
            import clickhouse_connect  # type: ignore
        except ImportError as e:
            raise ImportError(
                "Failed to import Clickhouse dependencies. Install it using `pip install langflow[clickhouse-connect] --pre`"
            ) from e

        # Probe the server with the same endpoint settings (port, TLS) that are
        # handed to ClickhouseSettings below, so the check reflects the
        # connection the store will actually use.
        try:
            client = clickhouse_connect.get_client(
                host=self.host,
                port=self.port,
                username=self.username,
                password=self.password,
                secure=self.secure,
            )
            try:
                client.command("SELECT 1")
            finally:
                # The probe client is only needed for the check; release it.
                client.close()
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise ValueError(f"Failed to connect to Clickhouse: {e}") from e

        # Data objects are converted to LangChain documents; anything else is
        # passed through unchanged.
        documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in self.ingest_data or []
        ]

        # Optional settings are only forwarded when the user provided a value.
        kwargs = {}
        if self.index_param:
            kwargs["index_param"] = self.index_param.split(",")
        if self.index_query_params:
            kwargs["index_query_params"] = self.index_query_params

        settings = ClickhouseSettings(
            table=self.table,
            database=self.database,
            host=self.host,
            index_type=self.index_type,
            metric=self.metric,
            password=self.password,
            port=self.port,
            secure=self.secure,
            username=self.username,
            **kwargs,
        )

        if documents:
            return Clickhouse.from_documents(documents=documents, embedding=self.embedding, config=settings)
        return Clickhouse(embedding=self.embedding, config=settings)

    def search_documents(self) -> List[Data]:
        """Run a similarity search for ``search_query``.

        Returns:
            The matching documents converted with ``docs_to_data``, or an empty
            list when the query is missing, not a string, or blank.
        """
        vector_store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        # score_threshold is only forwarded when set to a truthy value.
        kwargs = {}
        if self.score_threshold:
            kwargs["score_threshold"] = self.score_threshold

        docs = vector_store.similarity_search(query=query, k=self.number_of_results, **kwargs)

        data = docs_to_data(docs)
        self.status = data
        return data
Loading