added search to redis
eavanvalkenburg committed Nov 12, 2024
1 parent b67eb84 commit ee68c94
Showing 11 changed files with 737 additions and 65 deletions.
5 changes: 5 additions & 0 deletions python/pyproject.toml
@@ -206,3 +206,8 @@ name = "semantic_kernel"
requires = ["flit-core >= 3.9,<4.0"]
build-backend = "flit_core.buildapi"

[dependency-groups]
redis = [
"redisvl>=0.3.6",
]

125 changes: 98 additions & 27 deletions python/samples/concepts/memory/new_memory.py
@@ -1,6 +1,8 @@
# Copyright (c) Microsoft. All rights reserved.

import argparse
import asyncio
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Annotated
@@ -25,6 +27,10 @@
VectorStoreRecordVectorField,
vectorstoremodel,
)
from semantic_kernel.data.vector_search.vector_search_filter import VectorSearchFilter
from semantic_kernel.data.vector_search.vector_search_options import VectorSearchOptions
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin


@vectorstoremodel
@@ -44,9 +50,18 @@ class MyDataModelArray:
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
title: Annotated[
str, VectorStoreRecordDataField(has_embedding=False, property_type="str", is_full_text_searchable=True)
] = "title1"
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
str,
VectorStoreRecordDataField(
has_embedding=True, embedding_property_name="vector", property_type="str", is_full_text_searchable=True
),
] = "content1"
tag: Annotated[str, VectorStoreRecordDataField(has_embedding=False, property_type="str", is_filterable=True)] = (
"tag1"
)


@vectorstoremodel
@@ -64,9 +79,18 @@ class MyDataModelList:
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
title: Annotated[
str, VectorStoreRecordDataField(has_embedding=False, property_type="str", is_full_text_searchable=True)
] = "title1"
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
str,
VectorStoreRecordDataField(
has_embedding=True, embedding_property_name="vector", property_type="str", is_full_text_searchable=True
),
] = "content1"
tag: Annotated[str, VectorStoreRecordDataField(has_embedding=False, property_type="str", is_filterable=True)] = (
"tag1"
)


collection_name = "test"
@@ -88,7 +112,7 @@ class MyDataModelList:
# function which returns the store.
# Using a function allows for lazy initialization of the store,
# so that settings for unused stores do not cause validation errors.
stores: dict[str, Callable[[], VectorStoreRecordCollection]] = {
services: dict[str, Callable[[], VectorStoreRecordCollection]] = {
"ai_search": lambda: AzureAISearchCollection[MyDataModel](
data_model_type=MyDataModel,
),
@@ -101,7 +125,7 @@ class MyDataModelList:
collection_name=collection_name,
prefix_collection_name_to_key_names=True,
),
"redis_hashset": lambda: RedisHashsetCollection[MyDataModel](
"redis_hash": lambda: RedisHashsetCollection[MyDataModel](
data_model_type=MyDataModel,
collection_name=collection_name,
prefix_collection_name_to_key_names=True,
@@ -120,43 +144,89 @@ class MyDataModelList:
}


async def main(store: str, use_azure_openai: bool, embedding_model: str):
def print_record(record):
print(f" Found id: {record.id}")
print(f" Content: {record.content}")
if record.vector is not None:
print(f" Vector (first five): {record.vector[:5]}")


async def main(service: str, use_azure_openai: bool, embedding_model: str):
print("-" * 30)
kernel = Kernel()
service_id = "embedding"
if use_azure_openai:
kernel.add_service(AzureTextEmbedding(service_id=service_id, deployment_name=embedding_model))
embedder = AzureTextEmbedding(service_id=service_id, deployment_name=embedding_model)
else:
kernel.add_service(OpenAITextEmbedding(service_id=service_id, ai_model_id=embedding_model))
async with stores[store]() as record_store:
await record_store.create_collection_if_not_exists()

record1 = MyDataModel(content="My text", id="e6103c03-487f-4d7d-9c23-4723651c17f4")
record2 = MyDataModel(content="My other text", id="09caec77-f7e1-466a-bcec-f1d51c5b15be")

embedder = OpenAITextEmbedding(service_id=service_id, ai_model_id=embedding_model)
kernel.add_service(embedder)
async with services[service]() as record_collection:
print(f"Creating {service} collection!")
await record_collection.delete_collection()
await record_collection.create_collection()

record1 = MyDataModel(
content="Semantic Kernel is awesome",
id="e6103c03-487f-4d7d-9c23-4723651c17f4",
title="Overview",
tag="general",
)
record2 = MyDataModel(
content="Semantic Kernel is available in dotnet, python and Java.",
id="09caec77-f7e1-466a-bcec-f1d51c5b15be",
title="Semantic Kernel Languages",
tag="general",
)
print("Adding records!")
records = await VectorStoreRecordUtils(kernel).add_vector_to_records(
[record1, record2], data_model_type=MyDataModel
)
keys = await record_store.upsert_batch(records)
print(f"upserted {keys=}")

results = await record_store.get_batch([record1.id, record2.id])
keys = await record_collection.upsert_batch(records)
print(f" Upserted {keys=}")
print("Getting records!")
results = await record_collection.get_batch([record1.id, record2.id])
if results:
for result in results:
print(f"found {result.id=}")
print(f"{result.content=}")
if result.vector is not None:
print(f"{result.vector[:5]=}")
for record in results:
print_record(record)
else:
print("not found")
print("Nothing found...")
options = VectorSearchOptions(
vector_field_name="vector",
include_vectors=True,
filter=VectorSearchFilter.equal_to("tag", "general"),
)
if isinstance(record_collection, VectorTextSearchMixin):
print("-" * 30)
print("Using text search")
search_results = await record_collection.text_search("python", options)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result.record) async for result in search_results.results]
if isinstance(record_collection, VectorizedSearchMixin):
print("-" * 30)
print("Using vectorized search, for `python`")
print("The distance function is set to the default of the store.")
search_results = await record_collection.vectorized_search(
vector=(await embedder.generate_raw_embeddings(["python"]))[0], options=options
)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
async for result in search_results.results:
print_record(result.record)
print(f" Score: {result.score:.4f}\n")
print("-" * 30)
print("Deleting collection!")
await record_collection.delete_collection()
print("Done!")


if __name__ == "__main__":
import asyncio

argparse.ArgumentParser()

parser = argparse.ArgumentParser()
parser.add_argument("--store", default="in_memory", choices=stores.keys(), help="What store to use.")
parser.add_argument("--service", default="redis_json", choices=services.keys(), help="What store to use.")
# Option of whether to use OpenAI or Azure OpenAI.
parser.add_argument("--use-azure-openai", action="store_true", help="Use Azure OpenAI instead of OpenAI.")
# Model
@@ -165,4 +235,5 @@ async def main(store: str, use_azure_openai: bool, embedding_model: str):
)
args = parser.parse_args()

asyncio.run(main(store=args.store, use_azure_openai=args.use_azure_openai, embedding_model=args.model))
asyncio.run(main(service=args.service, use_azure_openai=args.use_azure_openai, embedding_model=args.model))
time.sleep(1)
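
For orientation only, not part of this commit: the sample now picks its backend by service name, so exercising the new Redis search support looks roughly like the sketch below. The module path and the embedding model name are assumptions; the equivalent CLI form would be "python new_memory.py --service redis_json".

# Hedged sketch: programmatic equivalent of running the sample with --service redis_json.
# The import path and the model name are assumptions, not taken from the commit.
import asyncio

from samples.concepts.memory.new_memory import main

asyncio.run(main(service="redis_json", use_azure_openai=False, embedding_model="text-embedding-3-small"))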
@@ -18,9 +18,10 @@
}

DISTANCE_FUNCTION_MAP = {
DistanceFunction.COSINE_SIMILARITY: VectorSearchAlgorithmMetric.COSINE,
DistanceFunction.COSINE_DISTANCE: VectorSearchAlgorithmMetric.COSINE,
DistanceFunction.DOT_PROD: VectorSearchAlgorithmMetric.DOT_PRODUCT,
DistanceFunction.EUCLIDEAN_DISTANCE: VectorSearchAlgorithmMetric.EUCLIDEAN,
DistanceFunction.HAMMING: VectorSearchAlgorithmMetric.HAMMING,
"default": VectorSearchAlgorithmMetric.COSINE,
}

6 changes: 6 additions & 0 deletions python/semantic_kernel/connectors/memory/redis/const.py
@@ -4,6 +4,7 @@
from enum import Enum

from redis.commands.search.indexDefinition import IndexType
from redisvl.schema import StorageType

from semantic_kernel.data.const import DistanceFunction

@@ -18,6 +19,11 @@ class RedisCollectionTypes(str, Enum):
RedisCollectionTypes.HASHSET: IndexType.HASH,
}

STORAGE_TYPE_MAP = {
RedisCollectionTypes.JSON: StorageType.JSON,
RedisCollectionTypes.HASHSET: StorageType.HASH,
}

DISTANCE_FUNCTION_MAP = {
DistanceFunction.COSINE_SIMILARITY: "COSINE",
DistanceFunction.DOT_PROD: "IP",
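
For illustration only, not part of the diff: the new STORAGE_TYPE_MAP lets a Redis collection type resolve to a redisvl storage type with a plain lookup.

# Illustrative lookup against the map added above; the import path follows the
# file shown in this commit (python/semantic_kernel/connectors/memory/redis/const.py).
from redisvl.schema import StorageType

from semantic_kernel.connectors.memory.redis.const import STORAGE_TYPE_MAP, RedisCollectionTypes

assert STORAGE_TYPE_MAP[RedisCollectionTypes.JSON] is StorageType.JSON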