Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Azure Cosmos DB for MongoDB extension #329

Closed
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
<PackageVersion Include="Microsoft.Extensions.Logging" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.1" />
<PackageVersion Include="Microsoft.Extensions.Logging.TraceSource" Version="8.0.0" />
<PackageVersion Include="MongoDB.Driver" Version="2.24.0" />
<PackageVersion Include="Microsoft.ML.Tokenizers" Version="0.22.0-preview.24179.1" />
<PackageVersion Include="Microsoft.KernelMemory.Core" Version="0.51.240513.2" />
<PackageVersion Include="Microsoft.KernelMemory.Service.AspNetCore" Version="0.51.240513.2" />
Expand Down
9 changes: 8 additions & 1 deletion KernelMemory.sln
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MongoDbAtlas", "extensions\
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MongoDbAtlas.FunctionalTests", "extensions\MongoDbAtlas\MongoDbAtlas.FunctionalTests\MongoDbAtlas.FunctionalTests.csproj", "{8A602227-B291-4F1B-ACB8-237F49501B6A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureCosmosDBMongoDB", "extensions\AzureCosmosDBMongoDB\AzureCosmosDBMongoDB\AzureCosmosDBMongoDB.csproj", "{8b62c632-9d70-4dc1-aeab-82d057a09a19}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "107-dotnet-SemanticKernel-TextCompletion", "examples\107-dotnet-SemanticKernel-TextCompletion\107-dotnet-SemanticKernel-TextCompletion.csproj", "{494B8590-F0B2-4D40-A895-F9D7BDF26250}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "208-dotnet-lmstudio", "examples\208-dotnet-lmstudio\208-dotnet-lmstudio.csproj", "{BC8057DA-CB40-4308-96FB-EF0100822BAD}"
Expand Down Expand Up @@ -523,6 +525,10 @@ Global
{EE0D8645-2770-4E12-8E18-019B30970FE6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{EE0D8645-2770-4E12-8E18-019B30970FE6}.Debug|Any CPU.Build.0 = Debug|Any CPU
{EE0D8645-2770-4E12-8E18-019B30970FE6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8b62c632-9d70-4dc1-aeab-82d057a09a19}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8b62c632-9d70-4dc1-aeab-82d057a09a19}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8b62c632-9d70-4dc1-aeab-82d057a09a19}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8b62c632-9d70-4dc1-aeab-82d057a09a19}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -607,8 +613,9 @@ Global
{432AC1B4-8275-4284-9A44-44988A6F0C24} = {DBEA0A6B-474A-4E8C-BCC8-D5D43C063A54}
{A0C81A29-715F-463E-A243-7E45DB8AE53F} = {155DA079-E267-49AF-973A-D1D44681970F}
{EE0D8645-2770-4E12-8E18-019B30970FE6} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
{8b62c632-9d70-4dc1-aeab-82d057a09a19} = {155DA079-E267-49AF-973A-D1D44681970F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}
EndGlobalSection
EndGlobal
EndGlobal
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RollForward>LatestMajor</RollForward>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<GenerateDocumentationFile>false</GenerateDocumentationFile>
<UserSecretsId>5ee045b0-aea3-4f08-8d31-32d1a6f8fed0</UserSecretsId>
<NoWarn>$(NoWarn);CA1050,CA2007,CA1826,CA1303,CA1307,SKEXP0001</NoWarn>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\OpenAI\OpenAI.csproj"/>
<ProjectReference Include="..\AzureCosmosDBMongoDB\AzureCosmosDBMongoDB.csproj" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="MongoDB.Driver" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI;

namespace AzureCosmosDBMongoDB.TestApplication;

/// <summary>
/// Test double for <see cref="ITextEmbeddingGenerator"/> that serves embeddings
/// from an in-memory table instead of calling a model.
/// Vectors must be registered with <see cref="AddFakeEmbedding"/> before they are requested.
/// </summary>
internal sealed class MockEmbeddingGenerator : ITextEmbeddingGenerator
{
    // Maps an exact input text to the vector that should be returned for it.
    private readonly Dictionary<string, Embedding> _embeddings = new Dictionary<string, Embedding>();

    /// <summary>
    /// Registers the vector to return for the given text.
    /// </summary>
    /// <param name="str">Text used as the lookup key.</param>
    /// <param name="vector">Embedding returned when <paramref name="str"/> is requested.</param>
    internal void AddFakeEmbedding(string str, Embedding vector) => this._embeddings.Add(str, vector);

    /// <inheritdoc />
    public int CountTokens(string text)
    {
        return 0;
    }

    /// <inheritdoc />
    public int MaxTokens
    {
        get { return 0; }
    }

    /// <inheritdoc />
    public Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
    {
        // Dictionary indexer: a text that was never registered raises KeyNotFoundException.
        Embedding registered = this._embeddings[text];
        return Task.FromResult(registered);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI;
using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.MemoryDb.AzureCosmosDBMongoDB;
using Microsoft.KernelMemory.MemoryStorage;
using MongoDB.Driver;

namespace AzureCosmosDBMongoDB.TestApplication;

/// <summary>
/// Console smoke test for <see cref="AzureCosmosDBMongoDBMemory"/>: creates an index,
/// lists indexes, upserts three records, updates one, runs a similarity search and
/// finally deletes the records it inserted.
/// </summary>
public static class Program
{
    // Name of the index used by every operation in this test run.
    private const string IndexName = "default_index";

    private const string Text1 = "this is test 1";
    private const string Text2 = "this is test 2";

    /// <summary>
    /// Entry point: runs the whole create / upsert / search / delete sequence.
    /// </summary>
    public static async Task Main()
    {
        var (memory, embeddings) = await SetupAsync();

        Console.WriteLine("++++ DELETE INDEX ++++");

        // NOTE(review): the index deletion is intentionally left disabled, as in the original;
        // re-enable it to start each run from a clean index.
        // await memory.DeleteIndexAsync(IndexName);

        Console.WriteLine("++++ CREATE INDEX ++++");

        await memory.CreateIndexAsync(IndexName, embeddings[0].Length);

        Console.WriteLine("++++ LIST INDEXES ++++");

        IEnumerable<string> indexes = await memory.GetIndexesAsync();
        foreach (var existingIndex in indexes)
        {
            Console.WriteLine(existingIndex);
        }

        Console.WriteLine("===== INSERT RECORD 1 AND 2 =====");
        var memoryRecord1 = new MemoryRecord
        {
            Id = "memory 1",
            Vector = embeddings[0],
            Tags = new TagCollection { { "updated", "no" }, { "type", "email" } },
            Payload = new Dictionary<string, object>()
        };

        var memoryRecord2 = new MemoryRecord
        {
            Id = "memory 2",
            Vector = embeddings[0],
            Tags = new TagCollection { { "updated", "no" }, { "type", "email" } },
            Payload = new Dictionary<string, object>()
        };

        var id1 = await memory.UpsertAsync(IndexName, memoryRecord1);
        Console.WriteLine($"Insert 1: {id1} {memoryRecord1.Id}");

        var id2 = await memory.UpsertAsync(IndexName, memoryRecord2);
        Console.WriteLine($"Insert 2: {id2} {memoryRecord2.Id}");

        Console.WriteLine("===== INSERT RECORD 3 =====");

        var memoryRecord3 = new MemoryRecord
        {
            Id = "memory 3",
            Vector = embeddings[1],
            Tags = new TagCollection { { "type", "news" } },
            Payload = new Dictionary<string, object>()
        };

        var id3 = await memory.UpsertAsync(IndexName, memoryRecord3);
        Console.WriteLine($"Insert 3: {id3} {memoryRecord3.Id}");

        Console.WriteLine("===== UPDATE RECORD 3 =====");

        memoryRecord3.Tags.Add("updated", "yes");
        id3 = await memory.UpsertAsync(IndexName, memoryRecord3);
        Console.WriteLine($"Update 3: {id3} {memoryRecord3.Id}");

        Console.WriteLine("===== SEARCH 1 =====");

        var similarList = memory.GetSimilarListAsync(
            IndexName, text: Text1, limit: 10, withEmbeddings: true, minRelevance: 0.7);
        await foreach (var (record, score) in similarList)
        {
            Console.WriteLine(record.Id);
            Console.WriteLine(" score: " + score);
            Console.WriteLine(" tags: " + record.Tags.Count);
            Console.WriteLine(" size: " + record.Vector.Length);
        }

        Console.WriteLine("===== DELETE =====");

        // Delete from the same index the records were upserted into
        // (the original targeted a hard-coded "test" index, so nothing was cleaned up).
        await memory.DeleteAsync(IndexName, new MemoryRecord { Id = "memory 1" });
        await memory.DeleteAsync(IndexName, new MemoryRecord { Id = "memory 2" });
        await memory.DeleteAsync(IndexName, new MemoryRecord { Id = "memory 3" });

        Console.WriteLine("== Done ==");
    }

    /// <summary>
    /// Loads configuration, builds the memory connector and prepares the two embeddings
    /// used by the test (either generated by OpenAI or hard-coded fakes).
    /// </summary>
    /// <returns>The memory connector and the embeddings for <c>Text1</c> and <c>Text2</c>, in that order.</returns>
    /// <exception cref="InvalidOperationException">
    /// A required configuration section is missing from the settings files.
    /// </exception>
    private static async Task<(AzureCosmosDBMongoDBMemory, Embedding[])> SetupAsync()
    {
        IConfiguration cfg = new ConfigurationBuilder()
            .AddJsonFile("appsettings.json")
            .AddJsonFile("appsettings.Development.json", optional: true)
            .Build();

        var config = cfg.GetSection("KernelMemory:Services:AzureCosmosDBMongoDB").Get<AzureCosmosDBMongoDBConfig>()
                     ?? throw new InvalidOperationException("AzureCosmosDBMongoDB config not found");
        var useRealEmbeddingGenerator = cfg.GetValue<bool>("UseRealEmbeddingGenerator");

        ITextEmbeddingGenerator embeddingGenerator;
        if (useRealEmbeddingGenerator)
        {
            // The settings file nests providers under "Services" (plural); the OpenAI
            // section is only required when the real generator is requested.
            var openAIConfig = cfg.GetSection("KernelMemory:Services:OpenAI").Get<OpenAIConfig>()
                               ?? throw new InvalidOperationException("OpenAI config not found");
            embeddingGenerator = new OpenAITextEmbeddingGenerator(openAIConfig, log: null);
        }
        else
        {
            embeddingGenerator = new MockEmbeddingGenerator();
        }

        var memory = new AzureCosmosDBMongoDBMemory(config, embeddingGenerator);

        // Hard-coded vectors used with the mock generator; replaced below when the real one is in use.
        Embedding embedding1 = new[] { 0f, 0, 1, 0, 1 };
        Embedding embedding2 = new[] { 0, 0, 0.95f, 0.01f, 0.95f };
        if (embeddingGenerator is MockEmbeddingGenerator mock)
        {
            // Register the fakes so text searches through the connector resolve to these vectors.
            mock.AddFakeEmbedding(Text1, embedding1);
            mock.AddFakeEmbedding(Text2, embedding2);
        }
        else
        {
            embedding1 = await embeddingGenerator.GenerateEmbeddingAsync(Text1);
            embedding2 = await embeddingGenerator.GenerateEmbeddingAsync(Text2);
        }

        return (memory, new[] { embedding1, embedding2 });
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"Logging": {
"LogLevel": {
"Default": "Trace",
"Microsoft.AspNetCore": "Trace"
}
},
"UseRealEmbeddingGenerator": false,
"KernelMemory": {
"Services": {
"AzureCosmosDBMongoDB": {
// See https://learn.microsoft.com/azure/cosmos-db/mongodb/vcore/vector-search for details about these settings
"ConnectionString": "",
"DatabaseName": "cosmos_test_db",
"ContainerName": "cosmos_test_collection",
"ApplicationName": "DotNet_Kernel_Memory",
"IndexName": "default_index",
"NumLists": 1,
"NumberOfConnections": 16,
"EfConstruction": 64,
"EfSearch": 40
},
"AzureOpenAIText": {
// "ApiKey" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
"Deployment": "",
// The max number of tokens supported by the deployed model
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191,
// "ChatCompletion" or "TextCompletion"
"APIType": "ChatCompletion",
"MaxRetries": 3
},
"AzureOpenAIEmbedding": {
// "ApiKey" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
"Deployment": "",
// The max number of tokens supported by the deployed model
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191,
"MaxRetries": 3
},
"OpenAI": {
// Name of the model used to generate text (text completion or chat completion)
"TextModel": "gpt-3.5-turbo-16k",
// The max number of tokens supported by the text model.
"TextModelMaxTokenTotal": 16384,
// Name of the model used to generate text embeddings
"EmbeddingModel": "text-embedding-ada-002",
// The max number of tokens supported by the embedding model
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
"EmbeddingModelMaxTokenTotal": 8191,
// OpenAI API Key
"APIKey": "",
// OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs)
"OrgId": "",
// How many times to retry in case of throttling
"MaxRetries": 3
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<RollForward>LatestMajor</RollForward>
<AssemblyName>Microsoft.KernelMemory.MemoryDb.AzureCosmosDBMongoDB</AssemblyName>
<RootNamespace>Microsoft.KernelMemory.MemoryDb.AzureCosmosDBMongoDB</RootNamespace>
<NoWarn>$(NoWarn);CA1724;CS1591;CA1308;</NoWarn>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\service\Abstractions\Abstractions.csproj" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="System.Linq.Async" />
<PackageReference Include="MongoDB.Driver" />
</ItemGroup>

<ItemGroup>
<InternalsVisibleTo Include="Microsoft.AzureCosmosDBMongoDB.UnitTests" />
<InternalsVisibleTo Include="Microsoft.AzureCosmosDBMongoDB.TestApplication" />
</ItemGroup>

<PropertyGroup>
<IsPackable>false</IsPackable>
<PackageId>Microsoft.KernelMemory.MemoryDb.AzureCosmosDBMongoDB</PackageId>
<Product>Azure Cosmos DB for MongoDB connector for Kernel Memory</Product>
<Description>Azure Cosmos DB for MongoDB connector for Microsoft Kernel Memory, to store and search memory using Azure Cosmos DB for MongoDB vCore vector indexing and search features.</Description>
<PackageTags>Memory, RAG, Kernel Memory, Azure Cosmos DB for MongoDB, HNSW, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, ETL</PackageTags>
</PropertyGroup>

<ItemGroup>
<None Include="..\README.md" Link="README.md" Pack="true" PackagePath="." Visible="false" />
</ItemGroup>

</Project>
Loading
Loading