From 7457c50bc752f51957306fe1e2542038d2d2504f Mon Sep 17 00:00:00 2001
From: westey <164392973+westey-m@users.noreply.github.com>
Date: Tue, 5 Nov 2024 10:30:05 +0000
Subject: [PATCH] .Net: Add more steps to getting started project. (#9522)
### Motivation and Context
#7606
### Description
- Add a non string key common code step
- Add a generic data model step
- Add a custom mapper step
### Contribution Checklist
- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone :smile:
---
.../GettingStartedWithVectorStores.csproj | 1 +
.../Step3_Switch_VectorStore.cs | 2 +-
.../Step4_NonStringKey_VectorStore.cs | 195 ++++++++++++++++++
.../Step5_Use_GenericDataModel.cs | 78 +++++++
.../Step6_Use_CustomMapper.cs | 149 +++++++++++++
5 files changed, 424 insertions(+), 1 deletion(-)
create mode 100644 dotnet/samples/GettingStartedWithVectorStores/Step4_NonStringKey_VectorStore.cs
create mode 100644 dotnet/samples/GettingStartedWithVectorStores/Step5_Use_GenericDataModel.cs
create mode 100644 dotnet/samples/GettingStartedWithVectorStores/Step6_Use_CustomMapper.cs
diff --git a/dotnet/samples/GettingStartedWithVectorStores/GettingStartedWithVectorStores.csproj b/dotnet/samples/GettingStartedWithVectorStores/GettingStartedWithVectorStores.csproj
index 1e95a8187551..7a33e7c2fa3b 100644
--- a/dotnet/samples/GettingStartedWithVectorStores/GettingStartedWithVectorStores.csproj
+++ b/dotnet/samples/GettingStartedWithVectorStores/GettingStartedWithVectorStores.csproj
@@ -42,6 +42,7 @@
+
diff --git a/dotnet/samples/GettingStartedWithVectorStores/Step3_Switch_VectorStore.cs b/dotnet/samples/GettingStartedWithVectorStores/Step3_Switch_VectorStore.cs
index 9255b51b78ea..cc6c7443968c 100644
--- a/dotnet/samples/GettingStartedWithVectorStores/Step3_Switch_VectorStore.cs
+++ b/dotnet/samples/GettingStartedWithVectorStores/Step3_Switch_VectorStore.cs
@@ -22,7 +22,7 @@ public class Step3_Switch_VectorStore(ITestOutputHelper output, VectorStoresFixt
[Fact]
public async Task UseAnAzureAISearchVectorStoreAsync()
{
- // Construct a Redis vector store and get the collection.
+ // Construct an Azure AI Search vector store and get the collection.
var vectorStore = new AzureAISearchVectorStore(new SearchIndexClient(
new Uri(TestConfiguration.AzureAISearch.Endpoint),
new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey)));
diff --git a/dotnet/samples/GettingStartedWithVectorStores/Step4_NonStringKey_VectorStore.cs b/dotnet/samples/GettingStartedWithVectorStores/Step4_NonStringKey_VectorStore.cs
new file mode 100644
index 000000000000..906df16d84a1
--- /dev/null
+++ b/dotnet/samples/GettingStartedWithVectorStores/Step4_NonStringKey_VectorStore.cs
@@ -0,0 +1,195 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Runtime.CompilerServices;
+using Microsoft.Extensions.VectorData;
+using Microsoft.SemanticKernel.Connectors.Qdrant;
+using Qdrant.Client;
+
+namespace GettingStartedWithVectorStores;
+
+/// <summary>
+/// Example that shows that you can switch between different vector stores with the same code, in this case
+/// with a vector store that doesn't use string keys.
+/// This sample demonstrates one possible approach, however it is also possible to use generics
+/// in the common code to achieve code reuse.
+/// </summary>
+public class Step4_NonStringKey_VectorStore(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture
+{
+ /// <summary>
+ /// Here we are going to use the same code that we used in <see cref="Step1_Ingest_Data"/> and <see cref="Step2_Vector_Search"/>
+ /// but now with a Qdrant vector store collection (a <c>QdrantVectorStoreRecordCollection</c>).
+ /// Qdrant uses Guid or ulong as the key type, but the common code works with a string key. The string keys of the records created
+ /// in <see cref="Step1_Ingest_Data"/> contain numbers though, so it's possible for us to convert them to ulong.
+ /// In this example, we'll demonstrate how to do that.
+ ///
+ /// This example requires a Qdrant server up and running. To run a Qdrant server in a Docker container, use the following command:
+ /// docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant:latest
+ /// </summary>
+ [Fact]
+ public async Task UseAQdrantVectorStoreAsync()
+ {
+ // Construct a Qdrant vector store collection.
+ var collection = new QdrantVectorStoreRecordCollection(new QdrantClient("localhost"), "skglossary");
+
+ // Wrap the collection using a decorator that allows us to expose a version that uses string keys, but internally
+ // we convert to and from ulong.
+ var stringKeyCollection = new MappingVectorStoreRecordCollection(
+ collection,
+ p => ulong.Parse(p),
+ i => i.ToString(),
+ p => new UlongGlossary { Key = ulong.Parse(p.Key), Category = p.Category, Term = p.Term, Definition = p.Definition, DefinitionEmbedding = p.DefinitionEmbedding },
+ i => new Glossary { Key = i.Key.ToString("D"), Category = i.Category, Term = i.Term, Definition = i.Definition, DefinitionEmbedding = i.DefinitionEmbedding });
+
+ // Ingest data into the collection using the same code as we used in Step1 with the InMemory Vector Store.
+ await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(stringKeyCollection, fixture.TextEmbeddingGenerationService);
+
+ // Search the vector store using the same code as we used in Step2 with the InMemory Vector Store.
+ var searchResultItem = await Step2_Vector_Search.SearchVectorStoreAsync(
+ stringKeyCollection,
+ "What is an Application Programming Interface?",
+ fixture.TextEmbeddingGenerationService);
+
+ // Write the search result with its score to the console.
+ Console.WriteLine(searchResultItem.Record.Definition);
+ Console.WriteLine(searchResultItem.Score);
+ }
+
+ /// <summary>
+ /// Data model that uses a ulong as the key type instead of a string.
+ /// </summary>
+ private sealed class UlongGlossary
+ {
+ [VectorStoreRecordKey]
+ public ulong Key { get; set; }
+
+ [VectorStoreRecordData(IsFilterable = true)]
+ public string Category { get; set; }
+
+ [VectorStoreRecordData]
+ public string Term { get; set; }
+
+ [VectorStoreRecordData]
+ public string Definition { get; set; }
+
+ [VectorStoreRecordVector(Dimensions: 1536)]
+ public ReadOnlyMemory DefinitionEmbedding { get; set; }
+ }
+
+ /// <summary>
+ /// Simple decorator class that allows conversion of keys and records from one type to another.
+ /// </summary>
+ private sealed class MappingVectorStoreRecordCollection : IVectorStoreRecordCollection
+ where TPublicKey : notnull
+ where TInternalKey : notnull
+ {
+ private readonly IVectorStoreRecordCollection _collection;
+ private readonly Func _publicToInternalKeyMapper;
+ private readonly Func _internalToPublicKeyMapper;
+ private readonly Func _publicToInternalRecordMapper;
+ private readonly Func _internalToPublicRecordMapper;
+
+ public MappingVectorStoreRecordCollection(
+ IVectorStoreRecordCollection collection,
+ Func publicToInternalKeyMapper,
+ Func internalToPublicKeyMapper,
+ Func publicToInternalRecordMapper,
+ Func internalToPublicRecordMapper)
+ {
+ this._collection = collection;
+ this._publicToInternalKeyMapper = publicToInternalKeyMapper;
+ this._internalToPublicKeyMapper = internalToPublicKeyMapper;
+ this._publicToInternalRecordMapper = publicToInternalRecordMapper;
+ this._internalToPublicRecordMapper = internalToPublicRecordMapper;
+ }
+
+ /// <inheritdoc />
+ public string CollectionName => this._collection.CollectionName;
+
+ /// <inheritdoc />
+ public Task CollectionExistsAsync(CancellationToken cancellationToken = default)
+ {
+ return this._collection.CollectionExistsAsync(cancellationToken);
+ }
+
+ /// <inheritdoc />
+ public Task CreateCollectionAsync(CancellationToken cancellationToken = default)
+ {
+ return this._collection.CreateCollectionAsync(cancellationToken);
+ }
+
+ /// <inheritdoc />
+ public Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default)
+ {
+ return this._collection.CreateCollectionIfNotExistsAsync(cancellationToken);
+ }
+
+ /// <inheritdoc />
+ public Task DeleteAsync(TPublicKey key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ return this._collection.DeleteAsync(this._publicToInternalKeyMapper(key), options, cancellationToken);
+ }
+
+ /// <inheritdoc />
+ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ return this._collection.DeleteBatchAsync(keys.Select(this._publicToInternalKeyMapper), options, cancellationToken);
+ }
+
+ /// <inheritdoc />
+ public Task DeleteCollectionAsync(CancellationToken cancellationToken = default)
+ {
+ return this._collection.DeleteCollectionAsync(cancellationToken);
+ }
+
+ /// <inheritdoc />
+ public async Task GetAsync(TPublicKey key, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ var internalRecord = await this._collection.GetAsync(this._publicToInternalKeyMapper(key), options, cancellationToken).ConfigureAwait(false);
+ if (internalRecord == null)
+ {
+ return default; // No internal record was returned for this key, so there is nothing to map.
+ }
+
+ return this._internalToPublicRecordMapper(internalRecord);
+ }
+
+ /// <inheritdoc />
+ public IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ var internalRecords = this._collection.GetBatchAsync(keys.Select(this._publicToInternalKeyMapper), options, cancellationToken);
+ return internalRecords.Select(this._internalToPublicRecordMapper);
+ }
+
+ /// <inheritdoc />
+ public async Task UpsertAsync(TPublicRecord record, UpsertRecordOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ var internalRecord = this._publicToInternalRecordMapper(record);
+ var internalKey = await this._collection.UpsertAsync(internalRecord, options, cancellationToken).ConfigureAwait(false);
+ return this._internalToPublicKeyMapper(internalKey);
+ }
+
+ /// <inheritdoc />
+ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ var internalRecords = records.Select(this._publicToInternalRecordMapper);
+ var internalKeys = this._collection.UpsertBatchAsync(internalRecords, options, cancellationToken);
+ await foreach (var internalKey in internalKeys.ConfigureAwait(false))
+ {
+ yield return this._internalToPublicKeyMapper(internalKey);
+ }
+ }
+
+ /// <inheritdoc />
+ public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ var searchResults = await this._collection.VectorizedSearchAsync(vector, options, cancellationToken).ConfigureAwait(false);
+ var publicResultRecords = searchResults.Results.Select(result => new VectorSearchResult(this._internalToPublicRecordMapper(result.Record), result.Score));
+
+ return new VectorSearchResults(publicResultRecords)
+ {
+ TotalCount = searchResults.TotalCount,
+ Metadata = searchResults.Metadata,
+ };
+ }
+ }
+}
diff --git a/dotnet/samples/GettingStartedWithVectorStores/Step5_Use_GenericDataModel.cs b/dotnet/samples/GettingStartedWithVectorStores/Step5_Use_GenericDataModel.cs
new file mode 100644
index 000000000000..449daf1c19b1
--- /dev/null
+++ b/dotnet/samples/GettingStartedWithVectorStores/Step5_Use_GenericDataModel.cs
@@ -0,0 +1,78 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.Extensions.VectorData;
+using Microsoft.SemanticKernel.Connectors.Redis;
+using Microsoft.SemanticKernel.Embeddings;
+using StackExchange.Redis;
+
+namespace GettingStartedWithVectorStores;
+
+/// <summary>
+/// Example that shows that you can use the generic data model to interact with a vector database.
+/// This makes it possible to use the vector store abstractions without having to create your own data model.
+/// </summary>
+public class Step5_Use_GenericDataModel(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture
+{
+ /// <summary>
+ /// Example showing how to query a vector store that uses the generic data model.
+ ///
+ /// This example requires a Redis server running on localhost:6379. To run a Redis server in a Docker container, use the following command:
+ /// docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
+ /// </summary>
+ [Fact]
+ public async Task SearchAVectorStoreWithGenericDataModelAsync()
+ {
+ // Construct a Redis vector store.
+ var vectorStore = new RedisVectorStore(ConnectionMultiplexer.Connect("localhost:6379").GetDatabase());
+
+ // First, let's use the code from step 1 to ingest data into the vector store
+ // using the custom data model, simulating a scenario where someone else ingested
+ // the data into the database previously.
+ var collection = vectorStore.GetCollection("skglossary"); // NOTE(review): 'collection' is not referenced again in this method; it looks redundant with the next line - confirm.
+ var customDataModelCollection = vectorStore.GetCollection("skglossary");
+ await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(customDataModelCollection, fixture.TextEmbeddingGenerationService);
+
+ // To use the generic data model, we still have to describe the storage schema to the vector store
+ // using a record definition. The benefit over a custom data model is that this definition
+ // does not have to be known at compile time.
+ // E.g. it can be read from a configuration or retrieved from a service.
+ var recordDefinition = new VectorStoreRecordDefinition
+ {
+ Properties = new List
+ {
+ new VectorStoreRecordKeyProperty("Key", typeof(string)),
+ new VectorStoreRecordDataProperty("Category", typeof(string)),
+ new VectorStoreRecordDataProperty("Term", typeof(string)),
+ new VectorStoreRecordDataProperty("Definition", typeof(string)),
+ new VectorStoreRecordVectorProperty("DefinitionEmbedding", typeof(ReadOnlyMemory)) { Dimensions = 1536 },
+ }
+ };
+
+ // Now, let's create a collection that uses the generic data model.
+ var genericDataModelCollection = vectorStore.GetCollection>("skglossary", recordDefinition);
+
+ // Generate an embedding from the search string.
+ var searchString = "How do I provide additional context to an LLM?";
+ var searchVector = await fixture.TextEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);
+
+ // Search the generic data model collection and get the single most relevant result.
+ var searchResult = await genericDataModelCollection.VectorizedSearchAsync(
+ searchVector,
+ new()
+ {
+ Top = 1,
+ });
+ var searchResultItems = await searchResult.Results.ToListAsync();
+
+ // Write the search result with its score to the console.
+ // Note that here we can loop through all the data properties
+ // without knowing the schema, since the data properties are
+ // stored as a dictionary of string keys and object values
+ // when using the generic data model.
+ foreach (var dataProperty in searchResultItems.First().Record.Data)
+ {
+ Console.WriteLine($"{dataProperty.Key}: {dataProperty.Value}");
+ }
+ Console.WriteLine(searchResultItems.First().Score);
+ }
+}
diff --git a/dotnet/samples/GettingStartedWithVectorStores/Step6_Use_CustomMapper.cs b/dotnet/samples/GettingStartedWithVectorStores/Step6_Use_CustomMapper.cs
new file mode 100644
index 000000000000..cc86a773b0c0
--- /dev/null
+++ b/dotnet/samples/GettingStartedWithVectorStores/Step6_Use_CustomMapper.cs
@@ -0,0 +1,149 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Text.Json;
+using System.Text.Json.Nodes;
+using Azure;
+using Azure.Search.Documents.Indexes;
+using Microsoft.Extensions.VectorData;
+using Microsoft.SemanticKernel.Connectors.AzureAISearch;
+using Microsoft.SemanticKernel.Embeddings;
+
+namespace GettingStartedWithVectorStores;
+
+/// <summary>
+/// Example that shows how you can use custom mappers if you wish the data model and storage schema to differ.
+/// </summary>
+public class Step6_Use_CustomMapper(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture
+{
+ /// <summary>
+ /// Example showing how to upsert and query records when using a custom mapper if you wish
+ /// the data model and storage schema to differ.
+ ///
+ /// This example requires an Azure AI Search service to be available.
+ /// </summary>
+ [Fact]
+ public async Task UseCustomMapperAsync()
+ {
+ // When using a custom mapper, we still have to describe the storage schema to the vector store
+ // using a record definition. Since the storage schema does not match the data model
+ // it won't make sense for the vector store to infer the schema from the data model.
+ var recordDefinition = new VectorStoreRecordDefinition
+ {
+ Properties = new List
+ {
+ new VectorStoreRecordKeyProperty("Key", typeof(string)),
+ new VectorStoreRecordDataProperty("Category", typeof(string)),
+ new VectorStoreRecordDataProperty("Term", typeof(string)),
+ new VectorStoreRecordDataProperty("Definition", typeof(string)),
+ new VectorStoreRecordVectorProperty("DefinitionEmbedding", typeof(ReadOnlyMemory)) { Dimensions = 1536 },
+ }
+ };
+
+ // Construct an Azure AI Search vector store collection and
+ // pass in the custom mapper and record definition.
+ var collection = new AzureAISearchVectorStoreRecordCollection(
+ new SearchIndexClient(
+ new Uri(TestConfiguration.AzureAISearch.Endpoint),
+ new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey)),
+ "skglossary",
+ new()
+ {
+ JsonObjectCustomMapper = new CustomMapper(),
+ VectorStoreRecordDefinition = recordDefinition
+ });
+
+ // Create the collection if it doesn't exist.
+ // This call will use the schema defined by the record definition
+ // above for creating the collection.
+ await collection.CreateCollectionIfNotExistsAsync();
+
+ // Now we can upsert a record using
+ // the data model, even though it doesn't match the storage schema.
+ var definition = "A set of rules and protocols that allows one software application to interact with another.";
+ await collection.UpsertAsync(new ComplexGlossary
+ {
+ Key = "1",
+ Metadata = new Metadata
+ {
+ Category = "API",
+ Term = "Application Programming Interface"
+ },
+ Definition = definition,
+ DefinitionEmbedding = await fixture.TextEmbeddingGenerationService.GenerateEmbeddingAsync(definition)
+ });
+
+ // Generate an embedding from the search string.
+ var searchVector = await fixture.TextEmbeddingGenerationService.GenerateEmbeddingAsync("How do two software applications interact with another?");
+
+ // Search the vector store.
+ var searchResult = await collection.VectorizedSearchAsync(
+ searchVector,
+ new()
+ {
+ Top = 1
+ });
+ var searchResultItem = await searchResult.Results.FirstAsync();
+
+ // Write the search result with its score to the console.
+ Console.WriteLine(searchResultItem.Record.Metadata.Term);
+ Console.WriteLine(searchResultItem.Record.Definition);
+ Console.WriteLine(searchResultItem.Score);
+ }
+
+ /// <summary>
+ /// Sample mapper class that maps between the custom data model
+ /// and the <see cref="JsonObject"/> that should match the storage schema.
+ /// </summary>
+ private sealed class CustomMapper : IVectorStoreRecordMapper
+ {
+ public JsonObject MapFromDataToStorageModel(ComplexGlossary dataModel)
+ {
+ return new JsonObject
+ {
+ ["Key"] = dataModel.Key,
+ ["Category"] = dataModel.Metadata.Category,
+ ["Term"] = dataModel.Metadata.Term,
+ ["Definition"] = dataModel.Definition,
+ ["DefinitionEmbedding"] = JsonSerializer.SerializeToNode(dataModel.DefinitionEmbedding.ToArray())
+ };
+ }
+
+ public ComplexGlossary MapFromStorageToDataModel(JsonObject storageModel, StorageToDataModelMapperOptions options)
+ {
+ return new ComplexGlossary
+ {
+ Key = storageModel["Key"]!.ToString(),
+ Metadata = new Metadata
+ {
+ Category = storageModel["Category"]!.ToString(),
+ Term = storageModel["Term"]!.ToString()
+ },
+ Definition = storageModel["Definition"]!.ToString(),
+ DefinitionEmbedding = JsonSerializer.Deserialize>(storageModel["DefinitionEmbedding"])
+ };
+ }
+ }
+
+ /// <summary>
+ /// Sample model class that represents a glossary entry.
+ /// This model differs from the model used in previous steps by having a complex property
+ /// that contains the category and term.
+ /// </summary>
+ private sealed class ComplexGlossary
+ {
+ public string Key { get; set; }
+
+ public Metadata Metadata { get; set; }
+
+ public string Definition { get; set; }
+
+ public ReadOnlyMemory DefinitionEmbedding { get; set; }
+ }
+
+ private sealed class Metadata
+ {
+ public string Category { get; set; }
+
+ public string Term { get; set; }
+ }
+}