-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
.Net: Add more steps to getting started project. (#9522)
### Motivation and Context

#7606

### Description

- Add a non-string-key common code step
- Add a generic data model step
- Add a custom mapper step

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
- Loading branch information
Showing
5 changed files
with
424 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
195 changes: 195 additions & 0 deletions
195
dotnet/samples/GettingStartedWithVectorStores/Step4_NonStringKey_VectorStore.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System.Runtime.CompilerServices; | ||
using Microsoft.Extensions.VectorData; | ||
using Microsoft.SemanticKernel.Connectors.Qdrant; | ||
using Qdrant.Client; | ||
|
||
namespace GettingStartedWithVectorStores; | ||
|
||
/// <summary>
/// Example that shows that you can switch between different vector stores with the same code, in this case
/// with a vector store that doesn't use string keys.
/// This sample demonstrates one possible approach; it is also possible to use generics
/// in the common code to achieve code reuse.
/// </summary>
public class Step4_NonStringKey_VectorStore(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture<VectorStoresFixture>
{
    /// <summary>
    /// Here we are going to use the same code that we used in <see cref="Step1_Ingest_Data"/> and <see cref="Step2_Vector_Search"/>
    /// but now with a <see cref="QdrantVectorStore"/>.
    /// Qdrant uses Guid or ulong as the key type, but the common code works with a string key. The string keys of the records created
    /// in <see cref="Step1_Ingest_Data"/> contain numbers though, so it's possible for us to convert them to ulong.
    /// In this example, we'll demonstrate how to do that.
    ///
    /// This example requires a Qdrant server up and running. To run a Qdrant server in a Docker container, use the following command:
    /// docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant:latest
    /// </summary>
    [Fact]
    public async Task UseAQdrantVectorStoreAsync()
    {
        // The keys are machine-readable identifiers, so always convert them with the invariant culture
        // instead of the culture of whatever machine the sample happens to run on.
        var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

        // Construct a Qdrant vector store collection. The client is disposed when the method completes.
        using var qdrantClient = new QdrantClient("localhost");
        var collection = new QdrantVectorStoreRecordCollection<UlongGlossary>(qdrantClient, "skglossary");

        // Wrap the collection using a decorator that allows us to expose a version that uses string keys, but internally
        // we convert to and from ulong.
        var stringKeyCollection = new MappingVectorStoreRecordCollection<string, ulong, Glossary, UlongGlossary>(
            collection,
            p => ulong.Parse(p, invariantCulture),
            i => i.ToString("D", invariantCulture),
            p => new UlongGlossary { Key = ulong.Parse(p.Key, invariantCulture), Category = p.Category, Term = p.Term, Definition = p.Definition, DefinitionEmbedding = p.DefinitionEmbedding },
            i => new Glossary { Key = i.Key.ToString("D", invariantCulture), Category = i.Category, Term = i.Term, Definition = i.Definition, DefinitionEmbedding = i.DefinitionEmbedding });

        // Ingest data into the collection using the same code as we used in Step1 with the InMemory Vector Store.
        await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(stringKeyCollection, fixture.TextEmbeddingGenerationService);

        // Search the vector store using the same code as we used in Step2 with the InMemory Vector Store.
        var searchResultItem = await Step2_Vector_Search.SearchVectorStoreAsync(
            stringKeyCollection,
            "What is an Application Programming Interface?",
            fixture.TextEmbeddingGenerationService);

        // Write the search result with its score to the console.
        Console.WriteLine(searchResultItem.Record.Definition);
        Console.WriteLine(searchResultItem.Score);
    }

    /// <summary>
    /// Data model that uses a ulong as the key type instead of a string.
    /// It carries the same properties that the mappers in <see cref="UseAQdrantVectorStoreAsync"/> copy
    /// to and from the string-keyed Glossary model.
    /// </summary>
    private sealed class UlongGlossary
    {
        /// <summary>The record key, stored as a ulong.</summary>
        [VectorStoreRecordKey]
        public ulong Key { get; set; }

        /// <summary>The glossary category; declared as filterable.</summary>
        [VectorStoreRecordData(IsFilterable = true)]
        public string Category { get; set; }

        /// <summary>The glossary term.</summary>
        [VectorStoreRecordData]
        public string Term { get; set; }

        /// <summary>The definition of the term.</summary>
        [VectorStoreRecordData]
        public string Definition { get; set; }

        /// <summary>The embedding of the definition, declared with 1536 dimensions.</summary>
        [VectorStoreRecordVector(Dimensions: 1536)]
        public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
    }

    /// <summary>
    /// Simple decorator class that allows conversion of keys and records from one type to another.
    /// Every call is forwarded to the wrapped collection; keys and records are translated on the way in
    /// (public to internal) and on the way out (internal to public) using the supplied mapper delegates.
    /// </summary>
    /// <typeparam name="TPublicKey">The key type exposed to callers of the decorator.</typeparam>
    /// <typeparam name="TInternalKey">The key type used by the wrapped collection.</typeparam>
    /// <typeparam name="TPublicRecord">The record type exposed to callers of the decorator.</typeparam>
    /// <typeparam name="TInternalRecord">The record type used by the wrapped collection.</typeparam>
    private sealed class MappingVectorStoreRecordCollection<TPublicKey, TInternalKey, TPublicRecord, TInternalRecord> : IVectorStoreRecordCollection<TPublicKey, TPublicRecord>
        where TPublicKey : notnull
        where TInternalKey : notnull
    {
        private readonly IVectorStoreRecordCollection<TInternalKey, TInternalRecord> _collection;
        private readonly Func<TPublicKey, TInternalKey> _publicToInternalKeyMapper;
        private readonly Func<TInternalKey, TPublicKey> _internalToPublicKeyMapper;
        private readonly Func<TPublicRecord, TInternalRecord> _publicToInternalRecordMapper;
        private readonly Func<TInternalRecord, TPublicRecord> _internalToPublicRecordMapper;

        /// <summary>
        /// Initializes a new instance of the decorator.
        /// </summary>
        /// <param name="collection">The collection to wrap.</param>
        /// <param name="publicToInternalKeyMapper">Converts a public key to the wrapped collection's key type.</param>
        /// <param name="internalToPublicKeyMapper">Converts a key of the wrapped collection to the public key type.</param>
        /// <param name="publicToInternalRecordMapper">Converts a public record to the wrapped collection's record type.</param>
        /// <param name="internalToPublicRecordMapper">Converts a record of the wrapped collection to the public record type.</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        public MappingVectorStoreRecordCollection(
            IVectorStoreRecordCollection<TInternalKey, TInternalRecord> collection,
            Func<TPublicKey, TInternalKey> publicToInternalKeyMapper,
            Func<TInternalKey, TPublicKey> internalToPublicKeyMapper,
            Func<TPublicRecord, TInternalRecord> publicToInternalRecordMapper,
            Func<TInternalRecord, TPublicRecord> internalToPublicRecordMapper)
        {
            // Fail fast here instead of with a NullReferenceException on first use.
            this._collection = collection ?? throw new ArgumentNullException(nameof(collection));
            this._publicToInternalKeyMapper = publicToInternalKeyMapper ?? throw new ArgumentNullException(nameof(publicToInternalKeyMapper));
            this._internalToPublicKeyMapper = internalToPublicKeyMapper ?? throw new ArgumentNullException(nameof(internalToPublicKeyMapper));
            this._publicToInternalRecordMapper = publicToInternalRecordMapper ?? throw new ArgumentNullException(nameof(publicToInternalRecordMapper));
            this._internalToPublicRecordMapper = internalToPublicRecordMapper ?? throw new ArgumentNullException(nameof(internalToPublicRecordMapper));
        }

        /// <inheritdoc />
        public string CollectionName => this._collection.CollectionName;

        /// <inheritdoc />
        public Task<bool> CollectionExistsAsync(CancellationToken cancellationToken = default)
        {
            return this._collection.CollectionExistsAsync(cancellationToken);
        }

        /// <inheritdoc />
        public Task CreateCollectionAsync(CancellationToken cancellationToken = default)
        {
            return this._collection.CreateCollectionAsync(cancellationToken);
        }

        /// <inheritdoc />
        public Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default)
        {
            return this._collection.CreateCollectionIfNotExistsAsync(cancellationToken);
        }

        /// <inheritdoc />
        public Task DeleteAsync(TPublicKey key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default)
        {
            return this._collection.DeleteAsync(this._publicToInternalKeyMapper(key), options, cancellationToken);
        }

        /// <inheritdoc />
        public Task DeleteBatchAsync(IEnumerable<TPublicKey> keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default)
        {
            return this._collection.DeleteBatchAsync(keys.Select(this._publicToInternalKeyMapper), options, cancellationToken);
        }

        /// <inheritdoc />
        public Task DeleteCollectionAsync(CancellationToken cancellationToken = default)
        {
            return this._collection.DeleteCollectionAsync(cancellationToken);
        }

        /// <inheritdoc />
        public async Task<TPublicRecord?> GetAsync(TPublicKey key, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
        {
            var internalRecord = await this._collection.GetAsync(this._publicToInternalKeyMapper(key), options, cancellationToken).ConfigureAwait(false);

            // A missing record is passed through as the default value rather than handed to the mapper.
            if (internalRecord is null)
            {
                return default;
            }

            return this._internalToPublicRecordMapper(internalRecord);
        }

        /// <inheritdoc />
        public IAsyncEnumerable<TPublicRecord> GetBatchAsync(IEnumerable<TPublicKey> keys, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
        {
            var internalRecords = this._collection.GetBatchAsync(keys.Select(this._publicToInternalKeyMapper), options, cancellationToken);
            return internalRecords.Select(this._internalToPublicRecordMapper);
        }

        /// <inheritdoc />
        public async Task<TPublicKey> UpsertAsync(TPublicRecord record, UpsertRecordOptions? options = null, CancellationToken cancellationToken = default)
        {
            var internalRecord = this._publicToInternalRecordMapper(record);
            var internalKey = await this._collection.UpsertAsync(internalRecord, options, cancellationToken).ConfigureAwait(false);
            return this._internalToPublicKeyMapper(internalKey);
        }

        /// <inheritdoc />
        public async IAsyncEnumerable<TPublicKey> UpsertBatchAsync(IEnumerable<TPublicRecord> records, UpsertRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
        {
            var internalRecords = records.Select(this._publicToInternalRecordMapper);
            var internalKeys = this._collection.UpsertBatchAsync(internalRecords, options, cancellationToken);

            // Stream the keys back one at a time, converting each as it arrives.
            await foreach (var internalKey in internalKeys.ConfigureAwait(false))
            {
                yield return this._internalToPublicKeyMapper(internalKey);
            }
        }

        /// <inheritdoc />
        public async Task<VectorSearchResults<TPublicRecord>> VectorizedSearchAsync<TVector>(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default)
        {
            var searchResults = await this._collection.VectorizedSearchAsync(vector, options, cancellationToken).ConfigureAwait(false);

            // Re-wrap each result with the mapped record while keeping its score.
            var publicResultRecords = searchResults.Results.Select(result => new VectorSearchResult<TPublicRecord>(this._internalToPublicRecordMapper(result.Record), result.Score));

            return new VectorSearchResults<TPublicRecord>(publicResultRecords)
            {
                TotalCount = searchResults.TotalCount,
                Metadata = searchResults.Metadata,
            };
        }
    }
}
78 changes: 78 additions & 0 deletions
78
dotnet/samples/GettingStartedWithVectorStores/Step5_Use_GenericDataModel.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.Extensions.VectorData; | ||
using Microsoft.SemanticKernel.Connectors.Redis; | ||
using Microsoft.SemanticKernel.Embeddings; | ||
using StackExchange.Redis; | ||
|
||
namespace GettingStartedWithVectorStores; | ||
|
||
/// <summary>
/// Example that shows that you can use the generic data model to interact with a vector database.
/// This makes it possible to use the vector store abstractions without having to create your own data model.
/// </summary>
public class Step5_Use_GenericDataModel(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture<VectorStoresFixture>
{
    /// <summary>
    /// Example showing how to query a vector store that uses the generic data model.
    ///
    /// This example requires a Redis server running on localhost:6379. To run a Redis server in a Docker container, use the following command:
    /// docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
    /// </summary>
    [Fact]
    public async Task SearchAVectorStoreWithGenericDataModelAsync()
    {
        // Construct a redis vector store. The connection is opened asynchronously and disposed when the method completes.
        using var redisConnection = await ConnectionMultiplexer.ConnectAsync("localhost:6379");
        var vectorStore = new RedisVectorStore(redisConnection.GetDatabase());

        // First, let's use the code from step 1 to ingest data into the vector store
        // using the custom data model, simulating a scenario where someone else ingested
        // the data into the database previously.
        var customDataModelCollection = vectorStore.GetCollection<string, Glossary>("skglossary");
        await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(customDataModelCollection, fixture.TextEmbeddingGenerationService);

        // To use the generic data model, we still have to describe the storage schema to the vector store
        // using a record definition. The benefit over a custom data model is that this definition
        // does not have to be known at compile time.
        // E.g. it can be read from a configuration or retrieved from a service.
        var recordDefinition = new VectorStoreRecordDefinition
        {
            Properties = new List<VectorStoreRecordProperty>
            {
                new VectorStoreRecordKeyProperty("Key", typeof(string)),
                new VectorStoreRecordDataProperty("Category", typeof(string)),
                new VectorStoreRecordDataProperty("Term", typeof(string)),
                new VectorStoreRecordDataProperty("Definition", typeof(string)),
                new VectorStoreRecordVectorProperty("DefinitionEmbedding", typeof(ReadOnlyMemory<float>)) { Dimensions = 1536 },
            }
        };

        // Now, let's create a collection that uses the generic data model.
        var genericDataModelCollection = vectorStore.GetCollection<string, VectorStoreGenericDataModel<string>>("skglossary", recordDefinition);

        // Generate an embedding from the search string.
        var searchString = "How do I provide additional context to an LLM?";
        var searchVector = await fixture.TextEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);

        // Search the generic data model collection and get the single most relevant result.
        var searchResult = await genericDataModelCollection.VectorizedSearchAsync(
            searchVector,
            new()
            {
                Top = 1,
            });
        var searchResultItems = await searchResult.Results.ToListAsync();

        // Only one result was requested, so look it up once and reuse it below.
        var topResult = searchResultItems.First();

        // Write the search result with its score to the console.
        // Note that here we can loop through all the data properties
        // without knowing the schema, since the data properties are
        // stored as a dictionary of string keys and object values
        // when using the generic data model.
        foreach (var dataProperty in topResult.Record.Data)
        {
            Console.WriteLine($"{dataProperty.Key}: {dataProperty.Value}");
        }
        Console.WriteLine(topResult.Score);
    }
}
Oops, something went wrong.