Skip to content

Commit

Permalink
.Net: Add more steps to getting started project. (#9522)
Browse files Browse the repository at this point in the history
### Motivation and Context

#7606

### Description

- Add a non-string-key common code step
- Add a generic data model step
- Add a custom mapper step

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
  • Loading branch information
westey-m authored Nov 5, 2024
1 parent 7add1cb commit 7457c50
Show file tree
Hide file tree
Showing 5 changed files with 424 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
<ProjectReference Include="..\..\src\Connectors\Connectors.AzureOpenAI\Connectors.AzureOpenAI.csproj" />
<ProjectReference Include="..\..\src\Connectors\Connectors.Memory.AzureAISearch\Connectors.Memory.AzureAISearch.csproj" />
<ProjectReference Include="..\..\src\Connectors\Connectors.Memory.InMemory\Connectors.Memory.InMemory.csproj" />
<ProjectReference Include="..\..\src\Connectors\Connectors.Memory.Qdrant\Connectors.Memory.Qdrant.csproj" />
<ProjectReference Include="..\..\src\Connectors\Connectors.Memory.Redis\Connectors.Memory.Redis.csproj" />
<ProjectReference Include="..\..\src\SemanticKernel.Abstractions\SemanticKernel.Abstractions.csproj" />
<ProjectReference Include="..\..\src\SemanticKernel.Core\SemanticKernel.Core.csproj" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class Step3_Switch_VectorStore(ITestOutputHelper output, VectorStoresFixt
[Fact]
public async Task UseAnAzureAISearchVectorStoreAsync()
{
// Construct a Redis vector store and get the collection.
// Construct an Azure AI Search vector store and get the collection.
var vectorStore = new AzureAISearchVectorStore(new SearchIndexClient(
new Uri(TestConfiguration.AzureAISearch.Endpoint),
new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey)));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Runtime.CompilerServices;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using Qdrant.Client;

namespace GettingStartedWithVectorStores;

/// <summary>
/// Example that shows that you can switch between different vector stores with the same code, in this case
/// with a vector store that doesn't use string keys.
/// This sample demonstrates one possible approach; however, it is also possible to use generics
/// in the common code to achieve code reuse.
/// </summary>
public class Step4_NonStringKey_VectorStore(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture<VectorStoresFixture>
{
/// <summary>
/// Here we are going to use the same code that we used in <see cref="Step1_Ingest_Data"/> and <see cref="Step2_Vector_Search"/>
/// but now with a <see cref="QdrantVectorStore"/>.
/// Qdrant uses Guid or ulong as the key type, but the common code works with a string key. The string keys of the records created
/// in <see cref="Step1_Ingest_Data"/> contain numbers though, so it's possible for us to convert them to ulong.
/// In this example, we'll demonstrate how to do that.
///
/// This example requires a Qdrant server up and running. To run a Qdrant server in a Docker container, use the following command:
/// docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant:latest
/// </summary>
/// <returns>A task that completes once the data has been ingested, searched and the top result written to the console.</returns>
[Fact]
public async Task UseAQdrantVectorStoreAsync()
{
    // Keys are machine-readable identifiers rather than user-facing text, so they are always
    // parsed and formatted with the invariant culture, independent of the machine's locale.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    // The Qdrant client is disposable, so release it once the sample has finished.
    using var qdrantClient = new QdrantClient("localhost");

    // Construct a Qdrant vector store collection.
    var collection = new QdrantVectorStoreRecordCollection<UlongGlossary>(qdrantClient, "skglossary");

    // Wrap the collection using a decorator that allows us to expose a version that uses string keys, but internally
    // we convert to and from ulong.
    var stringKeyCollection = new MappingVectorStoreRecordCollection<string, ulong, Glossary, UlongGlossary>(
        collection,
        p => ulong.Parse(p, invariantCulture),
        i => i.ToString("D", invariantCulture),
        p => new UlongGlossary { Key = ulong.Parse(p.Key, invariantCulture), Category = p.Category, Term = p.Term, Definition = p.Definition, DefinitionEmbedding = p.DefinitionEmbedding },
        i => new Glossary { Key = i.Key.ToString("D", invariantCulture), Category = i.Category, Term = i.Term, Definition = i.Definition, DefinitionEmbedding = i.DefinitionEmbedding });

    // Ingest data into the collection using the same code as we used in Step1 with the InMemory Vector Store.
    await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(stringKeyCollection, fixture.TextEmbeddingGenerationService);

    // Search the vector store using the same code as we used in Step2 with the InMemory Vector Store.
    var searchResultItem = await Step2_Vector_Search.SearchVectorStoreAsync(
        stringKeyCollection,
        "What is an Application Programming Interface?",
        fixture.TextEmbeddingGenerationService);

    // Write the search result with its score to the console.
    Console.WriteLine(searchResultItem.Record.Definition);
    Console.WriteLine(searchResultItem.Score);
}

/// <summary>
/// Data model that uses a ulong as the key type instead of a string.
/// It carries the same Category, Term, Definition and DefinitionEmbedding values that the sample copies
/// to and from the string-keyed <see cref="Glossary"/> model.
/// </summary>
private sealed class UlongGlossary
{
/// <summary>Record key, stored as a ulong rather than a string.</summary>
[VectorStoreRecordKey]
public ulong Key { get; set; }

/// <summary>Category of the glossary entry; the only data property marked as filterable.</summary>
[VectorStoreRecordData(IsFilterable = true)]
public string Category { get; set; }

/// <summary>The glossary term.</summary>
[VectorStoreRecordData]
public string Term { get; set; }

/// <summary>The definition text for the term.</summary>
[VectorStoreRecordData]
public string Definition { get; set; }

/// <summary>Vector property declared with 1536 dimensions; presumably the embedding of <see cref="Definition"/> (confirm against the ingestion code).</summary>
[VectorStoreRecordVector(Dimensions: 1536)]
public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
}

/// <summary>
/// Decorator over an <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/> that exposes one key/record
/// type pair to callers while delegating every operation to an inner collection that uses a different pair.
/// Keys and records are converted in each direction by the mapper delegates supplied to the constructor.
/// </summary>
/// <typeparam name="TPublicKey">Key type exposed to callers of this decorator.</typeparam>
/// <typeparam name="TInternalKey">Key type used by the wrapped collection.</typeparam>
/// <typeparam name="TPublicRecord">Record type exposed to callers of this decorator.</typeparam>
/// <typeparam name="TInternalRecord">Record type used by the wrapped collection.</typeparam>
private sealed class MappingVectorStoreRecordCollection<TPublicKey, TInternalKey, TPublicRecord, TInternalRecord> : IVectorStoreRecordCollection<TPublicKey, TPublicRecord>
    where TPublicKey : notnull
    where TInternalKey : notnull
{
    private readonly IVectorStoreRecordCollection<TInternalKey, TInternalRecord> _inner;
    private readonly Func<TPublicKey, TInternalKey> _toInternalKey;
    private readonly Func<TInternalKey, TPublicKey> _toPublicKey;
    private readonly Func<TPublicRecord, TInternalRecord> _toInternalRecord;
    private readonly Func<TInternalRecord, TPublicRecord> _toPublicRecord;

    /// <summary>
    /// Creates a decorator around <paramref name="collection"/>.
    /// </summary>
    /// <param name="collection">The wrapped collection that all operations are forwarded to.</param>
    /// <param name="publicToInternalKeyMapper">Converts a caller-facing key to the wrapped collection's key type.</param>
    /// <param name="internalToPublicKeyMapper">Converts a wrapped-collection key back to the caller-facing key type.</param>
    /// <param name="publicToInternalRecordMapper">Converts a caller-facing record to the wrapped collection's record type.</param>
    /// <param name="internalToPublicRecordMapper">Converts a wrapped-collection record back to the caller-facing record type.</param>
    public MappingVectorStoreRecordCollection(
        IVectorStoreRecordCollection<TInternalKey, TInternalRecord> collection,
        Func<TPublicKey, TInternalKey> publicToInternalKeyMapper,
        Func<TInternalKey, TPublicKey> internalToPublicKeyMapper,
        Func<TPublicRecord, TInternalRecord> publicToInternalRecordMapper,
        Func<TInternalRecord, TPublicRecord> internalToPublicRecordMapper)
    {
        this._inner = collection;
        this._toInternalKey = publicToInternalKeyMapper;
        this._toPublicKey = internalToPublicKeyMapper;
        this._toInternalRecord = publicToInternalRecordMapper;
        this._toPublicRecord = internalToPublicRecordMapper;
    }

    /// <inheritdoc />
    public string CollectionName => this._inner.CollectionName;

    /// <inheritdoc />
    public Task<bool> CollectionExistsAsync(CancellationToken cancellationToken = default)
        => this._inner.CollectionExistsAsync(cancellationToken);

    /// <inheritdoc />
    public Task CreateCollectionAsync(CancellationToken cancellationToken = default)
        => this._inner.CreateCollectionAsync(cancellationToken);

    /// <inheritdoc />
    public Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default)
        => this._inner.CreateCollectionIfNotExistsAsync(cancellationToken);

    /// <inheritdoc />
    public Task DeleteAsync(TPublicKey key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default)
        => this._inner.DeleteAsync(this._toInternalKey(key), options, cancellationToken);

    /// <inheritdoc />
    public Task DeleteBatchAsync(IEnumerable<TPublicKey> keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default)
        => this._inner.DeleteBatchAsync(keys.Select(this._toInternalKey), options, cancellationToken);

    /// <inheritdoc />
    public Task DeleteCollectionAsync(CancellationToken cancellationToken = default)
        => this._inner.DeleteCollectionAsync(cancellationToken);

    /// <inheritdoc />
    public async Task<TPublicRecord?> GetAsync(TPublicKey key, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
    {
        var mappedKey = this._toInternalKey(key);
        var found = await this._inner.GetAsync(mappedKey, options, cancellationToken).ConfigureAwait(false);

        // Only run the record mapper when the wrapped collection actually returned a record.
        if (found is not null)
        {
            return this._toPublicRecord(found);
        }

        return default;
    }

    /// <inheritdoc />
    public IAsyncEnumerable<TPublicRecord> GetBatchAsync(IEnumerable<TPublicKey> keys, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
        => this._inner
            .GetBatchAsync(keys.Select(this._toInternalKey), options, cancellationToken)
            .Select(this._toPublicRecord);

    /// <inheritdoc />
    public async Task<TPublicKey> UpsertAsync(TPublicRecord record, UpsertRecordOptions? options = null, CancellationToken cancellationToken = default)
    {
        var storedKey = await this._inner
            .UpsertAsync(this._toInternalRecord(record), options, cancellationToken)
            .ConfigureAwait(false);

        return this._toPublicKey(storedKey);
    }

    /// <inheritdoc />
    public async IAsyncEnumerable<TPublicKey> UpsertBatchAsync(IEnumerable<TPublicRecord> records, UpsertRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var storedKeys = this._inner.UpsertBatchAsync(records.Select(this._toInternalRecord), options, cancellationToken);

        // Stream each key back to the caller as soon as the wrapped collection yields it.
        await foreach (var storedKey in storedKeys.ConfigureAwait(false))
        {
            yield return this._toPublicKey(storedKey);
        }
    }

    /// <inheritdoc />
    public async Task<VectorSearchResults<TPublicRecord>> VectorizedSearchAsync<TVector>(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default)
    {
        var innerResults = await this._inner.VectorizedSearchAsync(vector, options, cancellationToken).ConfigureAwait(false);

        // Re-wrap every hit with the caller-facing record type, carrying the score over unchanged.
        var mappedHits = innerResults.Results.Select(
            hit => new VectorSearchResult<TPublicRecord>(this._toPublicRecord(hit.Record), hit.Score));

        return new VectorSearchResults<TPublicRecord>(mappedHits)
        {
            TotalCount = innerResults.TotalCount,
            Metadata = innerResults.Metadata,
        };
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.Redis;
using Microsoft.SemanticKernel.Embeddings;
using StackExchange.Redis;

namespace GettingStartedWithVectorStores;

/// <summary>
/// Example that shows that you can use the generic data model to interact with a vector database.
/// This makes it possible to use the vector store abstractions without having to create your own data model.
/// </summary>
public class Step5_Use_GenericDataModel(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture<VectorStoresFixture>
{
    /// <summary>
    /// Example showing how to query a vector store that uses the generic data model.
    ///
    /// This example requires a Redis server running on localhost:6379. To run a Redis server in a Docker container, use the following command:
    /// docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
    /// </summary>
    /// <returns>A task that completes once the top search result and its score have been written to the console.</returns>
    [Fact]
    public async Task SearchAVectorStoreWithGenericDataModelAsync()
    {
        // Connect without blocking the calling thread, and release the connection when the sample finishes.
        using var redisConnection = await ConnectionMultiplexer.ConnectAsync("localhost:6379");

        // Construct a Redis vector store.
        var vectorStore = new RedisVectorStore(redisConnection.GetDatabase());

        // First, let's use the code from step 1 to ingest data into the vector store
        // using the custom data model, simulating a scenario where someone else ingested
        // the data into the database previously.
        var customDataModelCollection = vectorStore.GetCollection<string, Glossary>("skglossary");
        await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(customDataModelCollection, fixture.TextEmbeddingGenerationService);

        // To use the generic data model, we still have to describe the storage schema to the vector store
        // using a record definition. The benefit over a custom data model is that this definition
        // does not have to be known at compile time.
        // E.g. it can be read from a configuration or retrieved from a service.
        var recordDefinition = new VectorStoreRecordDefinition
        {
            Properties = new List<VectorStoreRecordProperty>
            {
                new VectorStoreRecordKeyProperty("Key", typeof(string)),
                new VectorStoreRecordDataProperty("Category", typeof(string)),
                new VectorStoreRecordDataProperty("Term", typeof(string)),
                new VectorStoreRecordDataProperty("Definition", typeof(string)),
                new VectorStoreRecordVectorProperty("DefinitionEmbedding", typeof(ReadOnlyMemory<float>)) { Dimensions = 1536 },
            }
        };

        // Now, let's create a collection that uses the generic data model.
        var genericDataModelCollection = vectorStore.GetCollection<string, VectorStoreGenericDataModel<string>>("skglossary", recordDefinition);

        // Generate an embedding from the search string.
        var searchString = "How do I provide additional context to an LLM?";
        var searchVector = await fixture.TextEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);

        // Search the generic data model collection and get the single most relevant result.
        var searchResult = await genericDataModelCollection.VectorizedSearchAsync(
            searchVector,
            new()
            {
                Top = 1,
            });
        var searchResultItems = await searchResult.Results.ToListAsync();

        // Look the top hit up once; First() throws InvalidOperationException if the search returned nothing.
        var topResult = searchResultItems.First();

        // Write the search result with its score to the console.
        // Note that here we can loop through all the data properties
        // without knowing the schema, since the data properties are
        // stored as a dictionary of string keys and object values
        // when using the generic data model.
        foreach (var dataProperty in topResult.Record.Data)
        {
            Console.WriteLine($"{dataProperty.Key}: {dataProperty.Value}");
        }
        Console.WriteLine(topResult.Score);
    }
}
Loading

0 comments on commit 7457c50

Please sign in to comment.