diff --git a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs
index 2d9159448419..78afa994a700 100644
--- a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs
+++ b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs
@@ -56,4 +56,32 @@ public async Task LocalImageAsync()
 
         Console.WriteLine(reply.Content);
     }
+
+    /// <summary>
+    /// Sends a local image together with a "ChatImageDetailLevel" metadata entry,
+    /// which the OpenAI connector maps to the image part's <c>detail</c> setting.
+    /// </summary>
+    [Fact]
+    public async Task LocalImageWithImageDetailInMetadataAsync()
+    {
+        var imageBytes = await EmbeddedResource.ReadAllAsync("sample_image.jpg");
+
+        var kernel = Kernel.CreateBuilder()
+            .AddOpenAIChatCompletion("gpt-4-vision-preview", TestConfiguration.OpenAI.ApiKey)
+            .Build();
+
+        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();
+
+        var chatHistory = new ChatHistory("You are a friendly assistant.");
+
+        chatHistory.AddUserMessage(
+        [
+            new TextContent("What’s in this image?"),
+            new ImageContent(imageBytes, "image/jpg") { Metadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "high" } }
+        ]);
+
+        var reply = await chatCompletionService.GetChatMessageContentAsync(chatHistory);
+
+        Console.WriteLine(reply.Content);
+    }
 }
diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs
index 3253c05b9ff4..74360e542358 100644
--- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs
+++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs
@@ -642,6 +642,70 @@ public async Task GetChatMessageContentsWithChatMessageContentItemCollectionAndS
         Assert.Equal("image_url", contentItems[1].GetProperty("type").GetString());
     }
 
+    [Theory]
+    [MemberData(nameof(ImageContentMetadataDetailLevelData))]
+    public async Task GetChatMessageContentsHandlesImageDetailLevelInMetadataCorrectlyAsync(object? detailLevel, string? expectedDetailLevel)
+    {
+        // Arrange
+        var chatCompletion = new OpenAIChatCompletionService(modelId: "gpt-4-vision-preview", apiKey: "NOKEY", httpClient: this._httpClient);
+
+        using var response = new HttpResponseMessage(System.Net.HttpStatusCode.OK) { Content = new StringContent(ChatCompletionResponse) };
+        this._messageHandlerStub.ResponseToReturn = response;
+
+        var chatHistory = new ChatHistory();
+        chatHistory.AddUserMessage(
+        [
+            new ImageContent(new Uri("https://image")) { Metadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = detailLevel } }
+        ]);
+
+        // Act
+        await chatCompletion.GetChatMessageContentsAsync(chatHistory);
+
+        // Assert
+        var actualRequestContent = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent!);
+        Assert.NotNull(actualRequestContent);
+        var optionsJson = JsonSerializer.Deserialize<JsonElement>(actualRequestContent);
+
+        var messages = optionsJson.GetProperty("messages");
+
+        Assert.Equal(1, messages.GetArrayLength());
+
+        var contentItems = messages[0].GetProperty("content");
+        Assert.Equal(1, contentItems.GetArrayLength());
+
+        Assert.Equal("image_url", contentItems[0].GetProperty("type").GetString());
+
+        var imageProperty = contentItems[0].GetProperty("image_url");
+
+        Assert.Equal("https://image/", imageProperty.GetProperty("url").GetString());
+
+        // A null expectation means the "detail" property must be absent from the request.
+        if (expectedDetailLevel is null)
+        {
+            Assert.False(imageProperty.TryGetProperty("detail", out _));
+        }
+        else
+        {
+            Assert.Equal(expectedDetailLevel, imageProperty.GetProperty("detail").GetString());
+        }
+    }
+
+    [Fact]
+    public async Task GetChatMessageContentsThrowsExceptionWithInvalidImageDetailLevelInMetadataAsync()
+    {
+        // Arrange
+        var chatCompletion = new OpenAIChatCompletionService(modelId: "gpt-4-vision-preview", apiKey: "NOKEY", httpClient: this._httpClient);
+
+        var chatHistory = new ChatHistory();
+        chatHistory.AddUserMessage(
+        [
+            new ImageContent(new Uri("https://image")) { Metadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "invalid_value" } }
+        ]);
+
+        // Act & Assert
+        await Assert.ThrowsAsync<ArgumentException>(() => chatCompletion.GetChatMessageContentsAsync(chatHistory));
+    }
+
     [Fact]
     public async Task FunctionCallsShouldBePropagatedToCallersViaChatMessageItemsOfTypeFunctionCallContentAsync()
     {
@@ -1558,6 +1622,21 @@ public async Task OnAutoFunctionInvocationAsync(AutoFunctionInvocationContext co
         }
         """;
 
+    /// <summary>
+    /// "ChatImageDetailLevel" metadata values paired with the "detail" value expected in the
+    /// request; a <see langword="null"/> expectation means the property must be omitted.
+    /// </summary>
+    public static TheoryData<object?, string?> ImageContentMetadataDetailLevelData => new()
+    {
+        { "auto", "auto" },
+        { "high", "high" },
+        { "low", "low" },
+        // Metadata deserialized by System.Text.Json carries JsonElement values rather than strings.
+        { JsonSerializer.SerializeToElement("high"), "high" },
+        { "", null },
+        { null, null }
+    };
+
 #pragma warning disable CS8618, CA1812
     private sealed class MathReasoning
     {
diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs
index d1813fbd6bd9..7e7c10767542 100644
--- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs
+++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs
@@ -802,19 +802,69 @@ private static List<ChatMessage> CreateRequestMessages(ChatMessageContent messag
 
     private static ChatMessageContentPart GetImageContentItem(ImageContent imageContent)
     {
+        ChatImageDetailLevel? detailLevel = GetChatImageDetailLevel(imageContent);
+
         if (imageContent.Data is { IsEmpty: false } data)
         {
-            return ChatMessageContentPart.CreateImagePart(BinaryData.FromBytes(data), imageContent.MimeType);
+            return ChatMessageContentPart.CreateImagePart(BinaryData.FromBytes(data), imageContent.MimeType, detailLevel);
         }
 
         if (imageContent.Uri is not null)
         {
-            return ChatMessageContentPart.CreateImagePart(imageContent.Uri);
+            return ChatMessageContentPart.CreateImagePart(imageContent.Uri, detailLevel);
         }
 
         throw new ArgumentException($"{nameof(ImageContent)} must have either Data or a Uri.");
     }
 
+    /// <summary>
+    /// Reads the optional "ChatImageDetailLevel" entry from the metadata of an image content.
+    /// </summary>
+    /// <param name="imageContent">The image content whose metadata is inspected.</param>
+    /// <returns>
+    /// The requested detail level, or <see langword="null"/> when the entry is missing, null,
+    /// blank, or of an unsupported type.
+    /// </returns>
+    /// <exception cref="ArgumentException">The entry is text other than 'Auto', 'Low' or 'High' (case-insensitive).</exception>
+    private static ChatImageDetailLevel? GetChatImageDetailLevel(ImageContent imageContent)
+    {
+        const string DetailLevelProperty = "ChatImageDetailLevel";
+
+        if (imageContent.Metadata is null ||
+            !imageContent.Metadata.TryGetValue(DetailLevelProperty, out object? detailLevel))
+        {
+            return null;
+        }
+
+        // A strongly typed value needs no parsing.
+        if (detailLevel is ChatImageDetailLevel typedDetailLevel)
+        {
+            return typedDetailLevel;
+        }
+
+        // Metadata deserialized by System.Text.Json carries JsonElement values rather than strings.
+        string? detailLevelString = detailLevel switch
+        {
+            string text => text,
+            System.Text.Json.JsonElement { ValueKind: System.Text.Json.JsonValueKind.String } json => json.GetString(),
+            _ => null
+        };
+
+        // Explicit null check so nullable flow analysis does not depend on IsNullOrWhiteSpace annotations.
+        if (detailLevelString is null || string.IsNullOrWhiteSpace(detailLevelString))
+        {
+            return null;
+        }
+
+        return detailLevelString.ToUpperInvariant() switch
+        {
+            "AUTO" => ChatImageDetailLevel.Auto,
+            "LOW" => ChatImageDetailLevel.Low,
+            "HIGH" => ChatImageDetailLevel.High,
+            _ => throw new ArgumentException($"Unknown image detail level '{detailLevelString}'. Supported values are 'Auto', 'Low' and 'High'.")
+        };
+    }
+
     private OpenAIChatMessageContent CreateChatMessageContent(OpenAIChatCompletion completion, string targetModel)
     {
         var message = new OpenAIChatMessageContent(completion, targetModel, this.GetChatCompletionMetadata(completion));
diff --git a/dotnet/src/SemanticKernel.UnitTests/Contents/ImageContentTests.cs b/dotnet/src/SemanticKernel.UnitTests/Contents/ImageContentTests.cs
index 14f86451cf71..41f7d07bf30f 100644
--- a/dotnet/src/SemanticKernel.UnitTests/Contents/ImageContentTests.cs
+++ b/dotnet/src/SemanticKernel.UnitTests/Contents/ImageContentTests.cs
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 
 using System;
+using System.Collections.Generic;
 using System.Text;
 using System.Text.Json;
 using Microsoft.SemanticKernel;
@@ -241,6 +242,26 @@ public void EmptyConstructorSerializationAndDeserializationAsExpected()
         Assert.Null(deserialized.Metadata);
     }
 
+    [Fact]
+    public void MetadataSerializationAndDeserializationWorksCorrectly()
+    {
+        // Arrange
+        var content = new ImageContent()
+        {
+            Metadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "high" }
+        };
+
+        // Act
+        var serialized = JsonSerializer.Serialize(content);
+        var deserialized = JsonSerializer.Deserialize<ImageContent>(serialized);
+
+        // Assert
+        Assert.NotNull(deserialized?.Metadata);
+        // Compare through ToString() so the check does not depend on the runtime type of the deserialized value.
+        Assert.True(deserialized.Metadata.TryGetValue("ChatImageDetailLevel", out object? detailLevel));
+        Assert.Equal("high", detailLevel?.ToString());
+    }
+
     [Theory]
     [InlineData("http://localhost:9090/")]
     [InlineData(null)]