From 97d7056e922f05784c51c2a1bf03d614600ddd49 Mon Sep 17 00:00:00 2001 From: Stephen Hodgson Date: Wed, 13 Dec 2023 19:03:05 -0500 Subject: [PATCH] com.openai.unity 7.1.0 - Refactor ImagesEndpoint - Changed all endpoint signatures to IReadOnlyList - Fix Text To Speech generation for webgl - Updated Chat sample with image generation capabilities --- .../Runtime/Audio/AudioEndpoint.cs | 18 +- .../Runtime/Chat/Conversation.cs | 4 +- .../Runtime/Files/FilesEndpoint.cs | 2 +- .../Runtime/Images/ImageGenerationRequest.cs | 26 +- .../Runtime/Images/ImageResult.cs | 38 ++- .../Runtime/Images/ImagesEndpoint.cs | 231 ++---------------- .../Samples~/Chat/ChatBehaviour.cs | 174 +++++++++++-- .../Samples~/Chat/OpenAIChatSample.unity | 6 +- .../Tests/TestFixture_05_Images.cs | 54 ++-- OpenAI/Packages/com.openai.unity/package.json | 4 +- OpenAI/Packages/manifest.json | 2 +- OpenAI/ProjectSettings/ProjectSettings.asset | 13 +- 12 files changed, 270 insertions(+), 302 deletions(-) diff --git a/OpenAI/Packages/com.openai.unity/Runtime/Audio/AudioEndpoint.cs b/OpenAI/Packages/com.openai.unity/Runtime/Audio/AudioEndpoint.cs index 10b8cf2d..87482805 100644 --- a/OpenAI/Packages/com.openai.unity/Runtime/Audio/AudioEndpoint.cs +++ b/OpenAI/Packages/com.openai.unity/Runtime/Audio/AudioEndpoint.cs @@ -6,6 +6,7 @@ using System.Threading; using System.Threading.Tasks; using UnityEngine; +using UnityEngine.Networking; using UnityEngine.Scripting; using Utilities.WebRequestRest; @@ -56,12 +57,17 @@ public async Task> CreateSpeechAsync(SpeechRequest requ _ => throw new NotSupportedException(request.ResponseFormat.ToString()) }; var payload = JsonConvert.SerializeObject(request, OpenAIClient.JsonSerializationOptions); - var response = await Rest.PostAsync(GetUrl("/speech"), payload, new RestParameters(client.DefaultRequestHeaders), cancellationToken); - response.Validate(EnableDebug); - await Rest.ValidateCacheDirectoryAsync(); - var cachedPath = Path.Combine(Rest.DownloadCacheDirectory, $"{request.Voice}-{DateTime.UtcNow:yyyyMMddThhmmss}.{ext}"); - await File.WriteAllBytesAsync(cachedPath, response.Data, cancellationToken).ConfigureAwait(true); - var clip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioFormat, cancellationToken: cancellationToken); + var clipName = $"{request.Voice}-{DateTime.UtcNow:yyyyMMddThhmmss}.{ext}"; + var clip = await Rest.DownloadAudioClipAsync( + GetUrl("/speech"), + audioFormat, + UnityWebRequest.kHttpVerbPOST, + clipName, + payload, + parameters: new RestParameters(client.DefaultRequestHeaders), + debug: EnableDebug, + cancellationToken: cancellationToken); + Rest.TryGetDownloadCacheItem(clipName, out var cachedPath); return new Tuple(cachedPath, clip); } diff --git a/OpenAI/Packages/com.openai.unity/Runtime/Chat/Conversation.cs b/OpenAI/Packages/com.openai.unity/Runtime/Chat/Conversation.cs index 67743364..1620e9cb 100644 --- a/OpenAI/Packages/com.openai.unity/Runtime/Chat/Conversation.cs +++ b/OpenAI/Packages/com.openai.unity/Runtime/Chat/Conversation.cs @@ -13,9 +13,9 @@ public sealed class Conversation { [Preserve] [JsonConstructor] - public Conversation([JsonProperty("messages")] List messages) + public Conversation([JsonProperty("messages")] List messages = null) { - this.messages = messages; + this.messages = messages ?? new List(); } private readonly List messages; diff --git a/OpenAI/Packages/com.openai.unity/Runtime/Files/FilesEndpoint.cs b/OpenAI/Packages/com.openai.unity/Runtime/Files/FilesEndpoint.cs index 474a7627..3443c519 100644 --- a/OpenAI/Packages/com.openai.unity/Runtime/Files/FilesEndpoint.cs +++ b/OpenAI/Packages/com.openai.unity/Runtime/Files/FilesEndpoint.cs @@ -146,7 +146,7 @@ public async Task GetFileInfoAsync(string fileId, CancellationToke public async Task DownloadFileAsync(string fileId, IProgress progress = null, CancellationToken cancellationToken = default) { var file = await GetFileInfoAsync(fileId, cancellationToken); - return await Rest.DownloadFileAsync(GetUrl($"/{file.Id}/content"), file.FileName, new RestParameters(client.DefaultRequestHeaders, progress), cancellationToken: cancellationToken); + return await Rest.DownloadFileAsync(GetUrl($"/{file.Id}/content"), file.FileName, new RestParameters(client.DefaultRequestHeaders, progress), EnableDebug, cancellationToken); } } } diff --git a/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageGenerationRequest.cs b/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageGenerationRequest.cs index 21770b81..82438a13 100644 --- a/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageGenerationRequest.cs +++ b/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageGenerationRequest.cs @@ -13,13 +13,6 @@ namespace OpenAI.Images [Preserve] public sealed class ImageGenerationRequest { - [Preserve] - [Obsolete("Use new constructor")] - public ImageGenerationRequest(string prompt, int numberOfResults = 1, ImageSize size = ImageSize.Large, string user = null, ResponseFormat responseFormat = ResponseFormat.Url) - { - throw new NotSupportedException(); - } - /// /// Constructor. /// @@ -59,22 +52,23 @@ public ImageGenerationRequest(string prompt, int numberOfResults = 1, ImageSize /// /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. /// + [JsonConstructor] public ImageGenerationRequest( - string prompt, - Model model = null, - int numberOfResults = 1, - string quality = null, - ResponseFormat responseFormat = ResponseFormat.Url, - string size = null, - string style = null, - string user = null) + [JsonProperty("prompt")] string prompt, + [JsonProperty("model")] Model model = null, + [JsonProperty("n")] int numberOfResults = 1, + [JsonProperty("quality")] string quality = null, + [JsonProperty("response_format")] ResponseFormat responseFormat = ResponseFormat.Url, + [JsonProperty("size")] string size = null, + [JsonProperty("style")] string style = null, + [JsonProperty("user")] string user = null) { Prompt = prompt; Model = string.IsNullOrWhiteSpace(model?.Id) ? Models.Model.DallE_2 : model; Number = numberOfResults; Quality = quality; ResponseFormat = responseFormat; - Size = size; + Size = size ?? "1024x1024"; Style = style; User = user; } diff --git a/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageResult.cs b/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageResult.cs index ebc36feb..4e79d4de 100644 --- a/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageResult.cs +++ b/OpenAI/Packages/com.openai.unity/Runtime/Images/ImageResult.cs @@ -1,12 +1,13 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. using Newtonsoft.Json; +using UnityEngine; using UnityEngine.Scripting; namespace OpenAI.Images { [Preserve] - internal sealed class ImageResult + public sealed class ImageResult { [Preserve] [JsonConstructor] @@ -31,5 +32,40 @@ public ImageResult( [Preserve] [JsonProperty("revised_prompt")] public string RevisedPrompt { get; private set; } + + [Preserve] + [JsonIgnore] + public string CachedPath { get; internal set; } + + [Preserve] + [JsonIgnore] + public Texture2D Texture { get; internal set; } + + [Preserve] + public static implicit operator Texture2D(ImageResult imageResult) => imageResult.Texture; + + [Preserve] + public static implicit operator string(ImageResult imageResult) => imageResult.ToString(); + + [Preserve] + public override string ToString() + { + if (!string.IsNullOrWhiteSpace(CachedPath)) + { + return CachedPath; + } + + if (!string.IsNullOrWhiteSpace(Url)) + { + return Url; + } + + if (!string.IsNullOrWhiteSpace(B64_Json)) + { + return B64_Json; + } + + return string.Empty; + } } } diff --git a/OpenAI/Packages/com.openai.unity/Runtime/Images/ImagesEndpoint.cs b/OpenAI/Packages/com.openai.unity/Runtime/Images/ImagesEndpoint.cs index b924421e..f482e730 100644 --- a/OpenAI/Packages/com.openai.unity/Runtime/Images/ImagesEndpoint.cs +++ b/OpenAI/Packages/com.openai.unity/Runtime/Images/ImagesEndpoint.cs @@ -3,7 +3,6 @@ using Newtonsoft.Json; using OpenAI.Extensions; using System; -using System.Collections.Concurrent; using System.Collections.Generic; using System.IO; using System.Linq; @@ -27,149 +26,26 @@ internal ImagesEndpoint(OpenAIClient client) : base(client) { } /// protected override string Root => "images"; - /// - /// Creates an image given a prompt. - /// - /// - /// A text description of the desired image(s). The maximum length is 1000 characters. - /// - /// - /// The number of images to generate. Must be between 1 and 10. - /// - /// - /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - /// - /// - /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - /// - /// - /// The format in which the generated images are returned. Must be one of url or b64_json. - /// Defaults to - /// - /// - /// Optional, . - /// - /// A dictionary of file urls and the preloaded that were downloaded. - [Obsolete] - public async Task> GenerateImageAsync( - string prompt, - int numberOfResults = 1, - ImageSize size = ImageSize.Large, - string user = null, - ResponseFormat responseFormat = ResponseFormat.Url, - CancellationToken cancellationToken = default) - => await GenerateImageAsync(new ImageGenerationRequest(prompt, numberOfResults, size, user, responseFormat), cancellationToken); - /// /// Creates an image given a prompt. /// /// /// Optional, . /// A dictionary of file urls and the preloaded that were downloaded. - public async Task> GenerateImageAsync(ImageGenerationRequest request, CancellationToken cancellationToken = default) + public async Task> GenerateImageAsync(ImageGenerationRequest request, CancellationToken cancellationToken = default) { var jsonContent = JsonConvert.SerializeObject(request, OpenAIClient.JsonSerializationOptions); var response = await Rest.PostAsync(GetUrl("/generations"), jsonContent, new RestParameters(client.DefaultRequestHeaders), cancellationToken); return await DeserializeResponseAsync(response, cancellationToken); } - /// - /// Creates an edited or extended image given an original image and a prompt. - /// - /// - /// The image to edit. Must be a valid PNG file, less than 4MB, and square. - /// If mask is not provided, image must have transparency, which will be used as the mask. - /// - /// - /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where image should be edited. - /// Must be a valid PNG file, less than 4MB, and have the same dimensions as image. - /// - /// - /// A text description of the desired image(s). The maximum length is 1000 characters. - /// - /// - /// The number of images to generate. Must be between 1 and 10. - /// - /// - /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - /// - /// - /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - /// - /// - /// The format in which the generated images are returned. Must be one of url or b64_json. - /// Defaults to - /// - /// - /// Optional, . - /// - /// - /// A dictionary of file urls and the preloaded that were downloaded. - /// - [Obsolete] - public async Task> CreateImageEditAsync( - string image, - string mask, - string prompt, - int numberOfResults = 1, - ImageSize size = ImageSize.Large, - string user = null, - ResponseFormat responseFormat = ResponseFormat.Url, - CancellationToken cancellationToken = default) - => await CreateImageEditAsync(new ImageEditRequest(image, mask, prompt, numberOfResults, size, user, responseFormat), cancellationToken); - - /// - /// Creates an edited or extended image given an original image and a prompt. - /// - /// - /// The image to edit. Must be a valid PNG file, less than 4MB, and square. - /// If mask is not provided, image must have transparency, which will be used as the mask. - /// - /// - /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where image should be edited. - /// Must be a valid PNG file, less than 4MB, and have the same dimensions as image. - /// - /// - /// A text description of the desired image(s). The maximum length is 1000 characters. - /// - /// - /// The number of images to generate. Must be between 1 and 10. - /// - /// - /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - /// - /// - /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - /// - /// - /// The format in which the generated images are returned. Must be one of url or b64_json. - /// Defaults to - /// - /// - /// Optional, . - /// - /// - /// A dictionary of file urls and the preloaded that were downloaded. - /// - [Obsolete] - public async Task> CreateImageEditAsync( - Texture2D image, - Texture2D mask, - string prompt, - int numberOfResults = 1, - ImageSize size = ImageSize.Large, - string user = null, - ResponseFormat responseFormat = ResponseFormat.Url, - CancellationToken cancellationToken = default) - => await CreateImageEditAsync(new ImageEditRequest(image, mask, prompt, numberOfResults, size, user, responseFormat), cancellationToken); - /// /// Creates an edited or extended image given an original image and a prompt. /// /// /// Optional, . /// A dictionary of file urls and the preloaded that were downloaded. - public async Task> CreateImageEditAsync(ImageEditRequest request, CancellationToken cancellationToken = default) + public async Task> CreateImageEditAsync(ImageEditRequest request, CancellationToken cancellationToken = default) { var form = new WWWForm(); using var imageData = new MemoryStream(); @@ -199,75 +75,13 @@ public async Task> CreateImageEditAsync(I return await DeserializeResponseAsync(response, cancellationToken); } - /// - /// Creates a variation of a given image. - /// - /// - /// The image to edit. Must be a valid PNG file, less than 4MB, and square. - /// - /// - /// The number of images to generate. Must be between 1 and 10. - /// - /// - /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - /// - /// - /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - /// - /// - /// The format in which the generated images are returned. Must be one of url or b64_json. - /// Defaults to - /// - /// Optional, . - /// A dictionary of file urls and the preloaded that were downloaded. - [Obsolete] - public async Task> CreateImageVariationAsync( - string imagePath, - int numberOfResults = 1, - ImageSize size = ImageSize.Large, - string user = null, - ResponseFormat responseFormat = ResponseFormat.Url, - CancellationToken cancellationToken = default) - => await CreateImageVariationAsync(new ImageVariationRequest(imagePath, numberOfResults, size, user, responseFormat), cancellationToken); - - /// - /// Creates a variation of a given image. - /// - /// - /// The texture to edit. Must be a valid PNG file, less than 4MB, and square. Read/Write should be enabled and Compression set to None. - /// - /// - /// The number of images to generate. Must be between 1 and 10. - /// - /// - /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. - /// - /// - /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - /// - /// - /// The format in which the generated images are returned. Must be one of url or b64_json. - /// Defaults to - /// - /// Optional, . - /// A dictionary of file urls and the preloaded that were downloaded. - [Obsolete] - public async Task> CreateImageVariationAsync( - Texture2D texture, - int numberOfResults = 1, - ImageSize size = ImageSize.Large, - string user = null, - ResponseFormat responseFormat = ResponseFormat.Url, - CancellationToken cancellationToken = default) - => await CreateImageVariationAsync(new ImageVariationRequest(texture, numberOfResults, size, user, responseFormat), cancellationToken); - /// /// Creates a variation of a given image. /// /// /// Optional, . /// A dictionary of file urls and the preloaded that were downloaded. - public async Task> CreateImageVariationAsync(ImageVariationRequest request, CancellationToken cancellationToken = default) + public async Task> CreateImageVariationAsync(ImageVariationRequest request, CancellationToken cancellationToken = default) { var form = new WWWForm(); using var imageData = new MemoryStream(); @@ -288,10 +102,7 @@ public async Task> CreateImageVariationAs return await DeserializeResponseAsync(response, cancellationToken); } - private const string LocalFilePrefix = "file://"; - private readonly char[] localFilePrefixCharArray = LocalFilePrefix.ToCharArray(); - - private async Task> DeserializeResponseAsync(Response response, CancellationToken cancellationToken = default) + private async Task> DeserializeResponseAsync(Response response, CancellationToken cancellationToken = default) { response.Validate(EnableDebug); @@ -302,47 +113,41 @@ private async Task> DeserializeResponseAs throw new Exception($"No image content returned!\n{response.Body}"); } - var images = new ConcurrentDictionary(); await Rest.ValidateCacheDirectoryAsync(); var downloads = imagesResponse.Results.Select(DownloadAsync).ToList(); async Task DownloadAsync(ImageResult result) { - string resultImagePath; - string localFilePath; + await Awaiters.UnityMainThread; if (string.IsNullOrWhiteSpace(result.Url)) { - resultImagePath = result.B64_Json; - var imageData = Convert.FromBase64String(resultImagePath); - - if (!Rest.TryGetDownloadCacheItem(resultImagePath, out localFilePath)) + var imageData = Convert.FromBase64String(result.B64_Json); +#if PLATFORM_WEBGL + result.Texture = new Texture2D(2, 2); + result.Texture.LoadImage(imageData); +#else + if (!Rest.TryGetDownloadCacheItem(result.B64_Json, out var localFilePath)) { - await File.WriteAllBytesAsync(localFilePath, imageData, cancellationToken); + await File.WriteAllBytesAsync(localFilePath, imageData, cancellationToken).ConfigureAwait(true); } - resultImagePath = $"{LocalFilePrefix}{localFilePath}"; + result.Texture = await Rest.DownloadTextureAsync(localFilePath, debug: EnableDebug, cancellationToken: cancellationToken); +#endif } else { - resultImagePath = result.Url; + result.Texture = await Rest.DownloadTextureAsync(result.Url, debug: EnableDebug, cancellationToken: cancellationToken); } - await Awaiters.UnityMainThread; - var texture = await Rest.DownloadTextureAsync(resultImagePath, cancellationToken: cancellationToken); - - if (Rest.TryGetDownloadCacheItem(resultImagePath, out localFilePath)) - { - images.TryAdd(localFilePath.TrimStart(localFilePrefixCharArray), texture); - } - else + if (Rest.TryGetDownloadCacheItem(result, out var cachedPath)) { - Debug.LogError($"Failed to find cached item for {resultImagePath}"); + result.CachedPath = cachedPath; } } await Task.WhenAll(downloads).ConfigureAwait(true); - return images; + return imagesResponse.Results; } } } diff --git a/OpenAI/Packages/com.openai.unity/Samples~/Chat/ChatBehaviour.cs b/OpenAI/Packages/com.openai.unity/Samples~/Chat/ChatBehaviour.cs index 94d9a5d3..86dad0e1 100644 --- a/OpenAI/Packages/com.openai.unity/Samples~/Chat/ChatBehaviour.cs +++ b/OpenAI/Packages/com.openai.unity/Samples~/Chat/ChatBehaviour.cs @@ -1,16 +1,22 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; using OpenAI.Audio; using OpenAI.Chat; +using OpenAI.Images; using OpenAI.Models; using System; using System.Collections.Generic; +using System.Linq; using System.Threading; +using System.Threading.Tasks; using TMPro; using UnityEngine; using UnityEngine.EventSystems; using UnityEngine.UI; using Utilities.Extensions; +using Utilities.WebRequestRest; namespace OpenAI.Samples.Chat { @@ -34,12 +40,55 @@ public class ChatBehaviour : MonoBehaviour [SerializeField] private AudioSource audioSource; + [SerializeField] + [TextArea(3, 10)] + private string systemPrompt = "You are a helpful assistant.\n- If an image is requested then use \"![Image](output.jpg)\" to display it."; + private OpenAIClient openAI; - private readonly List chatMessages = new List(); + private readonly Conversation conversation = new Conversation(); private CancellationTokenSource lifetimeCancellationTokenSource; + private readonly List assistantTools = new List + { + new Function( + nameof(GenerateImageAsync), + "Generates an image based on the user's request.", + new JObject + { + ["type"] = "object", + ["properties"] = new JObject + { + ["prompt"] = new JObject + { + ["type"] = "string", + ["description"] = "A text description of the desired image(s). The maximum length is 1000 characters for dall-e-2 and 4000 characters for dall-e-3." + }, + ["model"] = new JObject + { + ["type"] = "string", + ["description"] = "The model to use for image generation.", + ["enum"] = new JArray { "dall-e-2", "dall-e-3" }, + ["default"] = "dall-e-2" + }, + ["size"] = new JObject + { + ["type"] = "string", + ["description"] = "The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models.", + ["enum"] = new JArray{ "256x256", "512x512", "1024x1024", "1792x1024", "1024x1792" }, + ["default"] = "512x512" + }, + ["response_format"] = new JObject + { + ["type"] = "string", + ["enum"] = new JArray { "b64_json" } // hard coded for webgl + } + }, + ["required"] = new JArray { "prompt", "model", "response_format" } + }) + }; + private void OnValidate() { inputField.Validate(); @@ -52,8 +101,11 @@ private void Awake() { OnValidate(); lifetimeCancellationTokenSource = new CancellationTokenSource(); - openAI = new OpenAIClient(); - chatMessages.Add(new Message(Role.System, "You are a helpful assistant.")); + openAI = new OpenAIClient + { + EnableDebug = enableDebug + }; + conversation.AppendMessage(new Message(Role.System, systemPrompt)); inputField.onSubmit.AddListener(SubmitChat); submitButton.onClick.AddListener(SubmitChat); } @@ -77,29 +129,44 @@ private async void SubmitChat() inputField.ReleaseSelection(); inputField.interactable = false; submitButton.interactable = false; - var userMessage = new Message(Role.User, inputField.text); - chatMessages.Add(userMessage); - var userMessageContent = AddNewTextMessageContent(); + conversation.AppendMessage(new Message(Role.User, inputField.text)); + var userMessageContent = AddNewTextMessageContent(Role.User); userMessageContent.text = $"User: {inputField.text}"; inputField.text = string.Empty; - var assistantMessageContent = AddNewTextMessageContent(); + var assistantMessageContent = AddNewTextMessageContent(Role.Assistant); assistantMessageContent.text = "Assistant: "; try { - var request = new ChatRequest(chatMessages, Model.GPT3_5_Turbo); - openAI.ChatEndpoint.EnableDebug = enableDebug; + var request = new ChatRequest(conversation.Messages, tools: assistantTools, toolChoice: "auto"); var response = await openAI.ChatEndpoint.StreamCompletionAsync(request, resultHandler: deltaResponse => { if (deltaResponse?.FirstChoice?.Delta == null) { return; } assistantMessageContent.text += deltaResponse.FirstChoice.Delta.ToString(); scrollView.verticalNormalizedPosition = 0f; }, lifetimeCancellationTokenSource.Token); + + conversation.AppendMessage(response.FirstChoice.Message); + + if (response.FirstChoice.FinishReason == "tool_calls") + { + response = await ProcessToolCallAsync(response); + assistantMessageContent.text += response.ToString().Replace("![Image](output.jpg)", string.Empty); + } + GenerateSpeech(response); } catch (Exception e) { - Debug.LogError(e); + switch (e) + { + case TaskCanceledException: + case OperationCanceledException: + break; + default: + Debug.LogError(e); + break; + } } finally { @@ -112,32 +179,89 @@ private async void SubmitChat() isChatPending = false; } - } - private async void GenerateSpeech(ChatResponse response) - { - try + async Task ProcessToolCallAsync(ChatResponse response) { - var request = new SpeechRequest(response.FirstChoice.ToString(), Model.TTS_1); - openAI.AudioEndpoint.EnableDebug = enableDebug; - var (clipPath, clip) = await openAI.AudioEndpoint.CreateSpeechAsync(request, lifetimeCancellationTokenSource.Token); - audioSource.PlayOneShot(clip); - Debug.Log(clipPath); - } - catch (Exception e) - { - Debug.LogError(e); + var toolCall = response.FirstChoice.Message.ToolCalls.FirstOrDefault(); + + if (enableDebug) + { + Debug.Log($"{response.FirstChoice.Message.Role}: {toolCall?.Function?.Name} | Finish Reason: {response.FirstChoice.FinishReason}"); + Debug.Log($"{toolCall?.Function?.Arguments}"); + } + + if (toolCall == null || toolCall.Function?.Name != nameof(GenerateImageAsync)) + { + throw new Exception($"Failed to find a valid tool call!\n{response}"); + } + + ChatResponse toolCallResponse; + + try + { + var imageGenerationRequest = JsonConvert.DeserializeObject(toolCall.Function.Arguments.ToString()); + var imageResult = await GenerateImageAsync(imageGenerationRequest); + AddNewImageContent(imageResult); + conversation.AppendMessage(new Message(toolCall, "{\"result\":\"completed\"}")); + var toolCallRequest = new ChatRequest(conversation.Messages, tools: assistantTools, toolChoice: "auto"); + toolCallResponse = await openAI.ChatEndpoint.GetCompletionAsync(toolCallRequest); + conversation.AppendMessage(toolCallResponse.FirstChoice.Message); + } + catch (RestException restEx) + { + Debug.LogError(restEx); + conversation.AppendMessage(new Message(toolCall, restEx.Response.Body)); + var toolCallRequest = new ChatRequest(conversation.Messages, tools: assistantTools, toolChoice: "auto"); + toolCallResponse = await openAI.ChatEndpoint.GetCompletionAsync(toolCallRequest); + conversation.AppendMessage(toolCallResponse.FirstChoice.Message); + } + + if (toolCallResponse.FirstChoice.FinishReason == "tool_calls") + { + return await ProcessToolCallAsync(toolCallResponse); + } + + return toolCallResponse; } } - private TextMeshProUGUI AddNewTextMessageContent() + private async void GenerateSpeech(string text) + { + text = text.Replace("![Image](output.jpg)", string.Empty); + var request = new SpeechRequest(text, Model.TTS_1); + var (clipPath, clip) = await openAI.AudioEndpoint.CreateSpeechAsync(request, lifetimeCancellationTokenSource.Token); + audioSource.PlayOneShot(clip); + Debug.Log(clipPath); + } + + private TextMeshProUGUI AddNewTextMessageContent(Role role) { - var textObject = new GameObject($"Message_{contentArea.childCount + 1}"); + var textObject = new GameObject($"{contentArea.childCount + 1}_{role}"); textObject.transform.SetParent(contentArea, false); var textMesh = textObject.AddComponent(); textMesh.fontSize = 24; textMesh.enableWordWrapping = true; return textMesh; } + + private void AddNewImageContent(Texture2D texture) + { + var imageObject = new GameObject($"{contentArea.childCount + 1}_Image"); + imageObject.transform.SetParent(contentArea, false); + var rawImage = imageObject.AddComponent(); + rawImage.texture = texture; + var layoutElement = imageObject.AddComponent(); + layoutElement.preferredHeight = texture.height / 4f; + layoutElement.preferredWidth = texture.width / 4f; + var aspectRatioFitter = imageObject.AddComponent(); + aspectRatioFitter.aspectMode = AspectRatioFitter.AspectMode.HeightControlsWidth; + aspectRatioFitter.aspectRatio = texture.width / (float)texture.height; + } + + private async Task GenerateImageAsync(ImageGenerationRequest request) + { + var results = await openAI.ImagesEndPoint.GenerateImageAsync(request); + return results.FirstOrDefault(); + } } } diff --git a/OpenAI/Packages/com.openai.unity/Samples~/Chat/OpenAIChatSample.unity b/OpenAI/Packages/com.openai.unity/Samples~/Chat/OpenAIChatSample.unity index ef378a72..1d5d52ce 100644 --- a/OpenAI/Packages/com.openai.unity/Samples~/Chat/OpenAIChatSample.unity +++ b/OpenAI/Packages/com.openai.unity/Samples~/Chat/OpenAIChatSample.unity @@ -1962,12 +1962,16 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: a891710bf1466924297c3b3b6f1b6e51, type: 3} m_Name: m_EditorClassIdentifier: - enableDebug: 0 + enableDebug: 1 submitButton: {fileID: 1094024334} inputField: {fileID: 1377121433} contentArea: {fileID: 250955499} scrollView: {fileID: 1974642466} audioSource: {fileID: 1711080862} + systemPrompt: 'You are a helpful assistant. + + - If an image is requested then + use "![Image](output.jpg)" to display it.' --- !u!82 &1711080862 AudioSource: m_ObjectHideFlags: 0 diff --git a/OpenAI/Packages/com.openai.unity/Tests/TestFixture_05_Images.cs b/OpenAI/Packages/com.openai.unity/Tests/TestFixture_05_Images.cs index b1e16d62..23346059 100644 --- a/OpenAI/Packages/com.openai.unity/Tests/TestFixture_05_Images.cs +++ b/OpenAI/Packages/com.openai.unity/Tests/TestFixture_05_Images.cs @@ -22,10 +22,10 @@ public async Task Test_01_01_GenerateImages() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -59,10 +59,10 @@ public async Task Test_02_01_CreateImageEdit_Path() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -75,15 +75,15 @@ public async Task Test_02_03_CreateImageEdit_Texture() var maskAssetPath = AssetDatabase.GUIDToAssetPath("0be6be2fad590cc47930495d2ca37dd6"); var mask = AssetDatabase.LoadAssetAtPath(maskAssetPath); var request = new ImageEditRequest(image, mask, "A sunlit indoor lounge area with a pool containing a flamingo", size: ImageSize.Small); - var results = await OpenAIClient.ImagesEndPoint.CreateImageEditAsync(request); + var imageResults = await OpenAIClient.ImagesEndPoint.CreateImageEditAsync(request); - Assert.IsNotNull(results); - Assert.NotZero(results.Count); + Assert.IsNotNull(imageResults); + Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in results) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -101,10 +101,10 @@ public async Task Test_02_04_CreateImageEdit_Texture_B64_Json() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -120,10 +120,10 @@ public async Task Test_02_05_CreateImageEdit_MaskAsTransparency() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -138,10 +138,10 @@ public async Task Test_03_01_CreateImageVariation_Path() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -157,10 +157,10 @@ public async Task Test_03_02_CreateImageVariation_Texture() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } @@ -176,10 +176,10 @@ public async Task Test_03_04_CreateImageVariation_Texture_B64_Json() Assert.IsNotNull(imageResults); Assert.NotZero(imageResults.Count); - foreach (var (path, texture) in imageResults) + foreach (var result in imageResults) { - Debug.Log(path); - Assert.IsNotNull(texture); + Debug.Log(result.ToString()); + Assert.IsNotNull(result.Texture); } } } diff --git a/OpenAI/Packages/com.openai.unity/package.json b/OpenAI/Packages/com.openai.unity/package.json index 1ed27666..7c5aaf45 100644 --- a/OpenAI/Packages/com.openai.unity/package.json +++ b/OpenAI/Packages/com.openai.unity/package.json @@ -3,7 +3,7 @@ "displayName": "OpenAI", "description": "A OpenAI package for the Unity Game Engine to use GPT-4, GPT-3.5, GPT-3 and Dall-E though their RESTful API (currently in beta).\n\nIndependently developed, this is not an official library and I am not affiliated with OpenAI.\n\nAn OpenAI API account is required.", "keywords": [], - "version": "7.0.5", + "version": "7.1.0", "unity": "2021.3", "documentationUrl": "https://github.com/RageAgainstThePixel/com.openai.unity#documentation", "changelogUrl": "https://github.com/RageAgainstThePixel/com.openai.unity/releases", @@ -17,7 +17,7 @@ "url": "https://github.com/StephenHodgson" }, "dependencies": { - "com.utilities.rest": "2.3.1", + "com.utilities.rest": "2.4.0", "com.utilities.encoder.wav": "1.0.8", "com.utilities.encoder.ogg": "3.0.12" }, diff --git a/OpenAI/Packages/manifest.json b/OpenAI/Packages/manifest.json index c7e1c5c2..555d243f 100644 --- a/OpenAI/Packages/manifest.json +++ b/OpenAI/Packages/manifest.json @@ -1,6 +1,6 @@ { "dependencies": { - "com.unity.ide.rider": "3.0.26", + "com.unity.ide.rider": "3.0.27", "com.unity.ide.visualstudio": "2.0.22", "com.unity.textmeshpro": "3.0.6", "com.utilities.buildpipeline": "1.1.9" diff --git a/OpenAI/ProjectSettings/ProjectSettings.asset b/OpenAI/ProjectSettings/ProjectSettings.asset index ff149e76..9568c46d 100644 --- a/OpenAI/ProjectSettings/ProjectSettings.asset +++ b/OpenAI/ProjectSettings/ProjectSettings.asset @@ -136,7 +136,7 @@ PlayerSettings: vulkanEnableLateAcquireNextImage: 0 vulkanEnableCommandBufferRecycling: 1 loadStoreDebugModeEnabled: 0 - bundleVersion: 5.2.1 + bundleVersion: 7.1.0 preloadedAssets: [] metroInputSource: 0 wsaTransparentSwapchain: 0 @@ -521,7 +521,6 @@ PlayerSettings: switchScreenResolutionBehavior: 2 switchUseCPUProfiler: 0 switchEnableFileSystemTrace: 0 - switchUseGOLDLinker: 0 switchLTOSetting: 0 switchApplicationID: 0x01004b9000490000 switchNSODependencies: @@ -738,17 +737,17 @@ PlayerSettings: blurSplashScreenBackground: 1 spritePackerPolicy: webGLMemorySize: 32 - webGLExceptionSupport: 1 + webGLExceptionSupport: 3 webGLNameFilesAsHashes: 0 webGLShowDiagnostics: 0 webGLDataCaching: 1 - webGLDebugSymbols: 0 + webGLDebugSymbols: 2 webGLEmscriptenArgs: webGLModulesDirectory: webGLTemplate: APPLICATION:Default webGLAnalyzeBuildSize: 0 webGLUseEmbeddedResources: 0 - webGLCompressionFormat: 0 + webGLCompressionFormat: 2 webGLWasmArithmeticExceptions: 0 webGLLinkerTarget: 1 webGLThreadsSupport: 0 @@ -782,7 +781,7 @@ PlayerSettings: m_RenderingPath: 1 m_MobileRenderingPath: 1 metroPackageName: com.openai.unity - metroPackageVersion: 5.2.1.0 + metroPackageVersion: 7.1.0.0 metroCertificatePath: metroCertificatePassword: metroCertificateSubject: @@ -894,7 +893,7 @@ PlayerSettings: luminIsChannelApp: 0 luminVersion: m_VersionCode: 1 - m_VersionName: 5.2.1 + m_VersionName: 7.1.0 hmiPlayerDataPath: hmiForceSRGBBlit: 1 embeddedLinuxEnableGamepadInput: 1