OpenAI-DotNet 7.7.6 #268

Merged · 5 commits · Mar 19, 2024
OpenAI-DotNet-Tests/TestFixture_07_Audio.cs (+69 −13)
@@ -11,25 +11,81 @@ namespace OpenAI.Tests
     internal class TestFixture_07_Audio : AbstractTestFixture
     {
         [Test]
-        public async Task Test_1_Transcription()
+        public async Task Test_01_01_Transcription_Text()
         {
             Assert.IsNotNull(OpenAIClient.AudioEndpoint);
             var transcriptionAudio = Path.GetFullPath("../../../Assets/T3mt39YrlyLoq8laHSdf.mp3");
-            using var request = new AudioTranscriptionRequest(transcriptionAudio, temperature: 0.1f, language: "en");
-            var result = await OpenAIClient.AudioEndpoint.CreateTranscriptionAsync(request);
-            Assert.IsNotNull(result);
-            Console.WriteLine(result);
+            using var request = new AudioTranscriptionRequest(transcriptionAudio, responseFormat: AudioResponseFormat.Text, temperature: 0.1f, language: "en");
+            var response = await OpenAIClient.AudioEndpoint.CreateTranscriptionTextAsync(request);
+            Assert.IsNotNull(response);
         }
 
         [Test]
-        public async Task Test_2_Translation()
+        public async Task Test_01_02_Transcription_Json()
         {
             Assert.IsNotNull(OpenAIClient.AudioEndpoint);
+            var transcriptionAudio = Path.GetFullPath("../../../Assets/T3mt39YrlyLoq8laHSdf.mp3");
+            using var request = new AudioTranscriptionRequest(transcriptionAudio, responseFormat: AudioResponseFormat.Json, temperature: 0.1f, language: "en");
+            var response = await OpenAIClient.AudioEndpoint.CreateTranscriptionTextAsync(request);
+            Assert.IsNotNull(response);
+        }
+
+        [Test]
+        public async Task Test_01_03_01_Transcription_VerboseJson()
+        {
+            Assert.IsNotNull(OpenAIClient.AudioEndpoint);
+            var transcriptionAudio = Path.GetFullPath("../../../Assets/T3mt39YrlyLoq8laHSdf.mp3");
+            using var request = new AudioTranscriptionRequest(transcriptionAudio, responseFormat: AudioResponseFormat.Verbose_Json, temperature: 0.1f, language: "en");
+            var response = await OpenAIClient.AudioEndpoint.CreateTranscriptionJsonAsync(request);
+            Assert.IsNotNull(response);
+            Assert.IsNotNull(response.Duration);
+            Assert.IsTrue(response.Language == "english");
+            Assert.IsNotNull(response.Segments);
+            Assert.IsNotEmpty(response.Segments);
+        }
+
+        [Test]
+        public async Task Test_01_03_02_Transcription_VerboseJson_WordSimilarities()
+        {
+            Assert.IsNotNull(OpenAIClient.AudioEndpoint);
+            var transcriptionAudio = Path.GetFullPath("../../../Assets/T3mt39YrlyLoq8laHSdf.mp3");
+            using var request = new AudioTranscriptionRequest(transcriptionAudio, responseFormat: AudioResponseFormat.Verbose_Json, timestampGranularity: TimestampGranularity.Word, temperature: 0.1f, language: "en");
+            var response = await OpenAIClient.AudioEndpoint.CreateTranscriptionJsonAsync(request);
+            Assert.IsNotNull(response);
+            Assert.IsNotNull(response.Duration);
+            Assert.IsTrue(response.Language == "english");
+            Assert.IsNotNull(response.Words);
+            Assert.IsNotEmpty(response.Words);
+        }
+
+        [Test]
+        public async Task Test_02_01_Translation_Text()
+        {
+            Assert.IsNotNull(OpenAIClient.AudioEndpoint);
             var translationAudio = Path.GetFullPath("../../../Assets/Ja-botchan_1-1_1-2.mp3");
-            using var request = new AudioTranslationRequest(Path.GetFullPath(translationAudio));
-            var result = await OpenAIClient.AudioEndpoint.CreateTranslationAsync(request);
-            Assert.IsNotNull(result);
-            Console.WriteLine(result);
+            using var request = new AudioTranslationRequest(Path.GetFullPath(translationAudio), responseFormat: AudioResponseFormat.Text);
+            var response = await OpenAIClient.AudioEndpoint.CreateTranslationTextAsync(request);
+            Assert.IsNotNull(response);
+        }
+
+        [Test]
+        public async Task Test_02_02_Translation_Json()
+        {
+            Assert.IsNotNull(OpenAIClient.AudioEndpoint);
+            var translationAudio = Path.GetFullPath("../../../Assets/Ja-botchan_1-1_1-2.mp3");
+            using var request = new AudioTranslationRequest(Path.GetFullPath(translationAudio), responseFormat: AudioResponseFormat.Json);
+            var response = await OpenAIClient.AudioEndpoint.CreateTranslationJsonAsync(request);
+            Assert.IsNotNull(response);
+        }
+
+        [Test]
+        public async Task Test_02_03_Translation_VerboseJson()
+        {
+            Assert.IsNotNull(OpenAIClient.AudioEndpoint);
+            var translationAudio = Path.GetFullPath("../../../Assets/Ja-botchan_1-1_1-2.mp3");
+            using var request = new AudioTranslationRequest(Path.GetFullPath(translationAudio), responseFormat: AudioResponseFormat.Verbose_Json);
+            var response = await OpenAIClient.AudioEndpoint.CreateTranslationJsonAsync(request);
+            Assert.IsNotNull(response);
         }
 
         [Test]
@@ -43,9 +99,9 @@ async Task ChunkCallback(ReadOnlyMemory<byte> chunkCallback)
                 await Task.CompletedTask;
             }
 
-            var result = await OpenAIClient.AudioEndpoint.CreateSpeechAsync(request, ChunkCallback);
-            Assert.IsFalse(result.IsEmpty);
-            await File.WriteAllBytesAsync("../../../Assets/HelloWorld.mp3", result.ToArray());
+            var response = await OpenAIClient.AudioEndpoint.CreateSpeechAsync(request, ChunkCallback);
+            Assert.IsFalse(response.IsEmpty);
+            await File.WriteAllBytesAsync("../../../Assets/HelloWorld.mp3", response.ToArray());
         }
     }
 }
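
The new tests double as usage documentation for the split transcription API. For orientation, a minimal sketch of the word-timestamp path outside the test fixture, assuming an already-configured OpenAIClient named "api" and a local "audio.mp3" file (both hypothetical, not part of this diff):

// Hypothetical usage sketch based on the tests above; "api" and "audio.mp3" are assumptions.
using System;
using System.IO;
using System.Threading.Tasks;
using OpenAI;
using OpenAI.Audio;

public static class TranscriptionExample
{
    public static async Task RunAsync(OpenAIClient api)
    {
        // Verbose_Json plus Word granularity is the combination that populates
        // response.Words, mirroring Test_01_03_02 above.
        using var request = new AudioTranscriptionRequest(
            Path.GetFullPath("audio.mp3"),
            responseFormat: AudioResponseFormat.Verbose_Json,
            timestampGranularity: TimestampGranularity.Word,
            temperature: 0.1f,
            language: "en");
        var response = await api.AudioEndpoint.CreateTranscriptionJsonAsync(request);
        Console.WriteLine($"{response.Language} ({response.Duration}s): {response.Text}");
    }
}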
OpenAI-DotNet/Audio/AudioEndpoint.cs (+74 −26)
@@ -5,7 +5,6 @@
 using System.IO;
 using System.Net.Http;
 using System.Text.Json;
-using System.Text.Json.Serialization;
 using System.Threading;
 using System.Threading.Tasks;
 
@@ -17,17 +16,6 @@ namespace OpenAI.Audio
     /// </summary>
     public sealed class AudioEndpoint : OpenAIBaseEndpoint
    {
-        private class AudioResponse
-        {
-            public AudioResponse(string text)
-            {
-                Text = text;
-            }
-
-            [JsonPropertyName("text")]
-            public string Text { get; }
-        }
-
         /// <inheritdoc />
         public AudioEndpoint(OpenAIClient client) : base(client) { }
 
@@ -75,44 +63,96 @@ public async Task<ReadOnlyMemory<byte>> CreateSpeechAsync(SpeechRequest request,
             return new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), 0, totalBytesRead);
         }
 
+        [Obsolete("Use CreateTranscriptionTextAsync or CreateTranscriptionJsonAsync instead.")]
+        public async Task<string> CreateTranscriptionAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
+            => await CreateTranscriptionTextAsync(request, cancellationToken).ConfigureAwait(false);
+
         /// <summary>
         /// Transcribes audio into the input language.
         /// </summary>
         /// <param name="request"><see cref="AudioTranscriptionRequest"/>.</param>
         /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
         /// <returns>The transcribed text.</returns>
-        public async Task<string> CreateTranscriptionAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
+        public async Task<string> CreateTranscriptionTextAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
         {
+            var responseAsString = await Internal_CreateTranscriptionAsync(request, cancellationToken).ConfigureAwait(false);
+            return request.ResponseFormat is AudioResponseFormat.Json or AudioResponseFormat.Verbose_Json
+                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
+                : responseAsString;
+        }
+
+        /// <summary>
+        /// Transcribes audio into the input language.
+        /// </summary>
+        /// <remarks>This method expects the request format to be either <see cref="AudioResponseFormat.Json"/> or <see cref="AudioResponseFormat.Verbose_Json"/>.</remarks>
+        /// <param name="request"><see cref="AudioTranscriptionRequest"/>.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="AudioResponse"/>.</returns>
+        public async Task<AudioResponse> CreateTranscriptionJsonAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
+        {
+            if (request.ResponseFormat is not (AudioResponseFormat.Json or AudioResponseFormat.Verbose_Json))
+            {
+                throw new ArgumentException("Response format must be Json or Verbose Json.", nameof(request.ResponseFormat));
+            }
+
+            var responseAsString = await Internal_CreateTranscriptionAsync(request, cancellationToken).ConfigureAwait(false);
+            return JsonSerializer.Deserialize<AudioResponse>(responseAsString);
+        }
+
+        private async Task<string> Internal_CreateTranscriptionAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
+        {
             using var content = new MultipartFormDataContent();
             using var audioData = new MemoryStream();
             await request.Audio.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
             content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
             content.Add(new StringContent(request.Model), "model");
 
+            if (!string.IsNullOrWhiteSpace(request.Language))
+            {
+                content.Add(new StringContent(request.Language), "language");
+            }
+
             if (!string.IsNullOrWhiteSpace(request.Prompt))
             {
                 content.Add(new StringContent(request.Prompt), "prompt");
             }
 
-            var responseFormat = request.ResponseFormat;
-            content.Add(new StringContent(responseFormat.ToString().ToLower()), "response_format");
+            content.Add(new StringContent(request.ResponseFormat.ToString().ToLower()), "response_format");
 
             if (request.Temperature.HasValue)
            {
                 content.Add(new StringContent(request.Temperature.ToString()), "temperature");
             }
 
-            if (!string.IsNullOrWhiteSpace(request.Language))
+            switch (request.TimestampGranularities)
             {
-                content.Add(new StringContent(request.Language), "language");
+                case TimestampGranularity.Segment:
+                case TimestampGranularity.Word:
+                    content.Add(new StringContent(request.TimestampGranularities.ToString().ToLower()), "timestamp_granularities[]");
+                    break;
             }
 
             request.Dispose();
 
             using var response = await client.Client.PostAsync(GetUrl("/transcriptions"), content, cancellationToken).ConfigureAwait(false);
             var responseAsString = await response.ReadAsStringAsync(EnableDebug, content, null, cancellationToken).ConfigureAwait(false);
-
-            return responseFormat == AudioResponseFormat.Json
-                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
-                : responseAsString;
+            return responseAsString;
         }
 
+        [Obsolete("Use CreateTranslationTextAsync or CreateTranslationJsonAsync instead.")]
+        public async Task<string> CreateTranslationAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
+            => await CreateTranslationTextAsync(request, cancellationToken).ConfigureAwait(false);
+
+        /// <summary>
+        /// Translates audio into English.
+        /// </summary>
+        /// <param name="request"></param>
+        /// <param name="cancellationToken"></param>
+        /// <returns>The translated text.</returns>
+        public async Task<string> CreateTranslationTextAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
+        {
+            var responseAsString = await Internal_CreateTranslationAsync(request, cancellationToken).ConfigureAwait(false);
+            return request.ResponseFormat is AudioResponseFormat.Json or AudioResponseFormat.Verbose_Json
+                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
+                : responseAsString;
+        }
@@ -122,8 +162,20 @@ public async Task<string> CreateTranscriptionAsync(AudioTranscriptionRequest req
         /// </summary>
         /// <param name="request"></param>
         /// <param name="cancellationToken"></param>
-        /// <returns>The translated text.</returns>
-        public async Task<string> CreateTranslationAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
+        /// <returns></returns>
+        /// <exception cref="ArgumentException"></exception>
+        public async Task<AudioResponse> CreateTranslationJsonAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
+        {
+            if (request.ResponseFormat is not (AudioResponseFormat.Json or AudioResponseFormat.Verbose_Json))
+            {
+                throw new ArgumentException("Response format must be Json or Verbose Json.", nameof(request.ResponseFormat));
+            }
+
+            var responseAsString = await Internal_CreateTranslationAsync(request, cancellationToken).ConfigureAwait(false);
+            return JsonSerializer.Deserialize<AudioResponse>(responseAsString);
+        }
+
+        private async Task<string> Internal_CreateTranslationAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
         {
             using var content = new MultipartFormDataContent();
             using var audioData = new MemoryStream();
@@ -136,8 +188,7 @@ public async Task<string> CreateTranslationAsync(AudioTranslationRequest request
                 content.Add(new StringContent(request.Prompt), "prompt");
             }
 
-            var responseFormat = request.ResponseFormat;
-            content.Add(new StringContent(responseFormat.ToString().ToLower()), "response_format");
+            content.Add(new StringContent(request.ResponseFormat.ToString().ToLower()), "response_format");
 
             if (request.Temperature.HasValue)
             {
@@ -148,10 +199,7 @@ public async Task<string> CreateTranslationAsync(AudioTranslationRequest request
 
             using var response = await client.Client.PostAsync(GetUrl("/translations"), content, cancellationToken).ConfigureAwait(false);
             var responseAsString = await response.ReadAsStringAsync(EnableDebug, content, null, cancellationToken).ConfigureAwait(false);
-
-            return responseFormat == AudioResponseFormat.Json
-                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
-                : responseAsString;
+            return responseAsString;
         }
     }
 }
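
The net shape of the endpoint after this change: one private request builder per operation (Internal_CreateTranscriptionAsync, Internal_CreateTranslationAsync), a *TextAsync overload that always returns a string (unwrapping the "text" field when the format is JSON-based), and a *JsonAsync overload that requires a JSON format and returns a typed AudioResponse. A minimal sketch of the translation pair, assuming a configured OpenAIClient named "api" and a local file path (both assumptions, not part of this diff):

// Illustrative only; "api" and "ja-audio.mp3" are assumed.
using var textRequest = new AudioTranslationRequest(
    Path.GetFullPath("ja-audio.mp3"), responseFormat: AudioResponseFormat.Text);
string translation = await api.AudioEndpoint.CreateTranslationTextAsync(textRequest);

// The typed overload accepts only Json or Verbose_Json; any other format throws ArgumentException.
using var jsonRequest = new AudioTranslationRequest(
    Path.GetFullPath("ja-audio.mp3"), responseFormat: AudioResponseFormat.Verbose_Json);
AudioResponse details = await api.AudioEndpoint.CreateTranslationJsonAsync(jsonRequest);
Console.WriteLine($"{details.Language}: {details.Text}");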
OpenAI-DotNet/Audio/AudioResponse.cs (new file, +47 −0)
@@ -0,0 +1,47 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Text.Json.Serialization;
+
+namespace OpenAI.Audio
+{
+    public sealed class AudioResponse
+    {
+        /// <summary>
+        /// The language of the input audio.
+        /// </summary>
+        [JsonInclude]
+        [JsonPropertyName("language")]
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
+        public string Language { get; private set; }
+
+        /// <summary>
+        /// The duration of the input audio.
+        /// </summary>
+        [JsonInclude]
+        [JsonPropertyName("duration")]
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
+        public double? Duration { get; private set; }
+
+        /// <summary>
+        /// The transcribed text.
+        /// </summary>
+        [JsonInclude]
+        [JsonPropertyName("text")]
+        public string Text { get; private set; }
+
+        /// <summary>
+        /// Extracted words and their corresponding timestamps.
+        /// </summary>
+        [JsonInclude]
+        [JsonPropertyName("words")]
+        public TranscriptionWord[] Words { get; private set; }
+
+        /// <summary>
+        /// Segments of the transcribed text and their corresponding details.
+        /// </summary>
+        [JsonInclude]
+        [JsonPropertyName("segments")]
+        [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
+        public TranscriptionSegment[] Segments { get; private set; }
+    }
+}
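
Because every property pairs [JsonInclude] with a private setter, System.Text.Json can hydrate the model without a public constructor. A small round-trip sketch with a hand-written payload (trimmed for brevity; not a captured API response):

// Hand-written sample payload; real verbose_json responses may also carry words/segments.
using System;
using System.Text.Json;
using OpenAI.Audio;

var json = "{\"language\":\"english\",\"duration\":2.5,\"text\":\"Hello there.\"}";
var response = JsonSerializer.Deserialize<AudioResponse>(json);
// Words and Segments remain null here; they are only present when the matching
// timestamp granularity was requested alongside Verbose_Json.
Console.WriteLine($"{response.Language} ({response.Duration}s): {response.Text}");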