-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* workflows * fix * fix * fix * fix * fix * fix * fix 1 * fix 2 * fix * fix4 * fix * dotnet-version fix * needs: call-reusable-build-test * actions/checkout@v4 * actions/setup-dotnet@v4
- Loading branch information
1 parent
1cae79c
commit 5b48c72
Showing
6 changed files
with
217 additions
and
157 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Reusable workflow: restores, builds (Release) and tests the solution on every
# OS / target-framework combination in the matrix. Invoked from other workflows
# through `workflow_call`.
name: Reusable Build and Test Workflow

on:
  workflow_call:

jobs:
  build-test:
    strategy:
      # Let the remaining matrix legs finish even if one combination fails.
      fail-fast: false
      matrix:
        os: [windows-latest, ubuntu-latest, macos-latest]
        # Target framework monikers handed to `dotnet test -f`.
        # NOTE(review): .NET Core 3.1 ships no macOS arm64 build; confirm the
        # architecture of the `macos-latest` runner image before relying on
        # the netcoreapp3.1 leg there.
        dotnet: ['netcoreapp3.1', 'net6.0', 'net8.0']
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # All three SDKs are installed in every leg because the Build step
      # below is not restricted to a single target framework.
      - name: Setup .NET SDKs
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: |
            3.1.x
            6.0.x
            8.0.x

      - name: Restore dependencies
        run: dotnet restore

      - name: Build
        run: dotnet build --configuration Release --no-restore

      # Run the tests against the Release binaries produced by the Build step.
      # Without `--configuration Release --no-build`, `dotnet test` would
      # rebuild the projects in the default Debug configuration.
      - name: Test
        run: dotnet test --configuration Release --no-build --verbosity normal -f ${{ matrix.dotnet }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,166 +1,175 @@ | ||
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using NUnit.Framework;

namespace SharpToken.Tests
{
    /// <summary>
    /// NUnit tests covering <c>GptEncoding</c> encode/decode behavior, special-token
    /// handling, model-name to encoding-name mapping, and the embedded encoding resources.
    /// </summary>
    public class Tests
    {
        /// <summary>
        /// Encoding names whose embedded <c>.tiktoken</c> resource is compared with the remote copy.
        /// </summary>
        private static readonly List<string> ModelsList = new List<string> { "p50k_base", "r50k_base", "cl100k_base" };

        /// <summary>
        /// Test plans as (encoding name, text to encode, expected tokens) triples, loaded by
        /// <c>TestHelpers.ReadTestPlans</c>.
        /// </summary>
        // NOTE(review): the argument looks like an embedded-resource name - confirm in TestHelpers.
        private static readonly List<Tuple<string, string, List<int>>> TestData =
            TestHelpers.ReadTestPlans("SharpToken.Tests.data.TestPlans.txt");

        /// <summary>Per-test setup hook; intentionally empty.</summary>
        [SetUp]
        public void Setup()
        {
        }

        /// <summary>
        /// Encodes the text of a test plan, checks the tokens against the expected list, and checks
        /// that decoding those tokens yields the original text.
        /// </summary>
        /// <param name="resource">One (encoding name, text, expected tokens) triple from <c>TestData</c>.</param>
        [Test]
        [TestCaseSource(nameof(TestData))]
        public void TestEncodingAndDecoding(Tuple<string, string, List<int>> resource)
        {
            var (encodingName, textToEncode, expectedEncoded) = resource;

            var encoding = GptEncoding.GetEncoding(encodingName);
            var encoded = encoding.Encode(textToEncode);
            var decodedText = encoding.Decode(encoded);

            Assert.Multiple(() =>
            {
                Assert.That(encoded, Is.EqualTo(expectedEncoded));
                Assert.That(decodedText, Is.EqualTo(textToEncode));
            });
        }

        /// <summary>
        /// Runs the encode/decode round trip of every test plan on its own <see cref="Task"/> and
        /// verifies each result once all tasks have completed.
        /// </summary>
        [Test]
        public async Task TestEncodingAndDecodingInParallel()
        {
            // Materialize the tasks exactly once. The Select is lazy, so enumerating it a second
            // time (for example to read the results) would start a whole new batch of tasks.
            var tasks = TestData.Select(plan => Task.Run(() =>
            {
                var (encodingName, textToEncode, expectedEncoded) = plan;
                var encoding = GptEncoding.GetEncoding(encodingName);
                var encoded = encoding.Encode(textToEncode);
                var decodedText = encoding.Decode(encoded);
                return (textToEncode, encoded, expectedEncoded, decodedText);
            })).ToList();

            // WhenAll hands back the results of the tasks it awaited; no blocking .Result needed.
            var results = await Task.WhenAll(tasks).ConfigureAwait(false);

            foreach (var (textToEncode, encoded, expectedEncoded, decodedText) in results)
            {
                Assert.Multiple(() =>
                {
                    Assert.That(encoded, Is.EqualTo(expectedEncoded));
                    Assert.That(decodedText, Is.EqualTo(textToEncode));
                });
            }
        }

        /// <summary>
        /// Encoding text that contains <c>&lt;|endofprompt|&gt;</c> with that token explicitly allowed
        /// must produce the expected token list.
        /// </summary>
        [Test]
        public void TestEncodingWithCustomAllowedSet()
        {
            const string encodingName = "cl100k_base";
            const string inputText = "Some Text<|endofprompt|>";
            var allowedSpecialTokens = new HashSet<string> { "<|endofprompt|>" };
            var expectedEncoded = new List<int> { 8538, 2991, 100276 };

            var encoding = GptEncoding.GetEncoding(encodingName);
            var encoded = encoding.Encode(inputText, allowedSpecialTokens);

            Assert.That(encoded, Is.EqualTo(expectedEncoded));
        }

        /// <summary>
        /// Encoding text that contains <c>&lt;|endofprompt|&gt;</c> without passing an allowed set
        /// must throw <see cref="ArgumentException"/>.
        /// </summary>
        [Test]
        public void TestEncodingFailsWithInvalidInputDefaultSpecial()
        {
            const string encodingName = "cl100k_base";
            const string inputText = "Some Text<|endofprompt|>";

            var encoding = GptEncoding.GetEncoding(encodingName);

            void TestAction()
            {
                encoding.Encode(inputText);
            }

            Assert.Throws<ArgumentException>(TestAction);
        }

        /// <summary>
        /// Encoding text that contains a string listed in the custom disallowed set must throw
        /// <see cref="ArgumentException"/>.
        /// </summary>
        [Test]
        public void TestEncodingFailsWithInvalidInputCustomDisallowed()
        {
            const string encodingName = "cl100k_base";
            const string inputText = "Some Text";

            var encoding = GptEncoding.GetEncoding(encodingName);

            void TestAction()
            {
                encoding.Encode(inputText, disallowedSpecial: new HashSet<string> { "Some" });
            }

            Assert.Throws<ArgumentException>(TestAction);
        }

        /// <summary>
        /// A known model name must map to <c>cl100k_base</c>, and an unknown model name must make
        /// <c>Model.GetEncodingNameForModel</c> throw.
        /// </summary>
        [Test]
        public void TestModelPrefixToEncodingMapping()
        {
            const string encodingName = "cl100k_base";
            const string modelName = "gpt-3.5-turbo-16k-0613";
            const string fakeModelName = "gpt-3.6-turbo";

            var encoding = Model.GetEncodingNameForModel(modelName);

            static void TestModelPrefixMappingFailsAction()
            {
                Model.GetEncodingNameForModel(fakeModelName);
            }

            Assert.Multiple(() =>
            {
                Assert.That(encoding, Is.EqualTo(encodingName));
                // Assert.Throws matches the exception type exactly, not derived types.
                Assert.Throws<Exception>(TestModelPrefixMappingFailsAction);
            });
        }

        /// <summary>
        /// Compares the <c>.tiktoken</c> resource embedded in the SharpToken assembly with the file
        /// downloaded from the public blob URL, after normalizing line endings on both sides.
        /// Requires network access.
        /// </summary>
        /// <param name="modelName">Encoding name taken from <c>ModelsList</c>.</param>
        [Test]
        [TestCaseSource(nameof(ModelsList))]
        public async Task TestLocalResourceMatchesRemoteResource(string modelName)
        {
            var embeddedResourceName = $"SharpToken.data.{modelName}.tiktoken";
            var remoteResourceUrl = $"https://openaipublic.blob.core.windows.net/encodings/{modelName}.tiktoken";

            // Read the embedded resource file
            using var stream = typeof(GptEncoding).Assembly.GetManifestResourceStream(embeddedResourceName) ??
                               throw new InvalidOperationException();
            using var reader = new StreamReader(stream);
            var embeddedResourceText = reader.ReadToEnd();
            var normalizedEmbeddedResourceText =
                embeddedResourceText.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

            // Download the remote file
            using var httpClient = new HttpClient();
            var remoteResourceBytes = await httpClient.GetByteArrayAsync(remoteResourceUrl).ConfigureAwait(true);
            var remoteResourceText = Encoding.UTF8.GetString(remoteResourceBytes);
            var normalizedRemoteResourceText =
                remoteResourceText.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

            // Compare the contents of the files and assert their equality
            Assert.That(normalizedEmbeddedResourceText, Is.EqualTo(normalizedRemoteResourceText));
        }

        /// <summary>
        /// Resolves the encoding for the model name <c>gpt-4</c>, then checks both the tokens
        /// produced for a sample sentence and the text decoded from those tokens.
        /// </summary>
        [Test]
        public void TestEncodingForModel()
        {
            const string modelName = "gpt-4";
            const string inputText = "Hello, world!";
            var expectedEncoded = new List<int> { 9906, 11, 1917, 0 };

            var encoding = GptEncoding.GetEncodingForModel(modelName);
            var encoded = encoding.Encode(inputText);
            var decodedText = encoding.Decode(encoded);

            Assert.Multiple(() =>
            {
                Assert.That(encoded, Is.EqualTo(expectedEncoded));
                Assert.That(decodedText, Is.EqualTo(inputText));
            });
        }
    }
}
Oops, something went wrong.