Skip to content

Commit

Permalink
Pipelines update (#34)
Browse files Browse the repository at this point in the history
* workflows

* fix

* fix

* fix

* fix

* fix

* fix

* fix 1

* fix 2

* fix

* fix4

* fix

* dotnet-version fix

* needs: call-reusable-build-test

* actions/checkout@v4

* actions/setup-dotnet@v4
  • Loading branch information
dmitry-brazhenko authored Mar 25, 2024
1 parent 1cae79c commit 5b48c72
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 157 deletions.
11 changes: 8 additions & 3 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,23 @@ on:
- main

jobs:
call-reusable-build-test:
uses: ./.github/workflows/dotnet-build-test.yml

build-test:
needs: call-reusable-build-test
runs-on: windows-latest

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Setup .NET
uses: actions/setup-dotnet@v1
uses: actions/setup-dotnet@v4
with:
dotnet-version: |
6.0.407
3.1.x
6.0.x
8.0.x
- name: Restore dependencies
Expand Down
11 changes: 8 additions & 3 deletions .github/workflows/build-test-and-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@ on:
- main # or your default branch name

jobs:
# Run the shared build-and-test matrix before publishing.
# A reusable workflow that lives in the same repository must be referenced
# with a leading "./"; without it the value is parsed as
# {owner}/{repo}/{path}@{ref} and the workflow fails to load.
call-reusable-build-test:
  uses: ./.github/workflows/dotnet-build-test.yml

build-test-publish:
needs: call-reusable-build-test
runs-on: windows-latest

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4

- name: Write SNK file
shell: pwsh
Expand All @@ -22,10 +26,11 @@ jobs:
SNK_BASE64: ${{ secrets.SNK_BASE64 }}

- name: Setup .NET
uses: actions/setup-dotnet@v1
uses: actions/setup-dotnet@v4
with:
dotnet-version: |
6.0.407
3.1.x
6.0.x
8.0.x
- name: Calculate Package Version
Expand Down
33 changes: 33 additions & 0 deletions .github/workflows/dotnet-build-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Reusable workflow: restores, builds and tests the solution for every
# OS / target-framework combination in the matrix below.
# It only declares the workflow_call trigger, so it runs when another
# workflow references it through `uses:`.
name: Reusable Build and Test Workflow

on:
  workflow_call:

jobs:
  build-test:
    strategy:
      # Keep the remaining matrix combinations running when one of them fails,
      # so a single run reports every broken OS/framework pair.
      fail-fast: false
      matrix:
        os: [windows-latest, ubuntu-latest, macos-latest]
        dotnet: ['netcoreapp3.1', 'net6.0', 'net8.0']
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup .NET SDKs
        uses: actions/setup-dotnet@v4
        with:
          # One SDK per target framework listed in matrix.dotnet.
          dotnet-version: |
            3.1.x
            6.0.x
            8.0.x

      - name: Restore dependencies
        run: dotnet restore

      - name: Build
        run: dotnet build --configuration Release --no-restore

      - name: Test
        # Test the Release binaries produced by the Build step. Without
        # --configuration, `dotnet test` would rebuild and run the Debug
        # configuration, leaving the Release build untested.
        # --no-build also implies --no-restore.
        run: dotnet test --configuration Release --no-build --verbosity normal -f ${{ matrix.dotnet }}
251 changes: 130 additions & 121 deletions SharpToken.Tests/SharpToken.Tests.cs
Original file line number Diff line number Diff line change
@@ -1,166 +1,175 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using NUnit.Framework;

namespace SharpToken.Tests;

public class Tests
namespace SharpToken.Tests
{
private static readonly List<string> ModelsList = new() { "p50k_base", "r50k_base", "cl100k_base" };

private static readonly List<Tuple<string, string, List<int>>> TestData =
TestHelpers.ReadTestPlans("SharpToken.Tests.data.TestPlans.txt");

[SetUp]
public void Setup()
public class Tests
{
}
private static readonly List<string> ModelsList = new List<string> { "p50k_base", "r50k_base", "cl100k_base" };

[Test]
[TestCaseSource(nameof(TestData))]
public void TestEncodingAndDecoding(Tuple<string, string, List<int>> resource)
{
var (encodingName, textToEncode, expectedEncoded) = resource;
private static readonly List<Tuple<string, string, List<int>>> TestData =
TestHelpers.ReadTestPlans("SharpToken.Tests.data.TestPlans.txt");

var encoding = GptEncoding.GetEncoding(encodingName);
var encoded = encoding.Encode(textToEncode);
var decodedText = encoding.Decode(encoded);
Assert.Multiple(() =>
[SetUp]
public void Setup()
{
Assert.That(encoded, Is.EqualTo(expectedEncoded));
Assert.That(decodedText, Is.EqualTo(textToEncode));
});
}
}

[Test]
public async Task TestEncodingAndDecodingInParallel()
{
var tasks = TestData.Select(_ => Task.Run(() =>
[Test]
[TestCaseSource(nameof(TestData))]
public void TestEncodingAndDecoding(Tuple<string, string, List<int>> resource)
{
var (encodingName, textToEncode, expectedEncoded) = _;
var (encodingName, textToEncode, expectedEncoded) = resource;

var encoding = GptEncoding.GetEncoding(encodingName);
var encoded = encoding.Encode(textToEncode);
var decodedText = encoding.Decode(encoded);
return (textToEncode, encoded, expectedEncoded, decodedText);
}));

await Task.WhenAll(tasks).ConfigureAwait(false);

foreach (var (textToEncode, encoded, expectedEncoded, decodedText) in tasks.Select(_ => _.Result))
{
Assert.Multiple(() =>
{
Assert.That(encoded, Is.EqualTo(expectedEncoded));
Assert.That(decodedText, Is.EqualTo(textToEncode));
});
}
}

[Test]
public async Task TestEncodingAndDecodingInParallel()
{
var tasks = TestData.Select(_ => Task.Run(() =>
{
var (encodingName, textToEncode, expectedEncoded) = _;
var encoding = GptEncoding.GetEncoding(encodingName);
var encoded = encoding.Encode(textToEncode);
var decodedText = encoding.Decode(encoded);
return (textToEncode, encoded, expectedEncoded, decodedText);
}));

[Test]
public void TestEncodingWithCustomAllowedSet()
{
const string encodingName = "cl100k_base";
const string inputText = "Some Text<|endofprompt|>";
var allowedSpecialTokens = new HashSet<string> { "<|endofprompt|>" };
await Task.WhenAll(tasks).ConfigureAwait(false);

var encoding = GptEncoding.GetEncoding(encodingName);
var encoded = encoding.Encode(inputText, allowedSpecialTokens);
var expectedEncoded = new List<int> { 8538, 2991, 100276 };
foreach (var (textToEncode, encoded, expectedEncoded, decodedText) in tasks.Select(_ => _.Result))
{
Assert.Multiple(() =>
{
Assert.That(encoded, Is.EqualTo(expectedEncoded));
Assert.That(decodedText, Is.EqualTo(textToEncode));
});
}
}

Assert.That(encoded, Is.EqualTo(expectedEncoded));
}

[Test]
public void TestEncodingFailsWithInvalidInputDefaultSpecial()
{
const string encodingName = "cl100k_base";
const string inputText = "Some Text<|endofprompt|>";
[Test]
public void TestEncodingWithCustomAllowedSet()
{
const string encodingName = "cl100k_base";
const string inputText = "Some Text<|endofprompt|>";
var allowedSpecialTokens = new HashSet<string> { "<|endofprompt|>" };

var encoding = GptEncoding.GetEncoding(encodingName);
var encoding = GptEncoding.GetEncoding(encodingName);
var encoded = encoding.Encode(inputText, allowedSpecialTokens);
var expectedEncoded = new List<int> { 8538, 2991, 100276 };

void TestAction()
{
encoding.Encode(inputText);
Assert.That(encoded, Is.EqualTo(expectedEncoded));
}

Assert.Throws<ArgumentException>(TestAction);
}
[Test]
public void TestEncodingFailsWithInvalidInputDefaultSpecial()
{
const string encodingName = "cl100k_base";
const string inputText = "Some Text<|endofprompt|>";

[Test]
public void TestEncodingFailsWithInvalidInputCustomDisallowed()
{
const string encodingName = "cl100k_base";
const string inputText = "Some Text";
var encoding = GptEncoding.GetEncoding(encodingName);

var encoding = GptEncoding.GetEncoding(encodingName);
void TestAction()
{
encoding.Encode(inputText);
}

void TestAction()
{
encoding.Encode(inputText, disallowedSpecial: new HashSet<string> { "Some" });
Assert.Throws<ArgumentException>(TestAction);
}

Assert.Throws<ArgumentException>(TestAction);
}
[Test]
public void TestEncodingFailsWithInvalidInputCustomDisallowed()
{
const string encodingName = "cl100k_base";
const string inputText = "Some Text";

[Test]
public void TestModelPrefixToEncodingMapping()
{
const string encodingName = "cl100k_base";
const string modelName = "gpt-3.5-turbo-16k-0613";
const string fakeModelName = "gpt-3.6-turbo";
var encoding = GptEncoding.GetEncoding(encodingName);

var encoding = Model.GetEncodingNameForModel(modelName);
void TestAction()
{
encoding.Encode(inputText, disallowedSpecial: new HashSet<string> { "Some" });
}

static void TestModelPrefixMappingFailsAction()
{
Model.GetEncodingNameForModel(fakeModelName);
Assert.Throws<ArgumentException>(TestAction);
}
Assert.Multiple(() =>

[Test]
public void TestModelPrefixToEncodingMapping()
{
Assert.That(encoding, Is.EqualTo(encodingName));
Assert.Throws<Exception>(TestModelPrefixMappingFailsAction);
});
}
const string encodingName = "cl100k_base";
const string modelName = "gpt-3.5-turbo-16k-0613";
const string fakeModelName = "gpt-3.6-turbo";

[Test]
[TestCaseSource(nameof(ModelsList))]
public async Task TestLocalResourceMatchesRemoteResource(string modelName)
{
var embeddedResourceName = $"SharpToken.data.{modelName}.tiktoken";
var remoteResourceUrl = $"https://openaipublic.blob.core.windows.net/encodings/{modelName}.tiktoken";

// Read the embedded resource file
using var stream = typeof(GptEncoding).Assembly.GetManifestResourceStream(embeddedResourceName) ??
throw new InvalidOperationException();
var embeddedResourceText = new StreamReader(stream).ReadToEnd();
var normalizedEmbeddedResourceText =
embeddedResourceText.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

// Download the remote file
using var httpClient = new HttpClient();
var remoteResourceBytes = await httpClient.GetByteArrayAsync(remoteResourceUrl).ConfigureAwait(true);
var remoteResourceText = Encoding.UTF8.GetString(remoteResourceBytes);
var normalizedRemoteResourceText = remoteResourceText.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

// Compare the contents of the files and assert their equality
Assert.That(normalizedEmbeddedResourceText, Is.EqualTo(normalizedRemoteResourceText));
}
var encoding = Model.GetEncodingNameForModel(modelName);

[Test]
public void TestEncodingForModel()
{
const string modelName = "gpt-4";
const string inputText = "Hello, world!";
var expectedEncoded = new List<int> { 9906, 11, 1917, 0 };
static void TestModelPrefixMappingFailsAction()
{
Model.GetEncodingNameForModel(fakeModelName);
}

var encoding = GptEncoding.GetEncodingForModel(modelName);
var encoded = encoding.Encode(inputText);
var decodedText = encoding.Decode(encoded);
Assert.Multiple(() =>
{
Assert.That(encoding, Is.EqualTo(encodingName));
Assert.Throws<Exception>(TestModelPrefixMappingFailsAction);
});
}

Assert.Multiple(() =>
[Test]
[TestCaseSource(nameof(ModelsList))]
public async Task TestLocalResourceMatchesRemoteResource(string modelName)
{
Assert.That(encoded, Is.EqualTo(expectedEncoded));
Assert.That(decodedText, Is.EqualTo(inputText));
});
var embeddedResourceName = $"SharpToken.data.{modelName}.tiktoken";
var remoteResourceUrl = $"https://openaipublic.blob.core.windows.net/encodings/{modelName}.tiktoken";

// Read the embedded resource file
using var stream = typeof(GptEncoding).Assembly.GetManifestResourceStream(embeddedResourceName) ??
throw new InvalidOperationException();
var embeddedResourceText = new StreamReader(stream).ReadToEnd();
var normalizedEmbeddedResourceText =
embeddedResourceText.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

// Download the remote file
using var httpClient = new HttpClient();
var remoteResourceBytes = await httpClient.GetByteArrayAsync(remoteResourceUrl).ConfigureAwait(true);
var remoteResourceText = Encoding.UTF8.GetString(remoteResourceBytes);
var normalizedRemoteResourceText =
remoteResourceText.Replace("\r\n", "\n").Replace("\n", Environment.NewLine);

// Compare the contents of the files and assert their equality
Assert.That(normalizedEmbeddedResourceText, Is.EqualTo(normalizedRemoteResourceText));
}

/// <summary>
/// Looks up the encoding for the "gpt-4" model name, encodes "Hello, world!"
/// and checks both the produced token ids and the decoded round-trip text.
/// </summary>
[Test]
public void TestEncodingForModel()
{
    const string model = "gpt-4";
    const string text = "Hello, world!";
    var expectedTokens = new List<int> { 9906, 11, 1917, 0 };

    var gptEncoding = GptEncoding.GetEncodingForModel(model);
    var actualTokens = gptEncoding.Encode(text);
    var roundTripped = gptEncoding.Decode(actualTokens);

    // Report both mismatches in one run instead of stopping at the first.
    Assert.Multiple(() =>
    {
        Assert.That(actualTokens, Is.EqualTo(expectedTokens));
        Assert.That(roundTripped, Is.EqualTo(text));
    });
}
}
}
Loading

0 comments on commit 5b48c72

Please sign in to comment.