diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index f628112..0d18be5 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -13,14 +13,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 - - name: Write SNK file - shell: pwsh - run: | - $env:SNK_BASE64 -split ' ' -join "`n" | Out-File -Encoding utf8 ./SharpToken/keypair.snk.base64.txt - certutil -decode ./SharpToken/keypair.snk.base64.txt ./SharpToken/keypair.snk - env: - SNK_BASE64: ${{ secrets.SNK_BASE64 }} - - name: Setup .NET uses: actions/setup-dotnet@v1 with: diff --git a/.github/workflows/build-test-and-publish.yml b/.github/workflows/build-test-and-publish.yml index df774d1..dd32fca 100644 --- a/.github/workflows/build-test-and-publish.yml +++ b/.github/workflows/build-test-and-publish.yml @@ -13,14 +13,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 - - name: Write SNK file - shell: pwsh - run: | - $env:SNK_BASE64 -split ' ' -join "`n" | Out-File -Encoding utf8 ./SharpToken/keypair.snk.base64.txt - certutil -decode ./SharpToken/keypair.snk.base64.txt ./SharpToken/keypair.snk - env: - SNK_BASE64: ${{ secrets.SNK_BASE64 }} - - name: Setup .NET uses: actions/setup-dotnet@v1 with: @@ -37,7 +29,7 @@ jobs: run: dotnet restore - name: Build - run: dotnet build --configuration Release --no-restore + run: dotnet build --configuration Release --no-restore /p:EnableSigning=true - name: Test run: dotnet test --no-restore --verbosity normal diff --git a/README.md b/README.md index 3375bb7..4a8bf6b 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,34 @@ var p50kEditEncoding = GptEncoding.GetEncoding("p50k_edit"); var cl100kBaseEncoding = GptEncoding.GetEncoding("cl100k_base"); ``` +### Model Prefix Matching + +Apart from specifying direct model names, SharpToken also provides functionality to map model names based on specific prefixes. This allows users to retrieve an encoding based on a model's prefix. 
+ +Here are the currently supported prefixes and their corresponding encodings: + +| Model Prefix | Encoding | +|---------------------|------------| +| `gpt-4-` | `cl100k_base` | +| `gpt-3.5-turbo-` | `cl100k_base` | +| `gpt-35-turbo` | `cl100k_base` | + +Examples of model names that fall under these prefixes include: +- For the prefix `gpt-4-`: `gpt-4-0314`, `gpt-4-32k`, etc. +- For the prefix `gpt-3.5-turbo-`: `gpt-3.5-turbo-0301`, `gpt-3.5-turbo-0401`, etc. +- For the Azure deployment name `gpt-35-turbo`. + +To retrieve the encoding name based on a model name or its prefix, you can use the `GetEncodingNameForModel` method: + +```csharp +string encodingName = GetEncodingNameForModel("gpt-4-0314"); // This will return "cl100k_base" +``` + +If the provided model name doesn't match any direct model names or prefixes, the method will return `null`. + + + + ## Understanding Encoded Values When you encode a string using the Encode method, the returned value is a list of integers that represent tokens in the diff --git a/SharpToken/SharpToken.csproj b/SharpToken/SharpToken.csproj index e8fc047..6c172ce 100644 --- a/SharpToken/SharpToken.csproj +++ b/SharpToken/SharpToken.csproj @@ -12,12 +12,12 @@ true - keypair.snk + false - + true $(KeyFilePath)